>From Ritik Raj <[email protected]>:
Ritik Raj has uploaded this change for review. (
https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17450 )
Change subject: [ASTERIXDB-3155] Supporting escape backticks in SQL++
......................................................................
[ASTERIXDB-3155] Supporting escape backticks in SQL++
Change-Id: I5d9069c6aaa1365545f7e0ca728be6ea2ca4641d
---
M asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
M
asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/parser/ScopeChecker.java
2 files changed, 108 insertions(+), 41 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/50/17450/1
diff --git
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/parser/ScopeChecker.java
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/parser/ScopeChecker.java
index f5aa489..dedb5f6 100644
---
a/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/parser/ScopeChecker.java
+++
b/asterixdb/asterix-lang-common/src/main/java/org/apache/asterix/lang/common/parser/ScopeChecker.java
@@ -26,6 +26,7 @@
import org.apache.asterix.lang.common.context.RootScopeFactory;
import org.apache.asterix.lang.common.context.Scope;
import org.apache.asterix.lang.common.struct.Identifier;
+import org.apache.commons.text.StringEscapeUtils;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.Counter;
@@ -257,51 +258,96 @@
}
}
+ private static boolean isSurrogate(char r) {
+ return 0xd800 <= r && r < 0xe000;
+ }
+
public static String removeQuotesAndEscapes(String s) {
- char q = s.charAt(0); // simple or double quote
- String stripped = s.substring(1, s.length() - 1);
- int pos = stripped.indexOf('\\');
- if (pos < 0) {
- return stripped;
+
+ // The lexer should never let such input through; throw IllegalStateException
as a safeguard in case something went wrong in the lexer
+ if (s.length() < 2) {
+ throw new IllegalStateException("Should have been caught by the
lexer");
}
+
StringBuilder res = new StringBuilder();
- int start = 0;
- while (pos >= 0) {
- res.append(stripped.substring(start, pos));
- char c = stripped.charAt(pos + 1);
- switch (c) {
- case '/':
- case '\\':
- res.append(c);
- break;
- case 'b':
- res.append('\b');
- break;
- case 'f':
- res.append('\f');
- break;
- case 'n':
- res.append('\n');
- break;
- case 'r':
- res.append('\r');
- break;
- case 't':
- res.append('\t');
- break;
- case '\'':
- case '"':
- if (c == q) {
- res.append(c);
- }
- break;
- default:
- throw new IllegalStateException("'\\" + c + "' should have
been caught by the lexer");
- }
- start = pos + 2;
- pos = stripped.indexOf('\\', start);
+ char[] cray = s.toCharArray();
+
+ if (cray[0] != cray[cray.length - 1]) {
+ throw new IllegalStateException("Should have been caught by the
lexer");
}
- res.append(stripped.substring(start));
+
+ for (int pos = 1; pos < cray.length - 1;) {
+ char c = cray[pos];
+ pos++;
+ if (c == '\\') {
+ c = cray[pos];
+ pos++;
+ switch (c) {
+ case 'b':
+ res.append('\b');
+ break;
+ case 'f':
+ res.append('\f');
+ break;
+ case 'n':
+ res.append('\n');
+ break;
+ case 'r':
+ res.append('\r');
+ break;
+ case 't':
+ res.append('\t');
+ break;
+ case '/':
+ case '\\':
+ case '"':
+ case '\'':
+ case '`':
+ res.append(c);
+ break;
+ case 'u':
+ // handle Unicode
+ if (pos + 4 > cray.length - 1) {
+ throw new IllegalStateException("should have been
caught by the lexer");
+ }
+
+ String encodedValue = s.substring(pos - 2, pos + 4);
// \u0000
+ String decodedValue =
StringEscapeUtils.unescapeJava(encodedValue);
+
+ pos += 4;
+ // Check for Surrogate Unicode String
+ if (isSurrogate(decodedValue.charAt(0))) {
+ if (pos + 6 > cray.length - 1 || cray[pos] != '\\'
|| cray[pos + 1] != 'u') {
+ decodedValue = "\uFFFD";
+ } else {
+ encodedValue = encodedValue + s.substring(pos,
pos + 6);
+ pos += 6;
+ decodedValue =
StringEscapeUtils.unescapeJava(encodedValue);
+ }
+ }
+ res.append(decodedValue);
+ break;
+ default:
+ throw new IllegalStateException("'\\" + c + "' should
have been caught by the lexer");
+ }
+ } else {
+ res.append(c);
+ if (cray[0] == '\'' && c == '\'') { // if single quoted, allow
'' as an escaped single quote
+ if (pos >= cray.length - 1 || cray[pos] != '\'') {
+ throw new IllegalStateException("'" + c + "' should
have been caught by the lexer");
+ }
+ pos++;
+ } else if (cray[0] == '`' && c == '`') { // similar behavior
for ` (backtick)
+ if (pos >= cray.length - 1 || cray[pos] != '`') {
+ throw new IllegalStateException("`" + c + "' should
have been caught by the lexer");
+ }
+ pos++;
+ } else if (cray[0] == c) { // Illegal Character
+ throw new IllegalStateException("should have been caught
by lexer");
+ }
+ }
+ }
+
return res.toString();
}
diff --git a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
index 5ae1eb5..a2ec479 100644
--- a/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
+++ b/asterixdb/asterix-lang-sqlpp/src/main/javacc/SQLPP.jj
@@ -5756,7 +5756,10 @@
| <EscapeFormf>
| <EscapeNl>
| <EscapeCr>
+ | <EscapeUnicode>
| <EscapeTab>
+ | <EscapeBackTick>
+ | <EscapeBackTickBackTick>
| ~["`","\\"])* "`">
| <STRING_LITERAL : ( ("E")? "\"" (
<EscapeQuot>
@@ -5765,8 +5768,11 @@
| <EscapeBspace>
| <EscapeFormf>
| <EscapeNl>
+ | <EscapeBackTick>
+ | <EscapeBackTickBackTick>
| <EscapeCr>
| <EscapeTab>
+ | <EscapeUnicode>
| ~["\"","\\"])* "\"")
| ( ("E")? "\'" (
<EscapeApos>
@@ -5775,9 +5781,14 @@
| <EscapeBspace>
| <EscapeFormf>
| <EscapeNl>
+ | <EscapeBackTick>
+ | <EscapeBackTickBackTick>
+ | <EscapeUnicode>
| <EscapeCr>
| <EscapeTab>
| ~["\'","\\"])* "\'")>
+ | < #EscapeBackTick: "\\`" >
+ | < #EscapeBackTickBackTick: "``" >
| < #EscapeQuot: "\\\"" >
| < #EscapeApos: "\\\'" >
| < #EscapeBslash: "\\\\" >
@@ -5787,6 +5798,7 @@
| < #EscapeNl: "\\n" >
| < #EscapeCr: "\\r" >
| < #EscapeTab: "\\t" >
+ | < #EscapeUnicode: "\\u" (["0"-"9","a"-"f","A"-"F"]){4} >
}
<DEFAULT,IN_DBL_BRACE>
--
To view, visit https://asterix-gerrit.ics.uci.edu/c/asterixdb/+/17450
To unsubscribe, or for help writing mail filters, visit
https://asterix-gerrit.ics.uci.edu/settings
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Change-Id: I5d9069c6aaa1365545f7e0ca728be6ea2ca4641d
Gerrit-Change-Number: 17450
Gerrit-PatchSet: 1
Gerrit-Owner: Ritik Raj <[email protected]>
Gerrit-MessageType: newchange