PHOENIX-3246 Treat U+2002 as whitespace in parser
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/b65e385a Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/b65e385a Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/b65e385a Branch: refs/heads/calcite Commit: b65e385a828f89980ba4e5ae68f724d7cad50265 Parents: c02d6cb Author: Josh Elser <els...@apache.org> Authored: Sat Sep 3 19:21:43 2016 -0400 Committer: Josh Elser <els...@apache.org> Committed: Wed Sep 7 14:19:06 2016 -0400 ---------------------------------------------------------------------- phoenix-core/src/main/antlr3/PhoenixSQL.g | 5 +++-- .../src/main/java/org/apache/phoenix/parse/SQLParser.java | 2 +- .../java/org/apache/phoenix/parse/QueryParserTest.java | 10 ++++++++++ 3 files changed, 14 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/b65e385a/phoenix-core/src/main/antlr3/PhoenixSQL.g ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/antlr3/PhoenixSQL.g b/phoenix-core/src/main/antlr3/PhoenixSQL.g index d8f28e5..bc48b19 100644 --- a/phoenix-core/src/main/antlr3/PhoenixSQL.g +++ b/phoenix-core/src/main/antlr3/PhoenixSQL.g @@ -1223,7 +1223,8 @@ FIELDCHAR : LETTER | DIGIT | '_' - | '\u0080'..'\ufffe' + | '\u0080'..'\u2001' + | '\u2003'..'\ufffe' ; // A Letter is a lower or upper case ascii character. @@ -1283,7 +1284,7 @@ CHAR_ESC // whitespace (skip) WS - : ( ' ' | '\t' ) { $channel=HIDDEN; } + : ( ' ' | '\t' | '\u2002' ) { $channel=HIDDEN; } ; EOL http://git-wip-us.apache.org/repos/asf/phoenix/blob/b65e385a/phoenix-core/src/main/java/org/apache/phoenix/parse/SQLParser.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/parse/SQLParser.java b/phoenix-core/src/main/java/org/apache/phoenix/parse/SQLParser.java index 36f756c..1a80991 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/parse/SQLParser.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/parse/SQLParser.java @@ -192,4 +192,4 @@ public class SQLParser { return Character.toLowerCase(data[p + i - 1]); } } -} \ No newline at end of file +} http://git-wip-us.apache.org/repos/asf/phoenix/blob/b65e385a/phoenix-core/src/test/java/org/apache/phoenix/parse/QueryParserTest.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/test/java/org/apache/phoenix/parse/QueryParserTest.java b/phoenix-core/src/test/java/org/apache/phoenix/parse/QueryParserTest.java index 70f590f..e7127b7 100644 --- a/phoenix-core/src/test/java/org/apache/phoenix/parse/QueryParserTest.java +++ b/phoenix-core/src/test/java/org/apache/phoenix/parse/QueryParserTest.java @@ -34,6 +34,8 @@ import org.apache.phoenix.jdbc.PhoenixStatement.Operation; import org.apache.phoenix.schema.SortOrder; import org.junit.Test; +import com.google.common.base.Joiner; + public class QueryParserTest { private void parseQuery(String sql) throws IOException, SQLException { @@ -772,4 +774,12 @@ public class QueryParserTest { String sql = "SELECT * FROM T WHERE A LIKE 'a\\(d'"; parseQuery(sql); } + + @Test + public void testUnicodeSpace() throws Exception { + // U+2002 (8194) is a "EN Space" which looks just like a normal space (0x20 in ascii) + String unicodeEnSpace = String.valueOf(Character.toChars(8194)); + String sql = Joiner.on(unicodeEnSpace).join(new String[] {"SELECT", "*", "FROM", "T"}); + parseQuery(sql); + } }