This is an automated email from the ASF dual-hosted git repository.
dmsysolyatin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git
The following commit(s) were added to refs/heads/main by this push:
new 0d3cf2cbad [CALCITE-5668] When parsing SQL in PostgreSQL dialect,
allow unquoted table names to contain dollar sign, letters with diacritical
marks and non-Latin letters
0d3cf2cbad is described below
commit 0d3cf2cbadd97410ba4c63d657a5309c5ac7dc9c
Author: dssysolyatin <[email protected]>
AuthorDate: Thu Mar 3 16:28:44 2022 +0800
[CALCITE-5668] When parsing SQL in PostgreSQL dialect, allow unquoted table
names to contain dollar sign, letters with diacritical marks and non-Latin
letters
Co-authored-by: xurenhe <[email protected]>
Close apache/calcite#3215
Close apache/calcite#2737
---
babel/src/main/codegen/config.fmpp | 12 ++++++++++++
.../java/org/apache/calcite/test/BabelParserTest.java | 17 +++++++++++++++++
core/src/main/codegen/default_config.fmpp | 6 +++++-
core/src/main/codegen/templates/Parser.jj | 6 +++++-
4 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/babel/src/main/codegen/config.fmpp
b/babel/src/main/codegen/config.fmpp
index a61dee55e2..cc96b248b3 100644
--- a/babel/src/main/codegen/config.fmpp
+++ b/babel/src/main/codegen/config.fmpp
@@ -577,6 +577,18 @@ data: {
"< NULL_SAFE_EQUAL: \"<=>\" >"
]
+ # Custom identifier token.
+ #
+ # PostgreSQL allows letters with diacritical marks and non-Latin letters
+ # at the beginning of an identifier and, additionally, the dollar sign in the rest of the identifier.
+ # Letters with diacritical marks and non-Latin letters
+ # are represented by character codes 128 to 255 (or in octal \200 to \377).
+ # See https://learn.microsoft.com/en-gb/office/vba/language/reference/user-interface-help/character-set-128255
+ # See https://github.com/postgres/postgres/blob/master/src/backend/parser/scan.l
+ #
+ # MySQL allows a digit at the beginning of an identifier.
+ customIdentifierToken: "< IDENTIFIER:
(<LETTER>|<DIGIT>|[\"\\200\"-\"\\377\"])
(<LETTER>|<DIGIT>|<DOLLAR>|[\"\\200\"-\"\\377\"])* >"
+
# Binary operators initialization.
# Example: "InfixCast".
extraBinaryExpressions: [
diff --git a/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
b/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
index 310929592a..25b483e024 100644
--- a/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
+++ b/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
@@ -110,6 +110,23 @@ class BabelParserTest extends SqlParserTest {
sql(sql).ok(expected);
}
+ @Test void testIdentifier() {
+ // MySQL supports identifiers that start with a digit
+ SqlParserFixture mysqlF = fixture().withDialect(MysqlSqlDialect.DEFAULT);
+ mysqlF.sql("select 1 as 1_c1 from t")
+ .ok("SELECT 1 AS `1_c1`\n"
+ + "FROM `t`");
+
+ // PostgreSQL allows an identifier to begin with a letter (a-z, but also
+ // letters with diacritical marks and non-Latin letters) or an underscore (_).
+ // Subsequent characters in an identifier can be letters, underscores,
+ // digits (0-9), or dollar signs ($).
+ SqlParserFixture postgreF =
fixture().withDialect(PostgresqlSqlDialect.DEFAULT);
+ postgreF.sql("select 1 as \200_$\251\377 from t")
+ .ok("SELECT 1 AS \"\200_$\251\377\"\n"
+ + "FROM \"t\"");
+ }
+
/** Tests that there are no reserved keywords. */
@Disabled
@Test void testKeywords() {
diff --git a/core/src/main/codegen/default_config.fmpp
b/core/src/main/codegen/default_config.fmpp
index 3e8e792e12..b1f6a59bf3 100644
--- a/core/src/main/codegen/default_config.fmpp
+++ b/core/src/main/codegen/default_config.fmpp
@@ -439,7 +439,11 @@ parser: {
# Example: "parserImpls.ftl".
implementationFiles: [
]
-
+
+ # Custom identifier token.
+ # Example: "< IDENTIFIER: (<LETTER>|<DIGIT>)+ >".
+ customIdentifierToken: ""
+
includePosixOperators: false
includeCompoundIdentifier: true
includeBraces: true
diff --git a/core/src/main/codegen/templates/Parser.jj
b/core/src/main/codegen/templates/Parser.jj
index 5ad62bbbcd..d818533bbf 100644
--- a/core/src/main/codegen/templates/Parser.jj
+++ b/core/src/main/codegen/templates/Parser.jj
@@ -8835,7 +8835,11 @@ MORE :
<DEFAULT, DQID, BTID, BQID> TOKEN :
{
- < IDENTIFIER: <LETTER> (<LETTER>|<DIGIT>)* >
+ <#if parser.customIdentifierToken?has_content>
+ ${parser.customIdentifierToken}
+ <#else>
+ < IDENTIFIER: <LETTER> (<LETTER>|<DIGIT>)* >
+ </#if>
}
<DEFAULT, DQID, BTID, BQID, BQHID> TOKEN :