This is an automated email from the ASF dual-hosted git repository.

dmsysolyatin pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/calcite.git


The following commit(s) were added to refs/heads/main by this push:
     new 0d3cf2cbad [CALCITE-5668] When parsing SQL in PostgreSQL dialect, allow unquoted table names to contain dollar sign, letters with diacritical marks and non-Latin letters
0d3cf2cbad is described below

commit 0d3cf2cbadd97410ba4c63d657a5309c5ac7dc9c
Author: dssysolyatin <[email protected]>
AuthorDate: Thu Mar 3 16:28:44 2022 +0800

    [CALCITE-5668] When parsing SQL in PostgreSQL dialect, allow unquoted table names to contain dollar sign, letters with diacritical marks and non-Latin letters
    
    Co-authored-by: xurenhe <[email protected]>
    
    Close apache/calcite#3215
    Close apache/calcite#2737
---
 babel/src/main/codegen/config.fmpp                      | 12 ++++++++++++
 .../java/org/apache/calcite/test/BabelParserTest.java   | 17 +++++++++++++++++
 core/src/main/codegen/default_config.fmpp               |  6 +++++-
 core/src/main/codegen/templates/Parser.jj               |  6 +++++-
 4 files changed, 39 insertions(+), 2 deletions(-)

diff --git a/babel/src/main/codegen/config.fmpp b/babel/src/main/codegen/config.fmpp
index a61dee55e2..cc96b248b3 100644
--- a/babel/src/main/codegen/config.fmpp
+++ b/babel/src/main/codegen/config.fmpp
@@ -577,6 +577,18 @@ data: {
       "< NULL_SAFE_EQUAL: \"<=>\" >"
     ]
 
+    # Custom identifier token.
+    # 
+    # PostgreSQL allows letters with diacritical marks and non-Latin letters 
+    # in the beginning of identifier and additionally dollar sign in the rest of identifier.
+    # Letters with diacritical marks and non-Latin letters
+    # are represented by character codes 128 to 255 (or in octal \200 to \377).
+    # See https://learn.microsoft.com/en-gb/office/vba/language/reference/user-interface-help/character-set-128255
+    # See https://github.com/postgres/postgres/blob/master/src/backend/parser/scan.l
+    #
+    # MySQL allows digit in the beginning of identifier
+    customIdentifierToken: "< IDENTIFIER: 
(<LETTER>|<DIGIT>|[\"\\200\"-\"\\377\"]) 
(<LETTER>|<DIGIT>|<DOLLAR>|[\"\\200\"-\"\\377\"])* >"
+    
     # Binary operators initialization.
     # Example: "InfixCast".
     extraBinaryExpressions: [
diff --git a/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java b/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
index 310929592a..25b483e024 100644
--- a/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
+++ b/babel/src/test/java/org/apache/calcite/test/BabelParserTest.java
@@ -110,6 +110,23 @@ class BabelParserTest extends SqlParserTest {
     sql(sql).ok(expected);
   }
 
+  @Test void testIdentifier() {
+    // MySQL supports identifiers started with numbers
+    SqlParserFixture mysqlF = fixture().withDialect(MysqlSqlDialect.DEFAULT);
+    mysqlF.sql("select 1 as 1_c1 from t")
+        .ok("SELECT 1 AS `1_c1`\n"
+            + "FROM `t`");
+
+    // PostgreSQL allows identifier
+    // to begin with a letter (a-z, but also letters with diacritical marks and non-Latin letters)
+    // or an underscore (_). Subsequent characters in an identifier
+    // can be letters, underscores, digits (0-9), or dollar signs ($)
+    SqlParserFixture postgreF = 
fixture().withDialect(PostgresqlSqlDialect.DEFAULT);
+    postgreF.sql("select 1 as \200_$\251\377 from t")
+        .ok("SELECT 1 AS \"\200_$\251\377\"\n"
+            + "FROM \"t\"");
+  }
+
   /** Tests that there are no reserved keywords. */
   @Disabled
   @Test void testKeywords() {
diff --git a/core/src/main/codegen/default_config.fmpp b/core/src/main/codegen/default_config.fmpp
index 3e8e792e12..b1f6a59bf3 100644
--- a/core/src/main/codegen/default_config.fmpp
+++ b/core/src/main/codegen/default_config.fmpp
@@ -439,7 +439,11 @@ parser: {
   # Example: "parserImpls.ftl".
   implementationFiles: [
   ]
-
+  
+  # Custom identifier token.
+  # Example: "< IDENTIFIER: (<LETTER>|<DIGIT>)+ >".
+  customIdentifierToken: ""
+      
   includePosixOperators: false
   includeCompoundIdentifier: true
   includeBraces: true
diff --git a/core/src/main/codegen/templates/Parser.jj b/core/src/main/codegen/templates/Parser.jj
index 5ad62bbbcd..d818533bbf 100644
--- a/core/src/main/codegen/templates/Parser.jj
+++ b/core/src/main/codegen/templates/Parser.jj
@@ -8835,7 +8835,11 @@ MORE :
 
 <DEFAULT, DQID, BTID, BQID> TOKEN :
 {
-    < IDENTIFIER: <LETTER> (<LETTER>|<DIGIT>)* >
+    <#if parser.customIdentifierToken?has_content>
+        ${parser.customIdentifierToken}
+    <#else>
+        < IDENTIFIER: <LETTER> (<LETTER>|<DIGIT>)* >
+    </#if>
 }
 
 <DEFAULT, DQID, BTID, BQID, BQHID> TOKEN :

Reply via email to