This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 012d99d3a3d [SPARK-41092][SQL] Do not use identifier to match interval 
units
012d99d3a3d is described below

commit 012d99d3a3d92819d5a26cddcfd566c46380b952
Author: Wenchen Fan <wenc...@databricks.com>
AuthorDate: Thu Nov 10 11:36:21 2022 +0300

    [SPARK-41092][SQL] Do not use identifier to match interval units
    
    ### What changes were proposed in this pull request?
    
    The current antlr-based SQL parser is pretty fragile due to the fact that 
we make the antlr parser rule pretty flexible and push more parsing logic to 
the Scala side (`AstBuilder`). A tiny change to the antlr parser rule may break 
the parser unexpectedly. As an example, in 
https://github.com/apache/spark/pull/38404 , we added a new parser rule to 
extend the INSERT syntax, and it breaks interval literal. `select b + interval 
'1 month' from values (1, 1)` can't be parsed after https://g [...]
    
    This PR makes the interval literal parser rule stricter. Now it lists all 
the allowed interval units instead of matching an identifier. This fixes the 
issue we hit in https://github.com/apache/spark/pull/38404 .
    
    In the future, we can revisit other parser rules and try to rely on antlr 
more to do the parsing work.
    
    ### Why are the changes needed?
    
    fix parser issues we hit in https://github.com/apache/spark/pull/38404
    
    ### Does this PR introduce _any_ user-facing change?
    
    The error message is changed a little bit for `SELECT INTERVAL 1 
wrong_unit`.
    
    ### How was this patch tested?
    
    existing tests
    
    Closes #38583 from cloud-fan/parser.
    
    Authored-by: Wenchen Fan <wenc...@databricks.com>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 docs/sql-ref-ansi-compliance.md                    | 11 +++++++
 .../spark/sql/catalyst/parser/SqlBaseLexer.g4      | 11 +++++++
 .../spark/sql/catalyst/parser/SqlBaseParser.g4     | 36 ++++++++++++++++++++--
 .../sql-tests/results/ansi/interval.sql.out        | 15 +++------
 .../resources/sql-tests/results/interval.sql.out   | 15 +++------
 5 files changed, 66 insertions(+), 22 deletions(-)

diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md
index a59d145d551..1501e14c604 100644
--- a/docs/sql-ref-ansi-compliance.md
+++ b/docs/sql-ref-ansi-compliance.md
@@ -407,6 +407,7 @@ Below is a list of all the keywords in Spark SQL.
 |DATEADD|non-reserved|non-reserved|non-reserved|
 |DATEDIFF|non-reserved|non-reserved|non-reserved|
 |DAY|non-reserved|non-reserved|non-reserved|
+|DAYS|non-reserved|non-reserved|non-reserved|
 |DAYOFYEAR|non-reserved|non-reserved|non-reserved|
 |DBPROPERTIES|non-reserved|non-reserved|non-reserved|
 |DEFAULT|non-reserved|non-reserved|non-reserved|
@@ -456,6 +457,7 @@ Below is a list of all the keywords in Spark SQL.
 |GROUPING|non-reserved|non-reserved|reserved|
 |HAVING|reserved|non-reserved|reserved|
 |HOUR|non-reserved|non-reserved|non-reserved|
+|HOURS|non-reserved|non-reserved|non-reserved|
 |IF|non-reserved|non-reserved|not a keyword|
 |IGNORE|non-reserved|non-reserved|non-reserved|
 |IMPORT|non-reserved|non-reserved|non-reserved|
@@ -495,13 +497,19 @@ Below is a list of all the keywords in Spark SQL.
 |MATCHED|non-reserved|non-reserved|non-reserved|
 |MERGE|non-reserved|non-reserved|non-reserved|
 |MICROSECOND|non-reserved|non-reserved|non-reserved|
+|MICROSECONDS|non-reserved|non-reserved|non-reserved|
 |MILLISECOND|non-reserved|non-reserved|non-reserved|
+|MILLISECONDS|non-reserved|non-reserved|non-reserved|
 |MINUTE|non-reserved|non-reserved|non-reserved|
+|MINUTES|non-reserved|non-reserved|non-reserved|
 |MINUS|non-reserved|strict-non-reserved|non-reserved|
 |MONTH|non-reserved|non-reserved|non-reserved|
+|MONTHS|non-reserved|non-reserved|non-reserved|
 |MSCK|non-reserved|non-reserved|non-reserved|
 |NAMESPACE|non-reserved|non-reserved|non-reserved|
 |NAMESPACES|non-reserved|non-reserved|non-reserved|
+|NANOSECOND|non-reserved|non-reserved|non-reserved|
+|NANOSECONDS|non-reserved|non-reserved|non-reserved|
 |NATURAL|reserved|strict-non-reserved|reserved|
 |NO|non-reserved|non-reserved|reserved|
 |NOT|reserved|non-reserved|reserved|
@@ -565,6 +573,7 @@ Below is a list of all the keywords in Spark SQL.
 |SCHEMA|non-reserved|non-reserved|non-reserved|
 |SCHEMAS|non-reserved|non-reserved|non-reserved|
 |SECOND|non-reserved|non-reserved|non-reserved|
+|SECONDS|non-reserved|non-reserved|non-reserved|
 |SELECT|reserved|non-reserved|reserved|
 |SEMI|non-reserved|strict-non-reserved|non-reserved|
 |SEPARATED|non-reserved|non-reserved|non-reserved|
@@ -631,10 +640,12 @@ Below is a list of all the keywords in Spark SQL.
 |VIEW|non-reserved|non-reserved|non-reserved|
 |VIEWS|non-reserved|non-reserved|non-reserved|
 |WEEK|non-reserved|non-reserved|non-reserved|
+|WEEKS|non-reserved|non-reserved|non-reserved|
 |WHEN|reserved|non-reserved|reserved|
 |WHERE|reserved|non-reserved|reserved|
 |WINDOW|non-reserved|non-reserved|reserved|
 |WITH|reserved|non-reserved|reserved|
 |WITHIN|reserved|non-reserved|reserved|
 |YEAR|non-reserved|non-reserved|non-reserved|
+|YEARS|non-reserved|non-reserved|non-reserved|
 |ZONE|non-reserved|non-reserved|non-reserved|
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
index e11391fa5f4..41adbda7b10 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4
@@ -140,6 +140,7 @@ CURRENT_TIME: 'CURRENT_TIME';
 CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
 CURRENT_USER: 'CURRENT_USER';
 DAY: 'DAY';
+DAYS: 'DAYS';
 DAYOFYEAR: 'DAYOFYEAR';
 DATA: 'DATA';
 DATABASE: 'DATABASE';
@@ -194,6 +195,7 @@ GROUP: 'GROUP';
 GROUPING: 'GROUPING';
 HAVING: 'HAVING';
 HOUR: 'HOUR';
+HOURS: 'HOURS';
 IF: 'IF';
 IGNORE: 'IGNORE';
 IMPORT: 'IMPORT';
@@ -233,12 +235,18 @@ MAP: 'MAP';
 MATCHED: 'MATCHED';
 MERGE: 'MERGE';
 MICROSECOND: 'MICROSECOND';
+MICROSECONDS: 'MICROSECONDS';
 MILLISECOND: 'MILLISECOND';
+MILLISECONDS: 'MILLISECONDS';
 MINUTE: 'MINUTE';
+MINUTES: 'MINUTES';
 MONTH: 'MONTH';
+MONTHS: 'MONTHS';
 MSCK: 'MSCK';
 NAMESPACE: 'NAMESPACE';
 NAMESPACES: 'NAMESPACES';
+NANOSECOND: 'NANOSECOND';
+NANOSECONDS: 'NANOSECONDS';
 NATURAL: 'NATURAL';
 NO: 'NO';
 NOT: 'NOT' | '!';
@@ -299,6 +307,7 @@ ROLLUP: 'ROLLUP';
 ROW: 'ROW';
 ROWS: 'ROWS';
 SECOND: 'SECOND';
+SECONDS: 'SECONDS';
 SCHEMA: 'SCHEMA';
 SCHEMAS: 'SCHEMAS';
 SELECT: 'SELECT';
@@ -367,12 +376,14 @@ VERSION: 'VERSION';
 VIEW: 'VIEW';
 VIEWS: 'VIEWS';
 WEEK: 'WEEK';
+WEEKS: 'WEEKS';
 WHEN: 'WHEN';
 WHERE: 'WHERE';
 WINDOW: 'WINDOW';
 WITH: 'WITH';
 WITHIN: 'WITHIN';
 YEAR: 'YEAR';
+YEARS: 'YEARS';
 ZONE: 'ZONE';
 //--SPARK-KEYWORD-LIST-END
 //============================
diff --git 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
index 9bd6e01c80a..7b673870af8 100644
--- 
a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
+++ 
b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4
@@ -955,7 +955,7 @@ errorCapturingMultiUnitsInterval
     ;
 
 multiUnitsInterval
-    : (intervalValue unit+=identifier)+
+    : (intervalValue unit+=unitInMultiUnits)+
     ;
 
 errorCapturingUnitToUnitInterval
@@ -963,7 +963,7 @@ errorCapturingUnitToUnitInterval
     ;
 
 unitToUnitInterval
-    : value=intervalValue from=identifier TO to=identifier
+    : value=intervalValue from=unitInUnitToUnit TO to=unitInUnitToUnit
     ;
 
 intervalValue
@@ -971,6 +971,16 @@ intervalValue
       (INTEGER_VALUE | DECIMAL_VALUE | stringLit)
     ;
 
+unitInMultiUnits
+    : NANOSECOND | NANOSECONDS | MICROSECOND | MICROSECONDS | MILLISECOND | 
MILLISECONDS
+    | SECOND | SECONDS | MINUTE | MINUTES | HOUR | HOURS | DAY | DAYS | WEEK | 
WEEKS
+    | MONTH | MONTHS | YEAR | YEARS
+    ;
+
+unitInUnitToUnit
+    : SECOND | MINUTE | HOUR | DAY | MONTH | YEAR
+    ;
+
 colPosition
     : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier
     ;
@@ -1202,6 +1212,7 @@ ansiNonReserved
     | DATEADD
     | DATEDIFF
     | DAY
+    | DAYS
     | DAYOFYEAR
     | DBPROPERTIES
     | DEFAULT
@@ -1236,6 +1247,7 @@ ansiNonReserved
     | GLOBAL
     | GROUPING
     | HOUR
+    | HOURS
     | IF
     | IGNORE
     | IMPORT
@@ -1266,12 +1278,18 @@ ansiNonReserved
     | MATCHED
     | MERGE
     | MICROSECOND
+    | MICROSECONDS
     | MILLISECOND
+    | MILLISECONDS
     | MINUTE
+    | MINUTES
     | MONTH
+    | MONTHS
     | MSCK
     | NAMESPACE
     | NAMESPACES
+    | NANOSECOND
+    | NANOSECONDS
     | NO
     | NULLS
     | OF
@@ -1319,6 +1337,7 @@ ansiNonReserved
     | SCHEMA
     | SCHEMAS
     | SECOND
+    | SECONDS
     | SEMI
     | SEPARATED
     | SERDE
@@ -1372,8 +1391,10 @@ ansiNonReserved
     | VIEW
     | VIEWS
     | WEEK
+    | WEEKS
     | WINDOW
     | YEAR
+    | YEARS
     | ZONE
 //--ANSI-NON-RESERVED-END
     ;
@@ -1464,6 +1485,7 @@ nonReserved
     | DATEADD
     | DATEDIFF
     | DAY
+    | DAYS
     | DAYOFYEAR
     | DBPROPERTIES
     | DEFAULT
@@ -1511,6 +1533,7 @@ nonReserved
     | GROUPING
     | HAVING
     | HOUR
+    | HOURS
     | IF
     | IGNORE
     | IMPORT
@@ -1545,12 +1568,18 @@ nonReserved
     | MATCHED
     | MERGE
     | MICROSECOND
+    | MICROSECONDS
     | MILLISECOND
+    | MILLISECONDS
     | MINUTE
+    | MINUTES
     | MONTH
+    | MONTHS
     | MSCK
     | NAMESPACE
     | NAMESPACES
+    | NANOSECOND
+    | NANOSECONDS
     | NO
     | NOT
     | NULL
@@ -1610,6 +1639,7 @@ nonReserved
     | SCHEMA
     | SCHEMAS
     | SECOND
+    | SECONDS
     | SELECT
     | SEPARATED
     | SERDE
@@ -1672,12 +1702,14 @@ nonReserved
     | VIEW
     | VIEWS
     | WEEK
+    | WEEKS
     | WHEN
     | WHERE
     | WINDOW
     | WITH
     | WITHIN
     | YEAR
+    | YEARS
     | ZONE
 //--DEFAULT-NON-RESERVED-END
     ;
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 18ba4fb0ab7..9d298fe350a 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1500,17 +1500,12 @@ struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_0062",
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42000",
   "messageParameters" : {
-    "msg" : "Error parsing ' 1 fake_unit' to interval, invalid unit 
'fake_unit'"
-  },
-  "queryContext" : [ {
-    "objectType" : "",
-    "objectName" : "",
-    "startIndex" : 17,
-    "stopIndex" : 27,
-    "fragment" : "1 fake_unit"
-  } ]
+    "error" : "'fake_unit'",
+    "hint" : ""
+  }
 }
 
 
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index bdb9ba81ff3..716ea9335c9 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1381,17 +1381,12 @@ struct<>
 -- !query output
 org.apache.spark.sql.catalyst.parser.ParseException
 {
-  "errorClass" : "_LEGACY_ERROR_TEMP_0062",
+  "errorClass" : "PARSE_SYNTAX_ERROR",
+  "sqlState" : "42000",
   "messageParameters" : {
-    "msg" : "Error parsing ' 1 fake_unit' to interval, invalid unit 
'fake_unit'"
-  },
-  "queryContext" : [ {
-    "objectType" : "",
-    "objectName" : "",
-    "startIndex" : 17,
-    "stopIndex" : 27,
-    "fragment" : "1 fake_unit"
-  } ]
+    "error" : "'fake_unit'",
+    "hint" : ""
+  }
 }
 
 


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to