This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 012d99d3a3d [SPARK-41092][SQL] Do not use identifier to match interval units 012d99d3a3d is described below commit 012d99d3a3d92819d5a26cddcfd566c46380b952 Author: Wenchen Fan <wenc...@databricks.com> AuthorDate: Thu Nov 10 11:36:21 2022 +0300 [SPARK-41092][SQL] Do not use identifier to match interval units ### What changes were proposed in this pull request? The current antlr-based SQL parser is pretty fragile due to the fact that we make the antlr parser rule pretty flexible and push more parsing logic to the Scala side (`AstBuilder`). A tiny change to the antlr parser rule may break the parser unexpectedly. As an example, in https://github.com/apache/spark/pull/38404 , we added a new parser rule to extend the INSERT syntax, and it breaks interval literal. `select b + interval '1 month' from values (1, 1)` can't be parsed after https://g [...] This PR makes the interval literal parser rule stricter. Now it lists all the allowed interval units instead of matching an identifier. This fixes the issue we hit in https://github.com/apache/spark/pull/38404 . In the future, we can revisit other parser rules and try to rely on antlr more to do the parsing work. ### Why are the changes needed? fix parser issues we hit in https://github.com/apache/spark/pull/38404 ### Does this PR introduce _any_ user-facing change? The error message is changed a little bit for `SELECT INTERVAL 1 wrong_unit`. ### How was this patch tested? existing tests Closes #38583 from cloud-fan/parser. Authored-by: Wenchen Fan <wenc...@databricks.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- docs/sql-ref-ansi-compliance.md | 11 +++++++ .../spark/sql/catalyst/parser/SqlBaseLexer.g4 | 11 +++++++ .../spark/sql/catalyst/parser/SqlBaseParser.g4 | 36 ++++++++++++++++++++-- .../sql-tests/results/ansi/interval.sql.out | 15 +++------ .../resources/sql-tests/results/interval.sql.out | 15 +++------ 5 files changed, 66 insertions(+), 22 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index a59d145d551..1501e14c604 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -407,6 +407,7 @@ Below is a list of all the keywords in Spark SQL. |DATEADD|non-reserved|non-reserved|non-reserved| |DATEDIFF|non-reserved|non-reserved|non-reserved| |DAY|non-reserved|non-reserved|non-reserved| +|DAYS|non-reserved|non-reserved|non-reserved| |DAYOFYEAR|non-reserved|non-reserved|non-reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| |DEFAULT|non-reserved|non-reserved|non-reserved| @@ -456,6 +457,7 @@ Below is a list of all the keywords in Spark SQL. |GROUPING|non-reserved|non-reserved|reserved| |HAVING|reserved|non-reserved|reserved| |HOUR|non-reserved|non-reserved|non-reserved| +|HOURS|non-reserved|non-reserved|non-reserved| |IF|non-reserved|non-reserved|not a keyword| |IGNORE|non-reserved|non-reserved|non-reserved| |IMPORT|non-reserved|non-reserved|non-reserved| @@ -495,13 +497,19 @@ Below is a list of all the keywords in Spark SQL. |MATCHED|non-reserved|non-reserved|non-reserved| |MERGE|non-reserved|non-reserved|non-reserved| |MICROSECOND|non-reserved|non-reserved|non-reserved| +|MICROSECONDS|non-reserved|non-reserved|non-reserved| |MILLISECOND|non-reserved|non-reserved|non-reserved| +|MILLISECONDS|non-reserved|non-reserved|non-reserved| |MINUTE|non-reserved|non-reserved|non-reserved| +|MINUTES|non-reserved|non-reserved|non-reserved| |MINUS|non-reserved|strict-non-reserved|non-reserved| |MONTH|non-reserved|non-reserved|non-reserved| +|MONTHS|non-reserved|non-reserved|non-reserved| |MSCK|non-reserved|non-reserved|non-reserved| |NAMESPACE|non-reserved|non-reserved|non-reserved| |NAMESPACES|non-reserved|non-reserved|non-reserved| +|NANOSECOND|non-reserved|non-reserved|non-reserved| +|NANOSECONDS|non-reserved|non-reserved|non-reserved| |NATURAL|reserved|strict-non-reserved|reserved| |NO|non-reserved|non-reserved|reserved| |NOT|reserved|non-reserved|reserved| @@ -565,6 +573,7 @@ Below is a list of all the keywords in Spark SQL. |SCHEMA|non-reserved|non-reserved|non-reserved| |SCHEMAS|non-reserved|non-reserved|non-reserved| |SECOND|non-reserved|non-reserved|non-reserved| +|SECONDS|non-reserved|non-reserved|non-reserved| |SELECT|reserved|non-reserved|reserved| |SEMI|non-reserved|strict-non-reserved|non-reserved| |SEPARATED|non-reserved|non-reserved|non-reserved| @@ -631,10 +640,12 @@ Below is a list of all the keywords in Spark SQL. |VIEW|non-reserved|non-reserved|non-reserved| |VIEWS|non-reserved|non-reserved|non-reserved| |WEEK|non-reserved|non-reserved|non-reserved| +|WEEKS|non-reserved|non-reserved|non-reserved| |WHEN|reserved|non-reserved|reserved| |WHERE|reserved|non-reserved|reserved| |WINDOW|non-reserved|non-reserved|reserved| |WITH|reserved|non-reserved|reserved| |WITHIN|reserved|non-reserved|reserved| |YEAR|non-reserved|non-reserved|non-reserved| +|YEARS|non-reserved|non-reserved|non-reserved| |ZONE|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 index e11391fa5f4..41adbda7b10 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.g4 @@ -140,6 +140,7 @@ CURRENT_TIME: 'CURRENT_TIME'; CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP'; CURRENT_USER: 'CURRENT_USER'; DAY: 'DAY'; +DAYS: 'DAYS'; DAYOFYEAR: 'DAYOFYEAR'; DATA: 'DATA'; DATABASE: 'DATABASE'; @@ -194,6 +195,7 @@ GROUP: 'GROUP'; GROUPING: 'GROUPING'; HAVING: 'HAVING'; HOUR: 'HOUR'; +HOURS: 'HOURS'; IF: 'IF'; IGNORE: 'IGNORE'; IMPORT: 'IMPORT'; @@ -233,12 +235,18 @@ MAP: 'MAP'; MATCHED: 'MATCHED'; MERGE: 'MERGE'; MICROSECOND: 'MICROSECOND'; +MICROSECONDS: 'MICROSECONDS'; MILLISECOND: 'MILLISECOND'; +MILLISECONDS: 'MILLISECONDS'; MINUTE: 'MINUTE'; +MINUTES: 'MINUTES'; MONTH: 'MONTH'; +MONTHS: 'MONTHS'; MSCK: 'MSCK'; NAMESPACE: 'NAMESPACE'; NAMESPACES: 'NAMESPACES'; +NANOSECOND: 'NANOSECOND'; +NANOSECONDS: 'NANOSECONDS'; NATURAL: 'NATURAL'; NO: 'NO'; NOT: 'NOT' | '!'; @@ -299,6 +307,7 @@ ROLLUP: 'ROLLUP'; ROW: 'ROW'; ROWS: 'ROWS'; SECOND: 'SECOND'; +SECONDS: 'SECONDS'; SCHEMA: 'SCHEMA'; SCHEMAS: 'SCHEMAS'; SELECT: 'SELECT'; @@ -367,12 +376,14 @@ VERSION: 'VERSION'; VIEW: 'VIEW'; VIEWS: 'VIEWS'; WEEK: 'WEEK'; +WEEKS: 'WEEKS'; WHEN: 'WHEN'; WHERE: 'WHERE'; WINDOW: 'WINDOW'; WITH: 'WITH'; WITHIN: 'WITHIN'; YEAR: 'YEAR'; +YEARS: 'YEARS'; ZONE: 'ZONE'; //--SPARK-KEYWORD-LIST-END //============================ diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 index 9bd6e01c80a..7b673870af8 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseParser.g4 @@ -955,7 +955,7 @@ errorCapturingMultiUnitsInterval ; multiUnitsInterval - : (intervalValue unit+=identifier)+ + : (intervalValue unit+=unitInMultiUnits)+ ; errorCapturingUnitToUnitInterval @@ -963,7 +963,7 @@ errorCapturingUnitToUnitInterval ; unitToUnitInterval - : value=intervalValue from=identifier TO to=identifier + : value=intervalValue from=unitInUnitToUnit TO to=unitInUnitToUnit ; intervalValue @@ -971,6 +971,16 @@ intervalValue (INTEGER_VALUE | DECIMAL_VALUE | stringLit) ; +unitInMultiUnits + : NANOSECOND | NANOSECONDS | MICROSECOND | MICROSECONDS | MILLISECOND | MILLISECONDS + | SECOND | SECONDS | MINUTE | MINUTES | HOUR | HOURS | DAY | DAYS | WEEK | WEEKS + | MONTH | MONTHS | YEAR | YEARS + ; + +unitInUnitToUnit + : SECOND | MINUTE | HOUR | DAY | MONTH | YEAR + ; + colPosition : position=FIRST | position=AFTER afterCol=errorCapturingIdentifier ; @@ -1202,6 +1212,7 @@ ansiNonReserved | DATEADD | DATEDIFF | DAY + | DAYS | DAYOFYEAR | DBPROPERTIES | DEFAULT @@ -1236,6 +1247,7 @@ ansiNonReserved | GLOBAL | GROUPING | HOUR + | HOURS | IF | IGNORE | IMPORT @@ -1266,12 +1278,18 @@ ansiNonReserved | MATCHED | MERGE | MICROSECOND + | MICROSECONDS | MILLISECOND + | MILLISECONDS | MINUTE + | MINUTES | MONTH + | MONTHS | MSCK | NAMESPACE | NAMESPACES + | NANOSECOND + | NANOSECONDS | NO | NULLS | OF @@ -1319,6 +1337,7 @@ ansiNonReserved | SCHEMA | SCHEMAS | SECOND + | SECONDS | SEMI | SEPARATED | SERDE @@ -1372,8 +1391,10 @@ ansiNonReserved | VIEW | VIEWS | WEEK + | WEEKS | WINDOW | YEAR + | YEARS | ZONE //--ANSI-NON-RESERVED-END ; @@ -1464,6 +1485,7 @@ nonReserved | DATEADD | DATEDIFF | DAY + | DAYS | DAYOFYEAR | DBPROPERTIES | DEFAULT @@ -1511,6 +1533,7 @@ nonReserved | GROUPING | HAVING | HOUR + | HOURS | IF | IGNORE | IMPORT @@ -1545,12 +1568,18 @@ nonReserved | MATCHED | MERGE | MICROSECOND + | MICROSECONDS | MILLISECOND + | MILLISECONDS | MINUTE + | MINUTES | MONTH + | MONTHS | MSCK | NAMESPACE | NAMESPACES + | NANOSECOND + | NANOSECONDS | NO | NOT | NULL @@ -1610,6 +1639,7 @@ nonReserved | SCHEMA | SCHEMAS | SECOND + | SECONDS | SELECT | SEPARATED | SERDE @@ -1672,12 +1702,14 @@ nonReserved | VIEW | VIEWS | WEEK + | WEEKS | WHEN | WHERE | WINDOW | WITH | WITHIN | YEAR + | YEARS | ZONE //--DEFAULT-NON-RESERVED-END ; diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out index 18ba4fb0ab7..9d298fe350a 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out @@ -1500,17 +1500,12 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException { - "errorClass" : "_LEGACY_ERROR_TEMP_0062", + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42000", "messageParameters" : { - "msg" : "Error parsing ' 1 fake_unit' to interval, invalid unit 'fake_unit'" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 17, - "stopIndex" : 27, - "fragment" : "1 fake_unit" - } ] + "error" : "'fake_unit'", + "hint" : "" + } } diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out b/sql/core/src/test/resources/sql-tests/results/interval.sql.out index bdb9ba81ff3..716ea9335c9 100644 --- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out @@ -1381,17 +1381,12 @@ struct<> -- !query output org.apache.spark.sql.catalyst.parser.ParseException { - "errorClass" : "_LEGACY_ERROR_TEMP_0062", + "errorClass" : "PARSE_SYNTAX_ERROR", + "sqlState" : "42000", "messageParameters" : { - "msg" : "Error parsing ' 1 fake_unit' to interval, invalid unit 'fake_unit'" - }, - "queryContext" : [ { - "objectType" : "", - "objectName" : "", - "startIndex" : 17, - "stopIndex" : 27, - "fragment" : "1 fake_unit" - } ] + "error" : "'fake_unit'", + "hint" : "" + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org