This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new f960328 [SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()` f960328 is described below commit f9603287d4a74f4be0486750fdee86386291503e Author: Max Gekk <max.g...@gmail.com> AuthorDate: Wed Mar 2 19:41:51 2022 +0300 [SPARK-38389][SQL] Add the `DATEDIFF()` and `DATE_DIFF()` aliases for `TIMESTAMPDIFF()` ### What changes were proposed in this pull request? In the PR, I propose to add two aliases for the `TIMESTAMPDIFF()` function introduced by https://github.com/apache/spark/pull/35607: - `DATEDIFF()` - `DATE_DIFF()` ### Why are the changes needed? 1. To make the migration process from other systems to Spark SQL easier. 2. To achieve feature parity with other DBMSs. ### Does this PR introduce _any_ user-facing change? No. The new aliases just extend Spark SQL API. ### How was this patch tested? 1. By running the existing test suites: ``` $ build/sbt "test:testOnly *SQLKeywordSuite" ``` 3. and new checks: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z date.sql" $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z datetime-legacy.sql" ``` Closes #35709 from MaxGekk/datediff. Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- docs/sql-ref-ansi-compliance.md | 2 + .../apache/spark/sql/catalyst/parser/SqlBase.g4 | 8 ++- .../src/test/resources/sql-tests/inputs/date.sql | 12 ++++ .../resources/sql-tests/results/ansi/date.sql.out | 82 +++++++++++++++++++++- .../test/resources/sql-tests/results/date.sql.out | 82 +++++++++++++++++++++- .../sql-tests/results/datetime-legacy.sql.out | 82 +++++++++++++++++++++- 6 files changed, 264 insertions(+), 4 deletions(-) diff --git a/docs/sql-ref-ansi-compliance.md b/docs/sql-ref-ansi-compliance.md index 46bf415..7646206 100644 --- a/docs/sql-ref-ansi-compliance.md +++ b/docs/sql-ref-ansi-compliance.md @@ -392,6 +392,8 @@ Below is a list of all the keywords in Spark SQL. |DATABASES|non-reserved|non-reserved|non-reserved| |DATEADD|non-reserved|non-reserved|non-reserved| |DATE_ADD|non-reserved|non-reserved|non-reserved| +|DATEDIFF|non-reserved|non-reserved|non-reserved| +|DATE_DIFF|non-reserved|non-reserved|non-reserved| |DAY|non-reserved|non-reserved|non-reserved| |DBPROPERTIES|non-reserved|non-reserved|non-reserved| |DEFINED|non-reserved|non-reserved|non-reserved| diff --git a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 index ae57b42..3f5052d 100644 --- a/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 +++ b/sql/catalyst/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBase.g4 @@ -861,7 +861,7 @@ valueExpression primaryExpression : name=(CURRENT_DATE | CURRENT_TIMESTAMP | CURRENT_USER) #currentLike | name=(TIMESTAMPADD | DATEADD | DATE_ADD) '(' unit=identifier ',' unitsAmount=valueExpression ',' timestamp=valueExpression ')' #timestampadd - | TIMESTAMPDIFF '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')' #timestampdiff + | name=(TIMESTAMPDIFF | DATEDIFF | DATE_DIFF) '(' unit=identifier ',' startTimestamp=valueExpression ',' endTimestamp=valueExpression ')' #timestampdiff | CASE whenClause+ (ELSE elseExpression=expression)? END #searchedCase | CASE value=expression whenClause+ (ELSE elseExpression=expression)? END #simpleCase | name=(CAST | TRY_CAST) '(' expression AS dataType ')' #cast @@ -1133,6 +1133,8 @@ ansiNonReserved | DATABASES | DATEADD | DATE_ADD + | DATEDIFF + | DATE_DIFF | DAY | DBPROPERTIES | DEFINED @@ -1383,6 +1385,8 @@ nonReserved | DATABASES | DATEADD | DATE_ADD + | DATEDIFF + | DATE_DIFF | DAY | DBPROPERTIES | DEFINED @@ -1653,6 +1657,8 @@ DATABASE: 'DATABASE'; DATABASES: 'DATABASES'; DATEADD: 'DATEADD'; DATE_ADD: 'DATE_ADD'; +DATEDIFF: 'DATEDIFF'; +DATE_DIFF: 'DATE_DIFF'; DBPROPERTIES: 'DBPROPERTIES'; DEFINED: 'DEFINED'; DELETE: 'DELETE'; diff --git a/sql/core/src/test/resources/sql-tests/inputs/date.sql b/sql/core/src/test/resources/sql-tests/inputs/date.sql index 6fcba1d..4c8d5a7 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/date.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/date.sql @@ -152,3 +152,15 @@ select dateadd(WEEK, -4, timestamp'2022-02-25 01:02:03'); select date_add(MONTH, -1, timestamp'2022-02-25 01:02:03'); select dateadd(QUARTER, 5, date'2022-02-25'); select date_add(YEAR, 1, date'2022-02-25'); + +-- Get the difference between timestamps or dates in the specified units +select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001'); +select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455'); +select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01'); +select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00'); +select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03'); +select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00'); +select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03'); +select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03'); +select date_diff(QUARTER, date'2022-02-25', date'2023-05-25'); +select datediff(YEAR, date'2022-02-25', date'2023-02-25'); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out index 07989ae..a21512f 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/date.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 87 +-- Number of queries: 97 -- !query @@ -740,3 +740,83 @@ select date_add(YEAR, 1, date'2022-02-25') struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp> -- !query output 2023-02-25 00:00:00 + + +-- !query +select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +-- !query schema +struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint> +-- !query output +1001 + + +-- !query +select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455') +-- !query schema +struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint> +-- !query output +-1 + + +-- !query +select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +-- !query schema +struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint> +-- !query output +58 + + +-- !query +select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00') +-- !query schema +struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint> +-- !query output +-100 + + +-- !query +select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +-- !query schema +struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint> +-- !query output +-1 + + +-- !query +select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00') +-- !query schema +struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint> +-- !query output +367 + + +-- !query +select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') +-- !query schema +struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint> +-- !query output +-4 + + +-- !query +select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03') +-- !query schema +struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint> +-- !query output +-1 + + +-- !query +select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +-- !query schema +struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint> +-- !query output +5 + + +-- !query +select datediff(YEAR, date'2022-02-25', date'2023-02-25') +-- !query schema +struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint> +-- !query output +1 diff --git a/sql/core/src/test/resources/sql-tests/results/date.sql.out b/sql/core/src/test/resources/sql-tests/results/date.sql.out index e3a2d7d..bd32361 100644 --- a/sql/core/src/test/resources/sql-tests/results/date.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/date.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 87 +-- Number of queries: 97 -- !query @@ -739,3 +739,83 @@ select date_add(YEAR, 1, date'2022-02-25') struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp> -- !query output 2023-02-25 00:00:00 + + +-- !query +select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +-- !query schema +struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint> +-- !query output +1001 + + +-- !query +select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455') +-- !query schema +struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint> +-- !query output +-1 + + +-- !query +select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +-- !query schema +struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint> +-- !query output +58 + + +-- !query +select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00') +-- !query schema +struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint> +-- !query output +-100 + + +-- !query +select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +-- !query schema +struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint> +-- !query output +-1 + + +-- !query +select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00') +-- !query schema +struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint> +-- !query output +367 + + +-- !query +select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') +-- !query schema +struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint> +-- !query output +-4 + + +-- !query +select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03') +-- !query schema +struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint> +-- !query output +-1 + + +-- !query +select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +-- !query schema +struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint> +-- !query output +5 + + +-- !query +select datediff(YEAR, date'2022-02-25', date'2023-02-25') +-- !query schema +struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint> +-- !query output +1 diff --git a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out index 60752e3..8eeed14 100644 --- a/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/datetime-legacy.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 184 +-- Number of queries: 194 -- !query @@ -739,6 +739,86 @@ struct<timestampadd(YEAR, 1, DATE '2022-02-25'):timestamp> -- !query +select date_diff(MICROSECOND, timestamp'2022-02-25 01:02:03.123', timestamp'2022-02-25 01:02:03.124001') +-- !query schema +struct<timestampdiff(MICROSECOND, TIMESTAMP '2022-02-25 01:02:03.123', TIMESTAMP '2022-02-25 01:02:03.124001'):bigint> +-- !query output +1001 + + +-- !query +select datediff(MILLISECOND, timestamp'2022-02-25 01:02:03.456', timestamp'2022-02-25 01:02:03.455') +-- !query schema +struct<timestampdiff(MILLISECOND, TIMESTAMP '2022-02-25 01:02:03.456', TIMESTAMP '2022-02-25 01:02:03.455'):bigint> +-- !query output +-1 + + +-- !query +select date_diff(SECOND, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 01:03:01') +-- !query schema +struct<timestampdiff(SECOND, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 01:03:01'):bigint> +-- !query output +58 + + +-- !query +select datediff(MINUTE, date'2022-02-25', timestamp'2022-02-24 22:20:00') +-- !query schema +struct<timestampdiff(MINUTE, DATE '2022-02-25', TIMESTAMP '2022-02-24 22:20:00'):bigint> +-- !query output +-100 + + +-- !query +select date_diff(HOUR, timestamp'2022-02-25 01:02:03', timestamp'2022-02-25 00:02:03') +-- !query schema +struct<timestampdiff(HOUR, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-02-25 00:02:03'):bigint> +-- !query output +-1 + + +-- !query +select datediff(DAY, date'2022-02-25', timestamp'2023-02-27 00:00:00') +-- !query schema +struct<timestampdiff(DAY, DATE '2022-02-25', TIMESTAMP '2023-02-27 00:00:00'):bigint> +-- !query output +367 + + +-- !query +select date_diff(WEEK, timestamp'2022-02-25 01:02:03', timestamp'2022-01-28 01:02:03') +-- !query schema +struct<timestampdiff(WEEK, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-28 01:02:03'):bigint> +-- !query output +-4 + + +-- !query +select datediff(MONTH, timestamp'2022-02-25 01:02:03', timestamp'2022-01-25 01:02:03') +-- !query schema +struct<timestampdiff(MONTH, TIMESTAMP '2022-02-25 01:02:03', TIMESTAMP '2022-01-25 01:02:03'):bigint> +-- !query output +-1 + + +-- !query +select date_diff(QUARTER, date'2022-02-25', date'2023-05-25') +-- !query schema +struct<timestampdiff(QUARTER, DATE '2022-02-25', DATE '2023-05-25'):bigint> +-- !query output +5 + + +-- !query +select datediff(YEAR, date'2022-02-25', date'2023-02-25') +-- !query schema +struct<timestampdiff(YEAR, DATE '2022-02-25', DATE '2023-02-25'):bigint> +-- !query output +1 + + +-- !query select timestamp '2019-01-01\t' -- !query schema struct<TIMESTAMP '2019-01-01 00:00:00':timestamp> --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org