This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new a4f96afdc14 [SPARK-39451][SQL] Support casting intervals to integrals in ANSI mode a4f96afdc14 is described below commit a4f96afdc147bfee3e0f195b7bcf3dfa882ad511 Author: Max Gekk <max.g...@gmail.com> AuthorDate: Mon Jun 13 13:04:01 2022 +0300 [SPARK-39451][SQL] Support casting intervals to integrals in ANSI mode ### What changes were proposed in this pull request? In the PR, I propose to support casting of interval types to the integral types: `TINYINT`, `SMALLINT`, `INT`, `BIGINT`. ### Why are the changes needed? To conform to the SQL standard, which allows such casting: <img width="801" alt="Screenshot 2022-06-12 at 13 04 44" src="https://user-images.githubusercontent.com/1580697/173228149-17e1fbaa-c095-4eb7-bb3b-81a3f9c91928.png"> ### Does this PR introduce _any_ user-facing change? No, it extends existing behavior. ### How was this patch tested? By running new tests: ``` $ build/sbt "sql/testOnly org.apache.spark.sql.SQLQueryTestSuite -- -z cast.sql" ``` Closes #36811 from MaxGekk/cast-interval-to-int. 
Authored-by: Max Gekk <max.g...@gmail.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../spark/sql/catalyst/expressions/Cast.scala | 5 +- .../src/test/resources/sql-tests/inputs/cast.sql | 12 ++++ .../resources/sql-tests/results/ansi/cast.sql.out | 77 +++++++++++++++++++++- .../test/resources/sql-tests/results/cast.sql.out | 77 +++++++++++++++++++++- 4 files changed, 167 insertions(+), 4 deletions(-) diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala index 497261be2e4..0746bc0fcd0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/Cast.scala @@ -108,8 +108,9 @@ object Cast { case (TimestampType, TimestampNTZType) => true case (StringType, _: CalendarIntervalType) => true - case (StringType, _: DayTimeIntervalType) => true - case (StringType, _: YearMonthIntervalType) => true + case (StringType, _: AnsiIntervalType) => true + + case (_: AnsiIntervalType, _: IntegralType) => true case (_: DayTimeIntervalType, _: DayTimeIntervalType) => true case (_: YearMonthIntervalType, _: YearMonthIntervalType) => true diff --git a/sql/core/src/test/resources/sql-tests/inputs/cast.sql b/sql/core/src/test/resources/sql-tests/inputs/cast.sql index 4610716902e..5198611a2b3 100644 --- a/sql/core/src/test/resources/sql-tests/inputs/cast.sql +++ b/sql/core/src/test/resources/sql-tests/inputs/cast.sql @@ -104,3 +104,15 @@ select cast('a' as timestamp_ntz); select cast(cast('inf' as double) as timestamp); select cast(cast('inf' as float) as timestamp); + +-- cast ANSI intervals to numerics +select cast(interval '1' year as tinyint); +select cast(interval '-10-2' year to month as smallint); +select cast(interval '1000' month as int); +select cast(interval -'10.123456' second as tinyint); +select cast(interval '23:59:59' hour to second as 
smallint); +select cast(interval -'1 02:03:04.123' day to second as int); +select cast(interval '10' day as bigint); + +select cast(interval '-1000' month as tinyint); +select cast(interval '1000000' second as smallint); diff --git a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out index f1c2dbd3d7d..b05a85d2927 100644 --- a/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/ansi/cast.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 72 +-- Number of queries: 81 -- !query @@ -766,3 +766,78 @@ org.apache.spark.SparkDateTimeException == SQL(line 1, position 8) == select cast(cast('inf' as float) as timestamp) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + + +-- !query +select cast(interval '1' year as tinyint) +-- !query schema +struct<CAST(INTERVAL '1' YEAR AS TINYINT):tinyint> +-- !query output +1 + + +-- !query +select cast(interval '-10-2' year to month as smallint) +-- !query schema +struct<CAST(INTERVAL '-10-2' YEAR TO MONTH AS SMALLINT):smallint> +-- !query output +-122 + + +-- !query +select cast(interval '1000' month as int) +-- !query schema +struct<CAST(INTERVAL '1000' MONTH AS INT):int> +-- !query output +1000 + + +-- !query +select cast(interval -'10.123456' second as tinyint) +-- !query schema +struct<CAST(INTERVAL '-10.123456' SECOND AS TINYINT):tinyint> +-- !query output +-10 + + +-- !query +select cast(interval '23:59:59' hour to second as smallint) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CAST_OVERFLOW] The value INTERVAL '23:59:59' HOUR TO SECOND of the type "INTERVAL HOUR TO SECOND" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. 
+ + +-- !query +select cast(interval -'1 02:03:04.123' day to second as int) +-- !query schema +struct<CAST(INTERVAL '-1 02:03:04.123' DAY TO SECOND AS INT):int> +-- !query output +-93784 + + +-- !query +select cast(interval '10' day as bigint) +-- !query schema +struct<CAST(INTERVAL '10' DAY AS BIGINT):bigint> +-- !query output +10 + + +-- !query +select cast(interval '-1000' month as tinyint) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CAST_OVERFLOW] The value INTERVAL '-1000' MONTH of the type "INTERVAL MONTH" cannot be cast to "TINYINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. + + +-- !query +select cast(interval '1000000' second as smallint) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CAST_OVERFLOW] The value INTERVAL '1000000' SECOND of the type "INTERVAL SECOND" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. 
diff --git a/sql/core/src/test/resources/sql-tests/results/cast.sql.out b/sql/core/src/test/resources/sql-tests/results/cast.sql.out index aaa82e43513..dbb32a5ed31 100644 --- a/sql/core/src/test/resources/sql-tests/results/cast.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/cast.sql.out @@ -1,5 +1,5 @@ -- Automatically generated by SQLQueryTestSuite --- Number of queries: 72 +-- Number of queries: 81 -- !query @@ -594,3 +594,78 @@ select cast(cast('inf' as float) as timestamp) struct<CAST(CAST(inf AS FLOAT) AS TIMESTAMP):timestamp> -- !query output NULL + + +-- !query +select cast(interval '1' year as tinyint) +-- !query schema +struct<CAST(INTERVAL '1' YEAR AS TINYINT):tinyint> +-- !query output +1 + + +-- !query +select cast(interval '-10-2' year to month as smallint) +-- !query schema +struct<CAST(INTERVAL '-10-2' YEAR TO MONTH AS SMALLINT):smallint> +-- !query output +-122 + + +-- !query +select cast(interval '1000' month as int) +-- !query schema +struct<CAST(INTERVAL '1000' MONTH AS INT):int> +-- !query output +1000 + + +-- !query +select cast(interval -'10.123456' second as tinyint) +-- !query schema +struct<CAST(INTERVAL '-10.123456' SECOND AS TINYINT):tinyint> +-- !query output +-10 + + +-- !query +select cast(interval '23:59:59' hour to second as smallint) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CAST_OVERFLOW] The value INTERVAL '23:59:59' HOUR TO SECOND of the type "INTERVAL HOUR TO SECOND" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. 
+ + +-- !query +select cast(interval -'1 02:03:04.123' day to second as int) +-- !query schema +struct<CAST(INTERVAL '-1 02:03:04.123' DAY TO SECOND AS INT):int> +-- !query output +-93784 + + +-- !query +select cast(interval '10' day as bigint) +-- !query schema +struct<CAST(INTERVAL '10' DAY AS BIGINT):bigint> +-- !query output +10 + + +-- !query +select cast(interval '-1000' month as tinyint) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CAST_OVERFLOW] The value INTERVAL '-1000' MONTH of the type "INTERVAL MONTH" cannot be cast to "TINYINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. + + +-- !query +select cast(interval '1000000' second as smallint) +-- !query schema +struct<> +-- !query output +org.apache.spark.SparkArithmeticException +[CAST_OVERFLOW] The value INTERVAL '1000000' SECOND of the type "INTERVAL SECOND" cannot be cast to "SMALLINT" due to an overflow. Use `try_cast` to tolerate overflow and return NULL instead. If necessary set "spark.sql.ansi.enabled" to "false" to bypass this error. --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org