This is an automated email from the ASF dual-hosted git repository.
maxgekk pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.2 by this push:
new fb6f379 [SPARK-36431][SQL] Support TypeCoercion of ANSI intervals
with different fields
fb6f379 is described below
commit fb6f3792afacead3299e86cc94f3f1460b3a4ba1
Author: Angerszhuuuu <[email protected]>
AuthorDate: Tue Aug 10 14:22:31 2021 +0300
[SPARK-36431][SQL] Support TypeCoercion of ANSI intervals with different
fields
### What changes were proposed in this pull request?
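Support type coercion of ANSI intervals with different fields: two
`DayTimeIntervalType`s (or two `YearMonthIntervalType`s) are coerced to a
common type of the same kind that spans from the minimum start field to the
maximum end field of the two.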
### Why are the changes needed?
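Support TypeCoercion of ANSI intervals with different fields. Before this
change, expressions that mixed such intervals, e.g.
`map(1, interval 1 year, 2, interval 2 month)`, failed analysis with a
data type mismatch error.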
### Does this PR introduce _any_ user-facing change?
After this PR, users can:
- use comparison operators on DayTimeIntervalType/YearMonthIntervalType
values with different fields, such as `INTERVAL '1' YEAR` >
`INTERVAL '11' MONTH`
- mix ANSI interval types with different fields in collection functions,
such as `array(INTERVAL '1' YEAR, INTERVAL '11' MONTH)`
- mix ANSI interval types with different fields in `coalesce`, etc.
### How was this patch tested?
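Added unit tests to `TypeCoercionSuite` and new queries to `interval.sql`
(with regenerated `interval.sql.out` results for both ANSI and non-ANSI modes).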
Closes #33661 from AngersZhuuuu/SPARK-SPARK-36431.
Authored-by: Angerszhuuuu <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
(cherry picked from commit 89d8a4eacfd09f67ad31bf1cbf7d4b88de3b1e24)
Signed-off-by: Max Gekk <[email protected]>
---
.../sql/catalyst/analysis/AnsiTypeCoercion.scala | 5 ++
.../spark/sql/catalyst/analysis/TypeCoercion.scala | 5 ++
.../sql/catalyst/analysis/TypeCoercionSuite.scala | 47 +++++++++++
.../test/resources/sql-tests/inputs/interval.sql | 13 +++
.../sql-tests/results/ansi/interval.sql.out | 98 +++++++++++++++++++++-
.../resources/sql-tests/results/interval.sql.out | 98 +++++++++++++++++++++-
6 files changed, 258 insertions(+), 8 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index 457dc10..f03296f 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -120,6 +120,11 @@ object AnsiTypeCoercion extends TypeCoercionBase {
case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
Some(TimestampType)
+ case (t1: DayTimeIntervalType, t2: DayTimeIntervalType) =>
+ Some(DayTimeIntervalType(t1.startField.min(t2.startField),
t1.endField.max(t2.endField)))
+ case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
+ Some(YearMonthIntervalType(t1.startField.min(t2.startField),
t1.endField.max(t2.endField)))
+
case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
}
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 42c10e8..db6f499 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -867,6 +867,11 @@ object TypeCoercion extends TypeCoercionBase {
case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
Some(TimestampType)
+ case (t1: DayTimeIntervalType, t2: DayTimeIntervalType) =>
+ Some(DayTimeIntervalType(t1.startField.min(t2.startField),
t1.endField.max(t2.endField)))
+ case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
+ Some(YearMonthIntervalType(t1.startField.min(t2.startField),
t1.endField.max(t2.endField)))
+
case (_: TimestampNTZType, _: DateType) | (_: DateType, _:
TimestampNTZType) =>
Some(TimestampNTZType)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 602daf8..6a7d7ef 100644
---
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -18,6 +18,7 @@
package org.apache.spark.sql.catalyst.analysis
import java.sql.Timestamp
+import java.time.{Duration, Period}
import org.apache.spark.internal.config.Tests.IS_TESTING
import org.apache.spark.sql.catalyst.analysis.TypeCoercion._
@@ -1604,6 +1605,52 @@ class TypeCoercionSuite extends AnalysisTest {
ruleTest(TypeCoercion.IntegralDivision, IntegralDivide(2, 1L),
IntegralDivide(Cast(2, LongType), 1L))
}
+
+ test("SPARK-36431: Support TypeCoercion of ANSI intervals with different
fields") {
+ DataTypeTestUtils.yearMonthIntervalTypes.foreach { ym1 =>
+ DataTypeTestUtils.yearMonthIntervalTypes.foreach { ym2 =>
+ val literal1 = Literal.create(Period.ofMonths(12), ym1)
+ val literal2 = Literal.create(Period.ofMonths(12), ym2)
+ val commonType = YearMonthIntervalType(
+ ym1.startField.min(ym2.startField), ym1.endField.max(ym2.endField))
+ if (commonType == ym1 && commonType == ym2) {
+ ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+ EqualTo(literal1, literal2))
+ } else if (commonType == ym1) {
+ ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+ EqualTo(literal1, Cast(literal2, commonType)))
+ } else if (commonType == ym2) {
+ ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+ EqualTo(Cast(literal1, commonType), literal2))
+ } else {
+ ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+ EqualTo(Cast(literal1, commonType), Cast(literal2, commonType)))
+ }
+ }
+ }
+
+ DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt1 =>
+ DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt2 =>
+ val literal1 = Literal.create(Duration.ofSeconds(1111), dt1)
+ val literal2 = Literal.create(Duration.ofSeconds(1111), dt2)
+ val commonType = DayTimeIntervalType(
+ dt1.startField.min(dt2.startField), dt1.endField.max(dt2.endField))
+ if (commonType == dt1 && commonType == dt2) {
+ ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+ EqualTo(literal1, literal2))
+ } else if (commonType == dt1) {
+ ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+ EqualTo(literal1, Cast(literal2, commonType)))
+ } else if (commonType == dt2) {
+ ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+ EqualTo(Cast(literal1, commonType), literal2))
+ } else {
+ ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+ EqualTo(Cast(literal1, commonType), Cast(literal2, commonType)))
+ }
+ }
+ }
+ }
}
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 43d1e03..a16d152 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -322,3 +322,16 @@ SELECT INTERVAL '153722867280' MINUTE;
SELECT INTERVAL '-153722867280' MINUTE;
SELECT INTERVAL '54.775807' SECOND;
SELECT INTERVAL '-54.775807' SECOND;
+
+SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR;
+SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND;
+SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH;
+SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH;
+SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS;
+
+SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
+SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
+SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
+SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
+SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
+SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
diff --git
a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 9bf492e..9ba5da3 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 200
+-- Number of queries: 211
-- !query
@@ -818,10 +818,9 @@ struct<map(1, INTERVAL '1' DAY, 2, INTERVAL '2'
DAY):map<int,interval day>>
-- !query
select map(1, interval 1 year, 2, interval 2 month)
-- !query schema
-struct<>
+struct<map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH):map<int,interval year
to month>>
-- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH)' due to data
type mismatch: The given values of function map should all be the same type,
but they are [interval year, interval month]; line 1 pos 7
+{1:1-0,2:0-2}
-- !query
@@ -1985,3 +1984,94 @@ SELECT INTERVAL '-54.775807' SECOND
struct<INTERVAL '-54.775807' SECOND:interval second>
-- !query output
-0 00:00:54.775807000
+
+
+-- !query
+SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR
+-- !query schema
+struct<(INTERVAL '1' DAY > INTERVAL '01' HOUR):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND
+-- !query schema
+struct<(INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO
SECOND):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH
+-- !query schema
+struct<(INTERVAL '1' YEAR < INTERVAL '1' MONTH):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH
+-- !query schema
+struct<(INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type
mismatch: differing types in '(INTERVAL '1' MONTH > INTERVAL '20' DAY)'
(interval month and interval day).; line 1 pos 7
+
+
+-- !query
+SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
+-- !query schema
+struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to
month>>
+-- !query output
+[1-0,0-1]
+
+
+-- !query
+SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
+-- !query schema
+struct<array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):array<interval
day to minute>>
+-- !query output
+[1 00:00:00.000000000,0 01:01:00.000000000]
+
+
+-- !query
+SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'array(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type
mismatch: input to function array should all be the same type, but it's
[interval month, interval day]; line 1 pos 7
+
+
+-- !query
+SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
+-- !query schema
+struct<coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH):interval year to month>
+-- !query output
+1-0
+
+
+-- !query
+SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
+-- !query schema
+struct<coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):interval
day to minute>
+-- !query output
+1 00:00:00.000000000
+
+
+-- !query
+SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data
type mismatch: input to function coalesce should all be the same type, but it's
[interval month, interval day]; line 1 pos 7
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 8780365..a15cc23 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
-- Automatically generated by SQLQueryTestSuite
--- Number of queries: 200
+-- Number of queries: 211
-- !query
@@ -817,10 +817,9 @@ struct<map(1, INTERVAL '1' DAY, 2, INTERVAL '2'
DAY):map<int,interval day>>
-- !query
select map(1, interval 1 year, 2, interval 2 month)
-- !query schema
-struct<>
+struct<map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH):map<int,interval year
to month>>
-- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH)' due to data
type mismatch: The given values of function map should all be the same type,
but they are [interval year, interval month]; line 1 pos 7
+{1:1-0,2:0-2}
-- !query
@@ -1984,3 +1983,94 @@ SELECT INTERVAL '-54.775807' SECOND
struct<INTERVAL '-54.775807' SECOND:interval second>
-- !query output
-0 00:00:54.775807000
+
+
+-- !query
+SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR
+-- !query schema
+struct<(INTERVAL '1' DAY > INTERVAL '01' HOUR):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND
+-- !query schema
+struct<(INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO
SECOND):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH
+-- !query schema
+struct<(INTERVAL '1' YEAR < INTERVAL '1' MONTH):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH
+-- !query schema
+struct<(INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type
mismatch: differing types in '(INTERVAL '1' MONTH > INTERVAL '20' DAY)'
(interval month and interval day).; line 1 pos 7
+
+
+-- !query
+SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
+-- !query schema
+struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to
month>>
+-- !query output
+[1-0,0-1]
+
+
+-- !query
+SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
+-- !query schema
+struct<array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):array<interval
day to minute>>
+-- !query output
+[1 00:00:00.000000000,0 01:01:00.000000000]
+
+
+-- !query
+SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'array(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type
mismatch: input to function array should all be the same type, but it's
[interval month, interval day]; line 1 pos 7
+
+
+-- !query
+SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
+-- !query schema
+struct<coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH):interval year to month>
+-- !query output
+1-0
+
+
+-- !query
+SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
+-- !query schema
+struct<coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):interval
day to minute>
+-- !query output
+1 00:00:00.000000000
+
+
+-- !query
+SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data
type mismatch: input to function coalesce should all be the same type, but it's
[interval month, interval day]; line 1 pos 7
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]