This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new fb6f379  [SPARK-36431][SQL] Support TypeCoercion of ANSI intervals 
with different fields
fb6f379 is described below

commit fb6f3792afacead3299e86cc94f3f1460b3a4ba1
Author: Angerszhuuuu <[email protected]>
AuthorDate: Tue Aug 10 14:22:31 2021 +0300

    [SPARK-36431][SQL] Support TypeCoercion of ANSI intervals with different 
fields
    
    ### What changes were proposed in this pull request?
     Support TypeCoercion of ANSI intervals with different fields
    
    ### Why are the changes needed?
     Support TypeCoercion of ANSI intervals with different fields
    
    ### Does this PR introduce _any_ user-facing change?
    After this PR, users can:
     - use comparison functions with different fields of 
DayTimeIntervalType/YearMonthIntervalType, such as `INTERVAL '1' YEAR` > 
`INTERVAL '11' MONTH`
     - support different fields of ANSI interval types in collection functions 
such as `array(INTERVAL '1' YEAR, INTERVAL '11' MONTH)`
     - support different fields of ANSI interval types in `coalesce`, etc.
    
    ### How was this patch tested?
    Added UT
    
    Closes #33661 from AngersZhuuuu/SPARK-SPARK-36431.
    
    Authored-by: Angerszhuuuu <[email protected]>
    Signed-off-by: Max Gekk <[email protected]>
    (cherry picked from commit 89d8a4eacfd09f67ad31bf1cbf7d4b88de3b1e24)
    Signed-off-by: Max Gekk <[email protected]>
---
 .../sql/catalyst/analysis/AnsiTypeCoercion.scala   |  5 ++
 .../spark/sql/catalyst/analysis/TypeCoercion.scala |  5 ++
 .../sql/catalyst/analysis/TypeCoercionSuite.scala  | 47 +++++++++++
 .../test/resources/sql-tests/inputs/interval.sql   | 13 +++
 .../sql-tests/results/ansi/interval.sql.out        | 98 +++++++++++++++++++++-
 .../resources/sql-tests/results/interval.sql.out   | 98 +++++++++++++++++++++-
 6 files changed, 258 insertions(+), 8 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
index 457dc10..f03296f 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/AnsiTypeCoercion.scala
@@ -120,6 +120,11 @@ object AnsiTypeCoercion extends TypeCoercionBase {
     case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
       Some(TimestampType)
 
+    case (t1: DayTimeIntervalType, t2: DayTimeIntervalType) =>
+      Some(DayTimeIntervalType(t1.startField.min(t2.startField), 
t1.endField.max(t2.endField)))
+    case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
+      Some(YearMonthIntervalType(t1.startField.min(t2.startField), 
t1.endField.max(t2.endField)))
+
     case (t1, t2) => findTypeForComplex(t1, t2, findTightestCommonType)
   }
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 42c10e8..db6f499 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -867,6 +867,11 @@ object TypeCoercion extends TypeCoercionBase {
       case (_: TimestampType, _: DateType) | (_: DateType, _: TimestampType) =>
         Some(TimestampType)
 
+      case (t1: DayTimeIntervalType, t2: DayTimeIntervalType) =>
+        Some(DayTimeIntervalType(t1.startField.min(t2.startField), 
t1.endField.max(t2.endField)))
+      case (t1: YearMonthIntervalType, t2: YearMonthIntervalType) =>
+        Some(YearMonthIntervalType(t1.startField.min(t2.startField), 
t1.endField.max(t2.endField)))
+
       case (_: TimestampNTZType, _: DateType) | (_: DateType, _: 
TimestampNTZType) =>
         Some(TimestampNTZType)
 
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 602daf8..6a7d7ef 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -18,6 +18,7 @@
 package org.apache.spark.sql.catalyst.analysis
 
 import java.sql.Timestamp
+import java.time.{Duration, Period}
 
 import org.apache.spark.internal.config.Tests.IS_TESTING
 import org.apache.spark.sql.catalyst.analysis.TypeCoercion._
@@ -1604,6 +1605,52 @@ class TypeCoercionSuite extends AnalysisTest {
     ruleTest(TypeCoercion.IntegralDivision, IntegralDivide(2, 1L),
       IntegralDivide(Cast(2, LongType), 1L))
   }
+
+  test("SPARK-36431: Support TypeCoercion of ANSI intervals with different 
fields") {
+    DataTypeTestUtils.yearMonthIntervalTypes.foreach { ym1 =>
+      DataTypeTestUtils.yearMonthIntervalTypes.foreach { ym2 =>
+        val literal1 = Literal.create(Period.ofMonths(12), ym1)
+        val literal2 = Literal.create(Period.ofMonths(12), ym2)
+        val commonType = YearMonthIntervalType(
+          ym1.startField.min(ym2.startField), ym1.endField.max(ym2.endField))
+        if (commonType == ym1 && commonType == ym2) {
+          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+            EqualTo(literal1, literal2))
+        } else if (commonType == ym1) {
+          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+            EqualTo(literal1, Cast(literal2, commonType)))
+        } else if (commonType == ym2) {
+          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+            EqualTo(Cast(literal1, commonType), literal2))
+        } else {
+          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+            EqualTo(Cast(literal1, commonType), Cast(literal2, commonType)))
+        }
+      }
+    }
+
+    DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt1 =>
+      DataTypeTestUtils.dayTimeIntervalTypes.foreach { dt2 =>
+        val literal1 = Literal.create(Duration.ofSeconds(1111), dt1)
+        val literal2 = Literal.create(Duration.ofSeconds(1111), dt2)
+        val commonType = DayTimeIntervalType(
+          dt1.startField.min(dt2.startField), dt1.endField.max(dt2.endField))
+        if (commonType == dt1 && commonType == dt2) {
+          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+            EqualTo(literal1, literal2))
+        } else if (commonType == dt1) {
+          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+            EqualTo(literal1, Cast(literal2, commonType)))
+        } else if (commonType == dt2) {
+          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+            EqualTo(Cast(literal1, commonType), literal2))
+        } else {
+          ruleTest(TypeCoercion.ImplicitTypeCasts, EqualTo(literal1, literal2),
+            EqualTo(Cast(literal1, commonType), Cast(literal2, commonType)))
+        }
+      }
+    }
+  }
 }
 
 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql 
b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 43d1e03..a16d152 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -322,3 +322,16 @@ SELECT INTERVAL '153722867280' MINUTE;
 SELECT INTERVAL '-153722867280' MINUTE;
 SELECT INTERVAL '54.775807' SECOND;
 SELECT INTERVAL '-54.775807' SECOND;
+
+SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR;
+SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND;
+SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH;
+SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH;
+SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS;
+
+SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
+SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
+SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
+SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
+SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
+SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS);
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index 9bf492e..9ba5da3 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 200
+-- Number of queries: 211
 
 
 -- !query
@@ -818,10 +818,9 @@ struct<map(1, INTERVAL '1' DAY, 2, INTERVAL '2' 
DAY):map<int,interval day>>
 -- !query
 select map(1, interval 1 year, 2, interval 2 month)
 -- !query schema
-struct<>
+struct<map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH):map<int,interval year 
to month>>
 -- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH)' due to data 
type mismatch: The given values of function map should all be the same type, 
but they are [interval year, interval month]; line 1 pos 7
+{1:1-0,2:0-2}
 
 
 -- !query
@@ -1985,3 +1984,94 @@ SELECT INTERVAL '-54.775807' SECOND
 struct<INTERVAL '-54.775807' SECOND:interval second>
 -- !query output
 -0 00:00:54.775807000
+
+
+-- !query
+SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR
+-- !query schema
+struct<(INTERVAL '1' DAY > INTERVAL '01' HOUR):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND
+-- !query schema
+struct<(INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO 
SECOND):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH
+-- !query schema
+struct<(INTERVAL '1' YEAR < INTERVAL '1' MONTH):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH
+-- !query schema
+struct<(INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type 
mismatch: differing types in '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' 
(interval month and interval day).; line 1 pos 7
+
+
+-- !query
+SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
+-- !query schema
+struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to 
month>>
+-- !query output
+[1-0,0-1]
+
+
+-- !query
+SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
+-- !query schema
+struct<array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):array<interval 
day to minute>>
+-- !query output
+[1 00:00:00.000000000,0 01:01:00.000000000]
+
+
+-- !query
+SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'array(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type 
mismatch: input to function array should all be the same type, but it's 
[interval month, interval day]; line 1 pos 7
+
+
+-- !query
+SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
+-- !query schema
+struct<coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH):interval year to month>
+-- !query output
+1-0
+
+
+-- !query
+SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
+-- !query schema
+struct<coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):interval 
day to minute>
+-- !query output
+1 00:00:00.000000000
+
+
+-- !query
+SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data 
type mismatch: input to function coalesce should all be the same type, but it's 
[interval month, interval day]; line 1 pos 7
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 8780365..a15cc23 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 200
+-- Number of queries: 211
 
 
 -- !query
@@ -817,10 +817,9 @@ struct<map(1, INTERVAL '1' DAY, 2, INTERVAL '2' 
DAY):map<int,interval day>>
 -- !query
 select map(1, interval 1 year, 2, interval 2 month)
 -- !query schema
-struct<>
+struct<map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH):map<int,interval year 
to month>>
 -- !query output
-org.apache.spark.sql.AnalysisException
-cannot resolve 'map(1, INTERVAL '1' YEAR, 2, INTERVAL '2' MONTH)' due to data 
type mismatch: The given values of function map should all be the same type, 
but they are [interval year, interval month]; line 1 pos 7
+{1:1-0,2:0-2}
 
 
 -- !query
@@ -1984,3 +1983,94 @@ SELECT INTERVAL '-54.775807' SECOND
 struct<INTERVAL '-54.775807' SECOND:interval second>
 -- !query output
 -0 00:00:54.775807000
+
+
+-- !query
+SELECT INTERVAL '1' DAY > INTERVAL '1' HOUR
+-- !query schema
+struct<(INTERVAL '1' DAY > INTERVAL '01' HOUR):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO SECOND
+-- !query schema
+struct<(INTERVAL '1 02' DAY TO HOUR = INTERVAL '02:10:55' HOUR TO 
SECOND):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '1' YEAR < INTERVAL '1' MONTH
+-- !query schema
+struct<(INTERVAL '1' YEAR < INTERVAL '1' MONTH):boolean>
+-- !query output
+false
+
+
+-- !query
+SELECT INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH
+-- !query schema
+struct<(INTERVAL '-1-1' YEAR TO MONTH = INTERVAL '-13' MONTH):boolean>
+-- !query output
+true
+
+
+-- !query
+SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' due to data type 
mismatch: differing types in '(INTERVAL '1' MONTH > INTERVAL '20' DAY)' 
(interval month and interval day).; line 1 pos 7
+
+
+-- !query
+SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
+-- !query schema
+struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to 
month>>
+-- !query output
+[1-0,0-1]
+
+
+-- !query
+SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
+-- !query schema
+struct<array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):array<interval 
day to minute>>
+-- !query output
+[1 00:00:00.000000000,0 01:01:00.000000000]
+
+
+-- !query
+SELECT array(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'array(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data type 
mismatch: input to function array should all be the same type, but it's 
[interval month, interval day]; line 1 pos 7
+
+
+-- !query
+SELECT coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
+-- !query schema
+struct<coalesce(INTERVAL '1' YEAR, INTERVAL '1' MONTH):interval year to month>
+-- !query output
+1-0
+
+
+-- !query
+SELECT coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE)
+-- !query schema
+struct<coalesce(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE):interval 
day to minute>
+-- !query output
+1 00:00:00.000000000
+
+
+-- !query
+SELECT coalesce(INTERVAL 1 MONTH, INTERVAL 20 DAYS)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve 'coalesce(INTERVAL '1' MONTH, INTERVAL '20' DAY)' due to data 
type mismatch: input to function coalesce should all be the same type, but it's 
[interval month, interval day]; line 1 pos 7

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to