This is an automated email from the ASF dual-hosted git repository.

maxgekk pushed a commit to branch branch-3.2
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.2 by this push:
     new 41e5144  [SPARK-36521][SQL] Disallow comparison between Interval and 
String
41e5144 is described below

commit 41e5144b53d21d4c67e35250594ee418bdfba136
Author: Gengliang Wang <gengli...@apache.org>
AuthorDate: Mon Aug 16 22:41:14 2021 +0300

    [SPARK-36521][SQL] Disallow comparison between Interval and String
    
    ### What changes were proposed in this pull request?
    
    Disallow comparison between Interval and String in the default type 
coercion rules.
    
    ### Why are the changes needed?
    
    If a binary comparison contains interval type and string type, we can't 
decide which
    interval type the string should be promoted as. There are many possible 
interval
    types, such as year interval, month interval, day interval, hour interval, 
etc.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No, the new interval type is not released yet.
    
    ### How was this patch tested?
    
    Existing UT, plus new string-vs-interval comparison queries added to interval.sql
    
    Closes #33750 from gengliangwang/disallowCom.
    
    Authored-by: Gengliang Wang <gengli...@apache.org>
    Signed-off-by: Max Gekk <max.g...@gmail.com>
    (cherry picked from commit 26d6b952dcf7d387930701396de9cef679df7432)
    Signed-off-by: Max Gekk <max.g...@gmail.com>
---
 .../spark/sql/catalyst/analysis/TypeCoercion.scala | 16 +++-
 .../test/resources/sql-tests/inputs/interval.sql   |  6 ++
 .../sql-tests/results/ansi/interval.sql.out        | 56 +++++++++++++-
 .../resources/sql-tests/results/interval.sql.out   | 86 ++++++++++++++++++----
 4 files changed, 148 insertions(+), 16 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 23654af..863bdc0 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -862,6 +862,18 @@ object TypeCoercion extends TypeCoercionBase {
     case _ => None
   }
 
+  // Return whether a string literal can be promoted as the given data type in 
a binary comparison.
+  private def canPromoteAsInBinaryComparison(dt: DataType) = dt match {
+    // If a binary comparison contains interval type and string type, we can't 
decide which
+    // interval type the string should be promoted as. There are many possible 
interval
+    // types, such as year interval, month interval, day interval, hour 
interval, etc.
+    case _: YearMonthIntervalType | _: DayTimeIntervalType => false
+    // There is no need to add `Cast` for comparison between strings.
+    case _: StringType => false
+    case _: AtomicType => true
+    case _ => false
+  }
+
   /**
    * This function determines the target type of a comparison operator when 
one operand
    * is a String and the other is not. It also handles when one op is a Date 
and the
@@ -891,8 +903,8 @@ object TypeCoercion extends TypeCoercionBase {
     case (n: DecimalType, s: StringType) => Some(DoubleType)
     case (s: StringType, n: DecimalType) => Some(DoubleType)
 
-    case (l: StringType, r: AtomicType) if r != StringType => Some(r)
-    case (l: AtomicType, r: StringType) if l != StringType => Some(l)
+    case (l: StringType, r: AtomicType) if canPromoteAsInBinaryComparison(r) 
=> Some(r)
+    case (l: AtomicType, r: StringType) if canPromoteAsInBinaryComparison(l) 
=> Some(l)
     case (l, r) => None
   }
 
diff --git a/sql/core/src/test/resources/sql-tests/inputs/interval.sql 
b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
index 618cf16..279c5441 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/interval.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/interval.sql
@@ -341,9 +341,15 @@ SELECT INTERVAL 1 MONTH > INTERVAL 20 DAYS;
 SELECT INTERVAL '1' DAY < '1';
 SELECT INTERVAL '1' DAY = '1';
 SELECT INTERVAL '1' DAY > '1';
+SELECT '1' < INTERVAL '1' DAY;
+SELECT '1' = INTERVAL '1' DAY;
+SELECT '1' > INTERVAL '1' DAY;
 SELECT INTERVAL '1' YEAR < '1';
 SELECT INTERVAL '1' YEAR = '1';
 SELECT INTERVAL '1' YEAR > '1';
+SELECT '1' < INTERVAL '1' YEAR;
+SELECT '1' = INTERVAL '1' YEAR;
+SELECT '1' > INTERVAL '1' YEAR;
 
 SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH);
 SELECT array(INTERVAL '1' DAY, INTERVAL '01:01' HOUR TO MINUTE);
diff --git 
a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
index e0bf076..1aa0920 100644
--- a/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/ansi/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 251
+-- Number of queries: 257
 
 
 -- !query
@@ -2328,6 +2328,33 @@ cannot resolve '(INTERVAL '1' DAY > '1')' due to data 
type mismatch: differing t
 
 
 -- !query
+SELECT '1' < INTERVAL '1' DAY
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' < INTERVAL '1' DAY)' due to data type mismatch: differing 
types in '('1' < INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
+
+
+-- !query
+SELECT '1' = INTERVAL '1' DAY
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' = INTERVAL '1' DAY)' due to data type mismatch: differing 
types in '('1' = INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
+
+
+-- !query
+SELECT '1' > INTERVAL '1' DAY
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' > INTERVAL '1' DAY)' due to data type mismatch: differing 
types in '('1' > INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
+
+
+-- !query
 SELECT INTERVAL '1' YEAR < '1'
 -- !query schema
 struct<>
@@ -2355,6 +2382,33 @@ cannot resolve '(INTERVAL '1' YEAR > '1')' due to data 
type mismatch: differing
 
 
 -- !query
+SELECT '1' < INTERVAL '1' YEAR
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' < INTERVAL '1' YEAR)' due to data type mismatch: 
differing types in '('1' < INTERVAL '1' YEAR)' (string and interval year).; 
line 1 pos 7
+
+
+-- !query
+SELECT '1' = INTERVAL '1' YEAR
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' = INTERVAL '1' YEAR)' due to data type mismatch: 
differing types in '('1' = INTERVAL '1' YEAR)' (string and interval year).; 
line 1 pos 7
+
+
+-- !query
+SELECT '1' > INTERVAL '1' YEAR
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' > INTERVAL '1' YEAR)' due to data type mismatch: 
differing types in '('1' > INTERVAL '1' YEAR)' (string and interval year).; 
line 1 pos 7
+
+
+-- !query
 SELECT array(INTERVAL '1' YEAR, INTERVAL '1' MONTH)
 -- !query schema
 struct<array(INTERVAL '1' YEAR, INTERVAL '1' MONTH):array<interval year to 
month>>
diff --git a/sql/core/src/test/resources/sql-tests/results/interval.sql.out 
b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
index 3e6380b..5f6af71 100644
--- a/sql/core/src/test/resources/sql-tests/results/interval.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/interval.sql.out
@@ -1,5 +1,5 @@
 -- Automatically generated by SQLQueryTestSuite
--- Number of queries: 251
+-- Number of queries: 257
 
 
 -- !query
@@ -2292,49 +2292,109 @@ cannot resolve '(INTERVAL '1' MONTH > INTERVAL '20' 
DAY)' due to data type misma
 -- !query
 SELECT INTERVAL '1' DAY < '1'
 -- !query schema
-struct<(INTERVAL '1' DAY < 1):boolean>
+struct<>
 -- !query output
-false
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' DAY < '1')' due to data type mismatch: differing 
types in '(INTERVAL '1' DAY < '1')' (interval day and string).; line 1 pos 7
 
 
 -- !query
 SELECT INTERVAL '1' DAY = '1'
 -- !query schema
-struct<(INTERVAL '1' DAY = 1):boolean>
+struct<>
 -- !query output
-true
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' DAY = '1')' due to data type mismatch: differing 
types in '(INTERVAL '1' DAY = '1')' (interval day and string).; line 1 pos 7
 
 
 -- !query
 SELECT INTERVAL '1' DAY > '1'
 -- !query schema
-struct<(INTERVAL '1' DAY > 1):boolean>
+struct<>
 -- !query output
-false
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' DAY > '1')' due to data type mismatch: differing 
types in '(INTERVAL '1' DAY > '1')' (interval day and string).; line 1 pos 7
+
+
+-- !query
+SELECT '1' < INTERVAL '1' DAY
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' < INTERVAL '1' DAY)' due to data type mismatch: differing 
types in '('1' < INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
+
+
+-- !query
+SELECT '1' = INTERVAL '1' DAY
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' = INTERVAL '1' DAY)' due to data type mismatch: differing 
types in '('1' = INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
+
+
+-- !query
+SELECT '1' > INTERVAL '1' DAY
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' > INTERVAL '1' DAY)' due to data type mismatch: differing 
types in '('1' > INTERVAL '1' DAY)' (string and interval day).; line 1 pos 7
 
 
 -- !query
 SELECT INTERVAL '1' YEAR < '1'
 -- !query schema
-struct<(INTERVAL '1' YEAR < 1):boolean>
+struct<>
 -- !query output
-false
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' YEAR < '1')' due to data type mismatch: 
differing types in '(INTERVAL '1' YEAR < '1')' (interval year and string).; 
line 1 pos 7
 
 
 -- !query
 SELECT INTERVAL '1' YEAR = '1'
 -- !query schema
-struct<(INTERVAL '1' YEAR = 1):boolean>
+struct<>
 -- !query output
-true
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' YEAR = '1')' due to data type mismatch: 
differing types in '(INTERVAL '1' YEAR = '1')' (interval year and string).; 
line 1 pos 7
 
 
 -- !query
 SELECT INTERVAL '1' YEAR > '1'
 -- !query schema
-struct<(INTERVAL '1' YEAR > 1):boolean>
+struct<>
 -- !query output
-false
+org.apache.spark.sql.AnalysisException
+cannot resolve '(INTERVAL '1' YEAR > '1')' due to data type mismatch: 
differing types in '(INTERVAL '1' YEAR > '1')' (interval year and string).; 
line 1 pos 7
+
+
+-- !query
+SELECT '1' < INTERVAL '1' YEAR
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' < INTERVAL '1' YEAR)' due to data type mismatch: 
differing types in '('1' < INTERVAL '1' YEAR)' (string and interval year).; 
line 1 pos 7
+
+
+-- !query
+SELECT '1' = INTERVAL '1' YEAR
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' = INTERVAL '1' YEAR)' due to data type mismatch: 
differing types in '('1' = INTERVAL '1' YEAR)' (string and interval year).; 
line 1 pos 7
+
+
+-- !query
+SELECT '1' > INTERVAL '1' YEAR
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+cannot resolve '('1' > INTERVAL '1' YEAR)' due to data type mismatch: 
differing types in '('1' > INTERVAL '1' YEAR)' (string and interval year).; 
line 1 pos 7
 
 
 -- !query

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to