MINUS operations

dongjoon Tue, 30 Jun 2020 23:43:00 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.4
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-2.4 by this push:
     new 37b32c8  [SPARK-32131][SQL] Fix AnalysisException messages at 
UNION/EXCEPT/MINUS operations
37b32c8 is described below

commit 37b32c8d7bcb3cf2f6e1e827278bbf5259ed10a5
Author: GuoPhilipse <[email protected]>
AuthorDate: Tue Jun 30 23:33:50 2020 -0700

    [SPARK-32131][SQL] Fix AnalysisException messages at UNION/EXCEPT/MINUS 
operations
    
    Fix the exception messages raised during Union and set operations.
    
    Union and set operations can only be performed on tables with compatible
    column types. However, when the tables have more than two columns, the
    exception message reports the wrong column index.
    
    Steps to reproduce:
    
    ```
    drop table if exists test1;
    drop table if exists test2;
    drop table if exists test3;
    create table if not exists test1(id int, age int, name timestamp);
    create table if not exists test2(id int, age timestamp, name timestamp);
    create table if not exists test3(id int, age int, name int);
    insert into test1 select 1,2,'2020-01-01 01:01:01';
    insert into test2 select 1,'2020-01-01 01:01:01','2020-01-01 01:01:01';
    insert into test3 select 1,3,4;
    ```
    
    Query1:
    ```sql
    select * from test1 except select * from test2;
    ```
    Result1:
    ```
    Error: org.apache.spark.sql.AnalysisException: Except can only be performed 
on tables with the compatible column types. timestamp <> int at the second 
column of the second table;; 'Except false :- Project [id#620, age#621, 
name#622] : +- SubqueryAlias `default`.`test1` : +- HiveTableRelation 
`default`.`test1`, org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, [id#620, 
age#621, name#622] +- Project [id#623, age#624, name#625] +- SubqueryAlias 
`default`.`test2` +- HiveTableRelation `d [...]
    ```
    
    Query2:
    
    ```sql
    select * from test1 except select * from test3;
    ```
    
    Result2:
    
    ```
    Error: org.apache.spark.sql.AnalysisException: Except can only be performed 
on tables with the compatible column types
     int <> timestamp at the 2th column of the second table;
    ```
    
    Query1 above produces the correct exception message.
    Query2 above produces the wrong error information; it should be changed
    to the following:
    
    ```
    Error: org.apache.spark.sql.AnalysisException: Except can only be performed 
on tables with the compatible column types.
    int <> timestamp at the third column of the second table
    ```
    
    Does this PR introduce any user-facing change? No.
    
    How was this patch tested? Unit test.
    
    Closes #28951 from GuoPhilipse/32131-correct-error-messages.
    
    Lead-authored-by: GuoPhilipse 
<[email protected]>
    Co-authored-by: GuoPhilipse <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 02f3b80d3a277e0c19a66c28d935fa41da7b3307)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../sql/catalyst/analysis/CheckAnalysis.scala      |  3 +-
 .../sql/catalyst/analysis/AnalysisSuite.scala      | 53 ++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 6fb66ca..ad3acbb 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -274,7 +274,8 @@ trait CheckAnalysis extends PredicateHelper {
             def ordinalNumber(i: Int): String = i match {
               case 0 => "first"
               case 1 => "second"
-              case i => s"${i}th"
+              case 2 => "third"
+              case i => s"${i + 1}th"
             }
             val ref = dataTypes(operator.children.head)
             operator.children.tail.zipWithIndex.foreach { case (child, ti) =>
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index d8cb6f7..8eaba32 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -604,4 +604,57 @@ class AnalysisSuite extends AnalysisTest with Matchers {
       checkAnalysis(input, expected)
     }
   }
+
+  test("SPARK-32131: Fix wrong column index when we have more than two 
columns" +
+    " during union and set operations" ) {
+    val firstTable = LocalRelation(
+      AttributeReference("a", StringType)(),
+      AttributeReference("b", DoubleType)(),
+      AttributeReference("c", IntegerType)(),
+      AttributeReference("d", FloatType)())
+
+    val secondTable = LocalRelation(
+      AttributeReference("a", StringType)(),
+      AttributeReference("b", TimestampType)(),
+      AttributeReference("c", IntegerType)(),
+      AttributeReference("d", FloatType)())
+
+    val thirdTable = LocalRelation(
+      AttributeReference("a", StringType)(),
+      AttributeReference("b", DoubleType)(),
+      AttributeReference("c", TimestampType)(),
+      AttributeReference("d", FloatType)())
+
+    val fourthTable = LocalRelation(
+      AttributeReference("a", StringType)(),
+      AttributeReference("b", DoubleType)(),
+      AttributeReference("c", IntegerType)(),
+      AttributeReference("d", TimestampType)())
+
+    val r1 = Union(firstTable, secondTable)
+    val r2 = Union(firstTable, thirdTable)
+    val r3 = Union(firstTable, fourthTable)
+    val r4 = Except(firstTable, secondTable, isAll = false)
+    val r5 = Intersect(firstTable, secondTable, isAll = false)
+
+    assertAnalysisError(r1,
+      Seq("Union can only be performed on tables with the compatible column 
types. " +
+        "timestamp <> double at the second column of the second table"))
+
+    assertAnalysisError(r2,
+      Seq("Union can only be performed on tables with the compatible column 
types. " +
+        "timestamp <> int at the third column of the second table"))
+
+    assertAnalysisError(r3,
+      Seq("Union can only be performed on tables with the compatible column 
types. " +
+        "timestamp <> float at the 4th column of the second table"))
+
+    assertAnalysisError(r4,
+      Seq("Except can only be performed on tables with the compatible column 
types. " +
+        "timestamp <> double at the second column of the second table"))
+
+    assertAnalysisError(r5,
+      Seq("Intersect can only be performed on tables with the compatible 
column types. " +
+        "timestamp <> double at the second column of the second table"))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch branch-2.4 updated: [SPARK-32131][SQL] Fix AnalysisException messages at UNION/EXCEPT/MINUS operations

Reply via email to