This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new 44070e0  [SPARK-36797][SQL] Union should resolve nested columns as 
top-level columns
44070e0 is described below

commit 44070e0e5dc738838823ad99faf9432c85cb05af
Author: Liang-Chi Hsieh <[email protected]>
AuthorDate: Mon Sep 27 15:51:43 2021 +0800

    [SPARK-36797][SQL] Union should resolve nested columns as top-level columns
    
    ### What changes were proposed in this pull request?
    
    This patch proposes to generalize the resolving-by-position behavior to 
nested columns for Union.
    
    ### Why are the changes needed?
    
    Union, by the API definition, resolves columns by position. Currently we 
only follow this behavior at top-level columns, but not nested columns.
    
    As we are making nested columns first-class citizens, the 
top-level-column-only limitation and the difference between top-level columns 
and nested columns do not make sense. We should also resolve nested columns 
like top-level columns for Union.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes. After this change, Union also resolves nested columns by position.
    
    ### How was this patch tested?
    
    Added tests.
    
    Closes #34038 from viirya/SPARK-36797.
    
    Authored-by: Liang-Chi Hsieh <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 .../sql/catalyst/analysis/CheckAnalysis.scala      |  19 ++-
 .../spark/sql/catalyst/analysis/TypeCoercion.scala |  32 ++---
 .../plans/logical/basicLogicalOperators.scala      |   2 +-
 .../sql/catalyst/analysis/AnalysisSuite.scala      |  15 ++-
 .../sql/catalyst/analysis/TypeCoercionSuite.scala  |   3 +-
 .../resources/sql-tests/results/except-all.sql.out |   2 +-
 .../sql-tests/results/intersect-all.sql.out        |   2 +-
 .../sql-tests/results/postgreSQL/union.sql.out     |   4 +-
 .../native/widenSetOperationTypes.sql.out          | 140 ++++++++++-----------
 .../sql-tests/results/udf/udf-except-all.sql.out   |   2 +-
 .../results/udf/udf-intersect-all.sql.out          |   2 +-
 .../spark/sql/DataFrameSetOperationsSuite.scala    |  46 +++++--
 .../scala/org/apache/spark/sql/SQLQuerySuite.scala |   7 +-
 13 files changed, 159 insertions(+), 117 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
index 2adf110..b62e934 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckAnalysis.scala
@@ -401,15 +401,26 @@ trait CheckAnalysis extends PredicateHelper with 
LookupCatalog {
                     |the ${ordinalNumber(ti + 1)} table has 
${child.output.length} columns
                   """.stripMargin.replace("\n", " ").trim())
               }
+              val isUnion = operator.isInstanceOf[Union]
+              val dataTypesAreCompatibleFn = if (isUnion) {
+                (dt1: DataType, dt2: DataType) =>
+                  !DataType.equalsStructurally(dt1, dt2, true)
+              } else {
+                // SPARK-18058: we shall not care about the nullability of 
columns
+                (dt1: DataType, dt2: DataType) =>
+                  TypeCoercion.findWiderTypeForTwo(dt1.asNullable, 
dt2.asNullable).isEmpty
+              }
+
               // Check if the data types match.
               dataTypes(child).zip(ref).zipWithIndex.foreach { case ((dt1, 
dt2), ci) =>
                 // SPARK-18058: we shall not care about the nullability of 
columns
-                if (TypeCoercion.findWiderTypeForTwo(dt1.asNullable, 
dt2.asNullable).isEmpty) {
+                if (dataTypesAreCompatibleFn(dt1, dt2)) {
                   failAnalysis(
                     s"""
-                      |${operator.nodeName} can only be performed on tables 
with the compatible
-                      |column types. ${dt1.catalogString} <> 
${dt2.catalogString} at the
-                      |${ordinalNumber(ci)} column of the ${ordinalNumber(ti + 
1)} table
+                       |${operator.nodeName} can only be performed on tables 
with the compatible
+                       |column types. The ${ordinalNumber(ci)} column of the
+                       |${ordinalNumber(ti + 1)} table is ${dt1.catalogString} 
type which is not
+                       |compatible with ${dt2.catalogString} at same column of 
first table
                     """.stripMargin.replace("\n", " ").trim())
                 }
               }
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
index 863bdc0..90cbe56 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercion.scala
@@ -243,6 +243,7 @@ abstract class TypeCoercionBase {
         case s: Union if s.childrenResolved && !s.byName &&
           s.children.forall(_.output.length == s.children.head.output.length) 
&& !s.resolved =>
           val newChildren: Seq[LogicalPlan] = 
buildNewChildrenWithWiderTypes(s.children)
+
           if (newChildren.isEmpty) {
             s -> Nil
           } else {
@@ -258,10 +259,10 @@ abstract class TypeCoercionBase {
 
       // Get a sequence of data types, each of which is the widest type of 
this specific attribute
       // in all the children
-      val targetTypes: Seq[DataType] =
-        getWidestTypes(children, attrIndex = 0, mutable.Queue[DataType]())
+      val targetTypes: Seq[Option[DataType]] =
+        getWidestTypes(children, attrIndex = 0, 
mutable.Queue[Option[DataType]]())
 
-      if (targetTypes.nonEmpty) {
+      if (targetTypes.exists(_.isDefined)) {
         // Add an extra Project if the targetTypes are different from the 
original types.
         children.map(widenTypes(_, targetTypes))
       } else {
@@ -273,29 +274,30 @@ abstract class TypeCoercionBase {
     @tailrec private def getWidestTypes(
         children: Seq[LogicalPlan],
         attrIndex: Int,
-        castedTypes: mutable.Queue[DataType]): Seq[DataType] = {
+        castedTypes: mutable.Queue[Option[DataType]]): Seq[Option[DataType]] = 
{
       // Return the result after the widen data types have been found for all 
the children
       if (attrIndex >= children.head.output.length) return castedTypes.toSeq
 
       // For the attrIndex-th attribute, find the widest type
-      findWiderCommonType(children.map(_.output(attrIndex).dataType)) match {
-        // If unable to find an appropriate widen type for this column, return 
an empty Seq
-        case None => Seq.empty[DataType]
-        // Otherwise, record the result in the queue and find the type for the 
next column
-        case Some(widenType) =>
-          castedTypes.enqueue(widenType)
-          getWidestTypes(children, attrIndex + 1, castedTypes)
-      }
+      val widenTypeOpt = 
findWiderCommonType(children.map(_.output(attrIndex).dataType))
+      castedTypes.enqueue(widenTypeOpt)
+      getWidestTypes(children, attrIndex + 1, castedTypes)
     }
 
     /** Given a plan, add an extra project on top to widen some columns' data 
types. */
-    private def widenTypes(plan: LogicalPlan, targetTypes: Seq[DataType]): 
LogicalPlan = {
+    private def widenTypes(plan: LogicalPlan, targetTypes: 
Seq[Option[DataType]]): LogicalPlan = {
+      var changed = false
       val casted = plan.output.zip(targetTypes).map {
-        case (e, dt) if e.dataType != dt =>
+        case (e, Some(dt)) if e.dataType != dt =>
+          changed = true
           Alias(Cast(e, dt, Some(conf.sessionLocalTimeZone)), e.name)()
         case (e, _) => e
       }
-      Project(casted, plan)
+      if (changed) {
+        Project(casted, plan)
+      } else {
+        plan
+      }
     }
   }
 
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
index 50e8d64..269d18a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala
@@ -327,7 +327,7 @@ case class Union(
         child.output.length == children.head.output.length &&
         // compare the data types with the first child
         child.output.zip(children.head.output).forall {
-          case (l, r) => l.dataType.sameType(r.dataType)
+          case (l, r) => DataType.equalsStructurally(l.dataType, r.dataType, 
true)
         })
     children.length > 1 && !(byName || allowMissingCol) && childrenResolved && 
allChildrenCompatible
   }
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
index d3dd3ac..cd72470 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/AnalysisSuite.scala
@@ -921,23 +921,28 @@ class AnalysisSuite extends AnalysisTest with Matchers {
 
     assertAnalysisError(r1,
       Seq("Union can only be performed on tables with the compatible column 
types. " +
-        "timestamp <> double at the second column of the second table"))
+        "The second column of the second table is timestamp type which is not 
compatible " +
+        "with double at same column of first table"))
 
     assertAnalysisError(r2,
       Seq("Union can only be performed on tables with the compatible column 
types. " +
-        "timestamp <> int at the third column of the second table"))
+        "The third column of the second table is timestamp type which is not 
compatible " +
+        "with int at same column of first table"))
 
     assertAnalysisError(r3,
       Seq("Union can only be performed on tables with the compatible column 
types. " +
-        "timestamp <> float at the 4th column of the second table"))
+        "The 4th column of the second table is timestamp type which is not 
compatible " +
+        "with float at same column of first table"))
 
     assertAnalysisError(r4,
       Seq("Except can only be performed on tables with the compatible column 
types. " +
-        "timestamp <> double at the second column of the second table"))
+        "The second column of the second table is timestamp type which is not 
compatible " +
+        "with double at same column of first table"))
 
     assertAnalysisError(r5,
       Seq("Intersect can only be performed on tables with the compatible 
column types. " +
-        "timestamp <> double at the second column of the second table"))
+        "The second column of the second table is timestamp type which is not 
compatible " +
+        "with double at same column of first table"))
   }
 
   test("SPARK-31975: Throw user facing error when use WindowFunction 
directly") {
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
index 6a7d7ef..2dc669b 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/analysis/TypeCoercionSuite.scala
@@ -1362,7 +1362,8 @@ class TypeCoercionSuite extends AnalysisTest {
 
     assert(unionRelation.children.head.isInstanceOf[Project])
     assert(unionRelation.children(1).isInstanceOf[Project])
-    assert(unionRelation.children(2).isInstanceOf[Project])
+    // thirdTable has same datatypes as expected ones, so no need to add extra 
Project.
+    assert(unionRelation.children(2).isInstanceOf[LocalRelation])
     assert(unionRelation.children(3).isInstanceOf[Project])
   }
 
diff --git a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out 
b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out
index a1fe952..553d85a 100644
--- a/sql/core/src/test/resources/sql-tests/results/except-all.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/except-all.sql.out
@@ -141,7 +141,7 @@ SELECT array(1)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-ExceptAll can only be performed on tables with the compatible column types. 
array<int> <> int at the first column of the second table
+ExceptAll can only be performed on tables with the compatible column types. 
The first column of the second table is array<int> type which is not compatible 
with int at same column of first table
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out 
b/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out
index caba8c6..3e12542 100644
--- a/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/intersect-all.sql.out
@@ -98,7 +98,7 @@ SELECT array(1), 2
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-IntersectAll can only be performed on tables with the compatible column types. 
array<int> <> int at the first column of the second table
+IntersectAll can only be performed on tables with the compatible column types. 
The first column of the second table is array<int> type which is not compatible 
with int at same column of first table
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out 
b/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out
index 5f7a82b..762d85a 100644
--- a/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/postgreSQL/union.sql.out
@@ -684,8 +684,8 @@ SELECT cast('3.4' as decimal(38, 18)) UNION SELECT 'foo'
 -- !query schema
 struct<>
 -- !query output
-org.apache.spark.SparkException
-Failed to merge incompatible data types decimal(38,18) and string
+org.apache.spark.sql.AnalysisException
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is string type which is not compatible with 
decimal(38,18) at same column of first table
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out
 
b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out
index a527b20..4f663b9 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/typeCoercion/native/widenSetOperationTypes.sql.out
@@ -88,7 +88,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast('2' as 
binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> tinyint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
tinyint at same column of first table
 
 
 -- !query
@@ -97,7 +97,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT cast(2 as 
boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> tinyint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
tinyint at same column of first table
 
 
 -- !query
@@ -106,7 +106,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT 
cast('2017-12-11 09:30:00.0' as ti
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> tinyint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
tinyint at same column of first table
 
 
 -- !query
@@ -115,7 +115,7 @@ SELECT cast(1 as tinyint) FROM t UNION SELECT 
cast('2017-12-11 09:30:00' as date
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> tinyint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with 
tinyint at same column of first table
 
 
 -- !query
@@ -196,7 +196,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast('2' as 
binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> smallint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
smallint at same column of first table
 
 
 -- !query
@@ -205,7 +205,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT cast(2 as 
boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> smallint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
smallint at same column of first table
 
 
 -- !query
@@ -214,7 +214,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT 
cast('2017-12-11 09:30:00.0' as t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> smallint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
smallint at same column of first table
 
 
 -- !query
@@ -223,7 +223,7 @@ SELECT cast(1 as smallint) FROM t UNION SELECT 
cast('2017-12-11 09:30:00' as dat
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> smallint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with 
smallint at same column of first table
 
 
 -- !query
@@ -304,7 +304,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2' as 
binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> int at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
int at same column of first table
 
 
 -- !query
@@ -313,7 +313,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast(2 as 
boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> int at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
int at same column of first table
 
 
 -- !query
@@ -322,7 +322,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 
09:30:00.0' as timest
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> int at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
int at same column of first table
 
 
 -- !query
@@ -331,7 +331,7 @@ SELECT cast(1 as int) FROM t UNION SELECT cast('2017-12-11 
09:30:00' as date) FR
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> int at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with int 
at same column of first table
 
 
 -- !query
@@ -412,7 +412,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast('2' as 
binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> bigint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
bigint at same column of first table
 
 
 -- !query
@@ -421,7 +421,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT cast(2 as 
boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> bigint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
bigint at same column of first table
 
 
 -- !query
@@ -430,7 +430,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT 
cast('2017-12-11 09:30:00.0' as tim
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> bigint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
bigint at same column of first table
 
 
 -- !query
@@ -439,7 +439,7 @@ SELECT cast(1 as bigint) FROM t UNION SELECT 
cast('2017-12-11 09:30:00' as date)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> bigint at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with 
bigint at same column of first table
 
 
 -- !query
@@ -520,7 +520,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast('2' as 
binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> float at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
float at same column of first table
 
 
 -- !query
@@ -529,7 +529,7 @@ SELECT cast(1 as float) FROM t UNION SELECT cast(2 as 
boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> float at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
float at same column of first table
 
 
 -- !query
@@ -538,7 +538,7 @@ SELECT cast(1 as float) FROM t UNION SELECT 
cast('2017-12-11 09:30:00.0' as time
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> float at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
float at same column of first table
 
 
 -- !query
@@ -547,7 +547,7 @@ SELECT cast(1 as float) FROM t UNION SELECT 
cast('2017-12-11 09:30:00' as date)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> float at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with 
float at same column of first table
 
 
 -- !query
@@ -628,7 +628,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast('2' as 
binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> double at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
double at same column of first table
 
 
 -- !query
@@ -637,7 +637,7 @@ SELECT cast(1 as double) FROM t UNION SELECT cast(2 as 
boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> double at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
double at same column of first table
 
 
 -- !query
@@ -646,7 +646,7 @@ SELECT cast(1 as double) FROM t UNION SELECT 
cast('2017-12-11 09:30:00.0' as tim
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> double at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
double at same column of first table
 
 
 -- !query
@@ -655,7 +655,7 @@ SELECT cast(1 as double) FROM t UNION SELECT 
cast('2017-12-11 09:30:00' as date)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> double at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with 
double at same column of first table
 
 
 -- !query
@@ -736,7 +736,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT 
cast('2' as binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> decimal(10,0) at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
decimal(10,0) at same column of first table
 
 
 -- !query
@@ -745,7 +745,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT cast(2 
as boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> decimal(10,0) at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
decimal(10,0) at same column of first table
 
 
 -- !query
@@ -754,7 +754,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT 
cast('2017-12-11 09:30:00.0
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> decimal(10,0) at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
decimal(10,0) at same column of first table
 
 
 -- !query
@@ -763,7 +763,7 @@ SELECT cast(1 as decimal(10, 0)) FROM t UNION SELECT 
cast('2017-12-11 09:30:00'
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> decimal(10,0) at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with 
decimal(10,0) at same column of first table
 
 
 -- !query
@@ -844,7 +844,7 @@ SELECT cast(1 as string) FROM t UNION SELECT cast('2' as 
binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> string at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
string at same column of first table
 
 
 -- !query
@@ -853,7 +853,7 @@ SELECT cast(1 as string) FROM t UNION SELECT cast(2 as 
boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> string at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
string at same column of first table
 
 
 -- !query
@@ -880,7 +880,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
tinyint) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
tinyint <> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is tinyint type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -889,7 +889,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
smallint) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
smallint <> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is smallint type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -898,7 +898,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
int) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. int <> 
binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is int type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -907,7 +907,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
bigint) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. bigint 
<> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is bigint type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -916,7 +916,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
float) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. float 
<> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is float type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -925,7 +925,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
double) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. double 
<> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is double type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -934,7 +934,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
decimal(10, 0)) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
decimal(10,0) <> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is decimal(10,0) type which is not compatible 
with binary at same column of first table
 
 
 -- !query
@@ -943,7 +943,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
string) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. string 
<> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is string type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -961,7 +961,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT cast(2 as 
boolean) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -970,7 +970,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT 
cast('2017-12-11 09:30:00.0' as t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -979,7 +979,7 @@ SELECT cast('1' as binary) FROM t UNION SELECT 
cast('2017-12-11 09:30:00' as dat
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> binary at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with 
binary at same column of first table
 
 
 -- !query
@@ -988,7 +988,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as 
tinyint) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
tinyint <> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is tinyint type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -997,7 +997,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as 
smallint) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
smallint <> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is smallint type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1006,7 +1006,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as 
int) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. int <> 
boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is int type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1015,7 +1015,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as 
bigint) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. bigint 
<> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is bigint type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1024,7 +1024,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as 
float) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. float 
<> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is float type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1033,7 +1033,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as 
double) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. double 
<> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is double type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1042,7 +1042,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as 
decimal(10, 0)) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
decimal(10,0) <> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is decimal(10,0) type which is not compatible 
with boolean at same column of first table
 
 
 -- !query
@@ -1051,7 +1051,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast(2 as 
string) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. string 
<> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is string type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1060,7 +1060,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT cast('2' as 
binary) FROM t
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1077,7 +1077,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT 
cast('2017-12-11 09:30:00.0' as ti
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
timestamp <> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is timestamp type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1086,7 +1086,7 @@ SELECT cast(1 as boolean) FROM t UNION SELECT 
cast('2017-12-11 09:30:00' as date
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. date 
<> boolean at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is date type which is not compatible with 
boolean at same column of first table
 
 
 -- !query
@@ -1095,7 +1095,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast(2 as
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
tinyint <> timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is tinyint type which is not compatible with 
timestamp at same column of first table
 
 
 -- !query
@@ -1104,7 +1104,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast(2 as
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
smallint <> timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is smallint type which is not compatible with 
timestamp at same column of first table
 
 
 -- !query
@@ -1113,7 +1113,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast(2 as
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. int <> 
timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is int type which is not compatible with 
timestamp at same column of first table
 
 
 -- !query
@@ -1122,7 +1122,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast(2 as
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. bigint 
<> timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is bigint type which is not compatible with 
timestamp at same column of first table
 
 
 -- !query
@@ -1131,7 +1131,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast(2 as
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. float 
<> timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is float type which is not compatible with 
timestamp at same column of first table
 
 
 -- !query
@@ -1140,7 +1140,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast(2 as
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. double 
<> timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is double type which is not compatible with 
timestamp at same column of first table
 
 
 -- !query
@@ -1149,7 +1149,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast(2 as
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
decimal(10,0) <> timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is decimal(10,0) type which is not compatible 
with timestamp at same column of first table
 
 
 -- !query
@@ -1167,7 +1167,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast('2' a
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
timestamp at same column of first table
 
 
 -- !query
@@ -1176,7 +1176,7 @@ SELECT cast('2017-12-12 09:30:00.0' as timestamp) FROM t 
UNION SELECT cast(2 as
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> timestamp at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
timestamp at same column of first table
 
 
 -- !query
@@ -1203,7 +1203,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast(2 as tinyint
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
tinyint <> date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is tinyint type which is not compatible with 
date at same column of first table
 
 
 -- !query
@@ -1212,7 +1212,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast(2 as smallin
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
smallint <> date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is smallint type which is not compatible with 
date at same column of first table
 
 
 -- !query
@@ -1221,7 +1221,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast(2 as int) FR
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. int <> 
date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is int type which is not compatible with date 
at same column of first table
 
 
 -- !query
@@ -1230,7 +1230,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast(2 as bigint)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. bigint 
<> date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is bigint type which is not compatible with 
date at same column of first table
 
 
 -- !query
@@ -1239,7 +1239,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast(2 as float)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. float 
<> date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is float type which is not compatible with 
date at same column of first table
 
 
 -- !query
@@ -1248,7 +1248,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast(2 as double)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. double 
<> date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is double type which is not compatible with 
date at same column of first table
 
 
 -- !query
@@ -1257,7 +1257,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast(2 as decimal
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
decimal(10,0) <> date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is decimal(10,0) type which is not compatible 
with date at same column of first table
 
 
 -- !query
@@ -1275,7 +1275,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast('2' as binar
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. binary 
<> date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is binary type which is not compatible with 
date at same column of first table
 
 
 -- !query
@@ -1284,7 +1284,7 @@ SELECT cast('2017-12-12 09:30:00' as date) FROM t UNION 
SELECT cast(2 as boolean
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-Union can only be performed on tables with the compatible column types. 
boolean <> date at the first column of the second table
+Union can only be performed on tables with the compatible column types. The 
first column of the second table is boolean type which is not compatible with 
date at same column of first table
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out 
b/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out
index ace56be..b8317fd 100644
--- a/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out
+++ b/sql/core/src/test/resources/sql-tests/results/udf/udf-except-all.sql.out
@@ -141,7 +141,7 @@ SELECT array(1)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-ExceptAll can only be performed on tables with the compatible column types. 
array<int> <> int at the first column of the second table
+ExceptAll can only be performed on tables with the compatible column types. 
The first column of the second table is array<int> type which is not compatible 
with int at same column of first table
 
 
 -- !query
diff --git 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out 
b/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out
index f03e6c0..6210f1d 100644
--- 
a/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out
+++ 
b/sql/core/src/test/resources/sql-tests/results/udf/udf-intersect-all.sql.out
@@ -98,7 +98,7 @@ SELECT array(1), udf(2)
 struct<>
 -- !query output
 org.apache.spark.sql.AnalysisException
-IntersectAll can only be performed on tables with the compatible column types. 
array<int> <> int at the first column of the second table
+IntersectAll can only be performed on tables with the compatible column types. 
The first column of the second table is array<int> type which is not compatible 
with int at same column of first table
 
 
 -- !query
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
index f8e0cfc..4e00de0 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameSetOperationsSuite.scala
@@ -999,8 +999,9 @@ class DataFrameSetOperationsSuite extends QueryTest with 
SharedSparkSession {
     }.getMessage
     assert(errMsg.contains("Union can only be performed on tables with" +
       " the compatible column types." +
-      " struct<c1:int,c2:int,c3:struct<c3:int,c5:int>> <> 
struct<c1:int,c2:int,c3:struct<c3:int>>" +
-      " at the third column of the second table"))
+      " The third column of the second table is 
struct<c1:int,c2:int,c3:struct<c3:int,c5:int>>" +
+      " type which is not compatible with 
struct<c1:int,c2:int,c3:struct<c3:int>> at same" +
+      " column of first table"))
 
     // diff Case sensitive attributes names and diff sequence scenario for 
unionByName
     df1 = Seq((1, 2, UnionClass1d(1, 2, Struct3(1)))).toDF("a", "b", "c")
@@ -1039,24 +1040,47 @@ class DataFrameSetOperationsSuite extends QueryTest 
with SharedSparkSession {
     }
   }
 
-  test("SPARK-36673: Union of structs with different orders") {
+  test("SPARK-36797: Union should resolve nested columns as top-level 
columns") {
+    // Different nested field names, but same nested field types. Union 
resolves column by position.
     val df1 = spark.range(2).withColumn("nested",
       struct(expr("id * 5 AS inner1"), struct(expr("id * 10 AS inner2"))))
     val df2 = spark.range(2).withColumn("nested",
       struct(expr("id * 5 AS inner2"), struct(expr("id * 10 AS inner1"))))
 
-    val err1 = intercept[AnalysisException](df1.union(df2).collect())
+    checkAnswer(df1.union(df2),
+      Row(0, Row(0, Row(0))) :: Row(0, Row(0, Row(0))) :: Row(1, Row(5, 
Row(10))) ::
+        Row(1, Row(5, Row(10))) :: Nil)
 
-    assert(err1.message
-      .contains("Union can only be performed on tables with the compatible 
column types"))
-
-    val df3 = spark.range(2).withColumn("nested",
-      struct(expr("id * 5 AS inner1"), struct(expr("id * 10 AS 
inner2").cast("string"))))
+    val df3 = spark.range(2).withColumn("nested array",
+      array(struct(expr("id * 5 AS inner1"), struct(expr("id * 10 AS 
inner2")))))
     val df4 = spark.range(2).withColumn("nested",
+      array(struct(expr("id * 5 AS inner2"), struct(expr("id * 10 AS 
inner1")))))
+
+    checkAnswer(df3.union(df4),
+      Row(0, Seq(Row(0, Row(0)))) :: Row(0, Seq(Row(0, Row(0)))) :: Row(1, 
Seq(Row(5, Row(10)))) ::
+        Row(1, Seq(Row(5, Row(10)))) :: Nil)
+
+    val df5 = spark.range(2).withColumn("nested array",
+      map(struct(expr("id * 5 AS key1")),
+        struct(expr("id * 5 AS inner1"), struct(expr("id * 10 AS inner2")))))
+    val df6 = spark.range(2).withColumn("nested",
+      map(struct(expr("id * 5 AS key2")),
+        struct(expr("id * 5 AS inner2"), struct(expr("id * 10 AS inner1")))))
+
+    checkAnswer(df5.union(df6),
+      Row(0, Map(Row(0) -> Row(0, Row(0)))) ::
+        Row(0, Map(Row(0) -> Row(0, Row(0)))) ::
+        Row(1, Map(Row(5) ->Row(5, Row(10)))) ::
+        Row(1, Map(Row(5) ->Row(5, Row(10)))) :: Nil)
+
+    // Different nested field names, and different nested field types.
+    val df7 = spark.range(2).withColumn("nested",
+      struct(expr("id * 5 AS inner1"), struct(expr("id * 10 AS 
inner2").cast("string"))))
+    val df8 = spark.range(2).withColumn("nested",
       struct(expr("id * 5 AS inner2").cast("string"), struct(expr("id * 10 AS 
inner1"))))
 
-    val err2 = intercept[AnalysisException](df3.union(df4).collect())
-    assert(err2.message
+    val err = intercept[AnalysisException](df7.union(df8).collect())
+    assert(err.message
       .contains("Union can only be performed on tables with the compatible 
column types"))
   }
 }
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
index 4559156..22e3e33 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
@@ -2689,10 +2689,9 @@ class SQLQuerySuite extends QueryTest with 
SharedSparkSession with AdaptiveSpark
     }
 
     withSQLConf(SQLConf.CASE_SENSITIVE.key -> "true") {
-      val m1 = intercept[AnalysisException] {
-        sql("SELECT struct(1 a) UNION ALL (SELECT struct(2 A))")
-      }.message
-      assert(m1.contains("Union can only be performed on tables with the 
compatible column types"))
+      // Union resolves nested columns by position too.
+      checkAnswer(sql("SELECT struct(1 a) UNION ALL (SELECT struct(2 A))"),
+        Row(Row(1)) :: Row(Row(2)) :: Nil)
 
       val m2 = intercept[AnalysisException] {
         sql("SELECT struct(1 a) EXCEPT (SELECT struct(2 A))")

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to