Github user kiszk commented on a diff in the pull request:
https://github.com/apache/spark/pull/21061#discussion_r183005260
--- Diff:
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CollectionExpressionsSuite.scala
---
@@ -169,4 +169,45 @@ class CollectionExpressionsSuite extends SparkFunSuite
with ExpressionEvalHelper
checkEvaluation(Reverse(as7), null)
checkEvaluation(Reverse(aa), Seq(Seq("e"), Seq("c", "d"), Seq("a",
"b")))
}
+
+ test("Array Union") {
+ val a00 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType, false))
+ val a01 = Literal.create(Seq(4, 2), ArrayType(IntegerType, false))
+ val a02 = Literal.create(Seq(1, 2, 3), ArrayType(IntegerType))
+ val a03 = Literal.create(Seq(1, 2, null, 4, 5), ArrayType(IntegerType))
+ val a04 = Literal.create(Seq(-5, 4, -3, 2, -1), ArrayType(IntegerType))
+ val a05 = Literal.create(Seq.empty[Int], ArrayType(IntegerType))
+
+ val a10 = Literal.create(Seq(1L, 2L, 3L), ArrayType(LongType, false))
+ val a11 = Literal.create(Seq(4L, 2L), ArrayType(LongType, false))
+ val a12 = Literal.create(Seq(1L, 2L, 3L), ArrayType(LongType))
+ val a13 = Literal.create(Seq(1L, 2L, null, 4L, 5L),
ArrayType(LongType))
+ val a14 = Literal.create(Seq(-5L, 4L, -3L, 2L, -1L),
ArrayType(LongType))
+ val a15 = Literal.create(Seq.empty[Long], ArrayType(LongType))
+
+ val a20 = Literal.create(Seq("b", "a", "c"), ArrayType(StringType))
+ val a21 = Literal.create(Seq("c", "d", "a", "f"),
ArrayType(StringType))
+ val a22 = Literal.create(Seq("b", null, "a", "g"),
ArrayType(StringType))
+ val a23 = Literal.create(Seq("b", "a", "c"), ArrayType(StringType,
false))
+ val a24 = Literal.create(Seq("c", "d", "a", "f"),
ArrayType(StringType, false))
+
+ val a30 = Literal.create(Seq(null, null), ArrayType(NullType))
+
+ checkEvaluation(ArrayUnion(a00, a01),
UnsafeArrayData.fromPrimitiveArray(Array(4, 1, 3, 2)))
+ checkEvaluation(ArrayUnion(a01, a02), Seq(4, 2, 1, 3))
+ checkEvaluation(ArrayUnion(a03, a04), Seq(1, 2, null, 4, 5, -5, -3,
-1))
+ checkEvaluation(ArrayUnion(a03, a05), Seq(1, 2, null, 4, 5))
+
+ checkEvaluation(
+ ArrayUnion(a10, a11), UnsafeArrayData.fromPrimitiveArray(Array(4L,
1L, 3L, 2L)))
+ checkEvaluation(ArrayUnion(a11, a12), Seq(4L, 2L, 1L, 3L))
+ checkEvaluation(ArrayUnion(a13, a14), Seq(1L, 2L, null, 4L, 5L, -5L,
-3L, -1L))
+ checkEvaluation(ArrayUnion(a13, a15), Seq(1L, 2L, null, 4L, 5L))
+
+ checkEvaluation(ArrayUnion(a20, a21), Seq("b", "a", "c", "d", "f"))
+ checkEvaluation(ArrayUnion(a20, a22), Seq("b", "a", "c", null, "g"))
+ checkEvaluation(ArrayUnion(a23, a24), Seq("b", "c", "d", "a", "f"))
+
+ checkEvaluation(ArrayUnion(a30, a30), Seq(null))
--- End diff --
The following error occurs. Having looked at other tests, I do not think this
behavior is strange: it happens because `null` carries no type information.
```
cannot resolve 'array_union(NULL, `b`)' due to data type mismatch: Element
type in both arrays must be the same;;
'Project [array_union(null, b#118) AS array_union(a, b)#121]
+- AnalysisBarrier
+- Project [_1#114 AS a#117, _2#115 AS b#118]
+- LocalRelation [_1#114, _2#115]
org.apache.spark.sql.AnalysisException: cannot resolve 'array_union(NULL,
`b`)' due to data type mismatch: Element type in both arrays must be the same;;
'Project [array_union(null, b#118) AS array_union(a, b)#121]
+- AnalysisBarrier
+- Project [_1#114 AS a#117, _2#115 AS b#118]
+- LocalRelation [_1#114, _2#115]
at
org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:93)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:85)
...
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]