Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/22031#discussion_r208445494
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -2117,6 +2117,65 @@ class DataFrameFunctionsSuite extends QueryTest with
SharedSQLContext {
assert(ex4.getMessage.contains("data type mismatch: argument 3
requires int type"))
}
+ test("zip_with function - arrays for primitive type not containing
null") {
+ val df1 = Seq(
+ (Seq(9001, 9002, 9003), Seq(4, 5, 6)),
+ (Seq(1, 2), Seq(3, 4)),
+ (Seq.empty[Int], Seq.empty[Int]),
+ (null, null)
+ ).toDF("val1", "val2")
+ val df2 = Seq(
+ (Seq(1, 2, 3), Seq("a", "b", "c")),
+ (Seq(1, 2, 3), Seq("a", "b"))
+ ).toDF("val1", "val2")
+
+ def testArrayOfPrimitiveTypeNotContainsNull(): Unit = {
+ val expectedValue1 = Seq(
+ Row(Seq(9005, 9007, 9009)),
+ Row(Seq(4, 6)),
+ Row(Seq.empty),
+ Row(null))
+ checkAnswer(df1.selectExpr("zip_with(val1, val2, (x, y) -> x + y)"),
expectedValue1)
+
+ val expectedValue2 = Seq(
+ Row(Seq(Row("a", 1), Row("b", 2), Row("c", 3))),
+ Row(Seq(Row("a", 1), Row("b", 2), Row(null, 3))))
+ checkAnswer(df2.selectExpr("zip_with(val1, val2, (x, y) -> (y,
x))"), expectedValue2)
+ }
+
+ // Test with local relation, the Project will be evaluated without
codegen
+ testArrayOfPrimitiveTypeNotContainsNull()
+ // Test with cached relation, the Project will be evaluated with
codegen
+ df1.cache()
+ df2.cache()
+ testArrayOfPrimitiveTypeNotContainsNull()
+ }
+
+ test("zip_with function - arrays for primitive type containing null") {
+ val df1 = Seq[(Seq[Integer], Seq[Integer])](
+ (Seq(9001, null, 9003), Seq(4, 5, 6)),
+ (Seq(1, null, 2, 4), Seq(3, 4)),
+ (Seq.empty, Seq.empty),
+ (null, null)
+ ).toDF("val1", "val2")
+
+ def testArrayOfPrimitiveTypeContainsNull(): Unit = {
+ val expectedValue1 = Seq(
+ Row(Seq(9005, null, 9009)),
+ Row(Seq(4, null, null, null)),
+ Row(Seq.empty),
+ Row(null))
+ checkAnswer(df1.selectExpr("zip_with(val1, val2, (x, y) -> x + y)"),
expectedValue1)
+ }
+
+ // Test with local relation, the Project will be evaluated without
codegen
+ testArrayOfPrimitiveTypeContainsNull()
+ // Test with cached relation, the Project will be evaluated with
codegen
+ df1.cache()
+ testArrayOfPrimitiveTypeContainsNull()
+ }
+
+
--- End diff --
Also can you add tests to `HigherOrderFunctionsSuite` to check more
explicit patterns?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]