This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new dd6eca7550c [SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null)
dd6eca7550c is described below
commit dd6eca7550c25dbcad9f12caf9fccfcad981d33f
Author: huaxingao <[email protected]>
AuthorDate: Mon Apr 18 21:27:57 2022 -0700
[SPARK-38825][SQL][TEST][FOLLOWUP] Add test for in(null) and notIn(null)
### What changes were proposed in this pull request?
Add test for filter `in(null)` and `notIn(null)`
### Why are the changes needed?
to make tests more complete
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
new test
Closes #36248 from huaxingao/inNotIn.
Authored-by: huaxingao <[email protected]>
Signed-off-by: huaxingao <[email protected]>
(cherry picked from commit b760e4a686939bdb837402286b8d3d8b445c5ed4)
Signed-off-by: huaxingao <[email protected]>
---
.../datasources/parquet/ParquetFilterSuite.scala | 22 +++++++++++++++++-----
1 file changed, 17 insertions(+), 5 deletions(-)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 71ea474409c..7a09011f27c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -1905,21 +1905,33 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
test("SPARK-38825: in and notIn filters") {
import testImplicits._
withTempPath { file =>
-      Seq(1, 2, 0, -1, 99, 1000, 3, 7, 2).toDF("id").coalesce(1).write.mode("overwrite")
+ Seq(1, 2, 0, -1, 99, Integer.MAX_VALUE, 1000, 3, 7, Integer.MIN_VALUE, 2)
+ .toDF("id").coalesce(1).write.mode("overwrite")
.parquet(file.getCanonicalPath)
var df = spark.read.parquet(file.getCanonicalPath)
- var in = df.filter(col("id").isin(100, 3, 11, 12, 13))
- var notIn = df.filter(!col("id").isin(100, 3, 11, 12, 13))
- checkAnswer(in, Seq(Row(3)))
+      var in = df.filter(col("id").isin(100, 3, 11, 12, 13, Integer.MAX_VALUE, Integer.MIN_VALUE))
+ var notIn =
+        df.filter(!col("id").isin(100, 3, 11, 12, 13, Integer.MAX_VALUE, Integer.MIN_VALUE))
+ checkAnswer(in, Seq(Row(3), Row(-2147483648), Row(2147483647)))
       checkAnswer(notIn, Seq(Row(1), Row(2), Row(0), Row(-1), Row(99), Row(1000), Row(7), Row(2)))
-      Seq("mary", "martin", "lucy", "alex", "mary", "dan").toDF("name").coalesce(1)
+      Seq("mary", "martin", "lucy", "alex", null, "mary", "dan").toDF("name").coalesce(1)
.write.mode("overwrite").parquet(file.getCanonicalPath)
df = spark.read.parquet(file.getCanonicalPath)
in = df.filter(col("name").isin("mary", "victor", "leo", "alex"))
notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex"))
checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary")))
checkAnswer(notIn, Seq(Row("martin"), Row("lucy"), Row("dan")))
+
+ in = df.filter(col("name").isin("mary", "victor", "leo", "alex", null))
+      notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex", null))
+ checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary")))
+ checkAnswer(notIn, Seq())
+
+ in = df.filter(col("name").isin(null))
+ notIn = df.filter(!col("name").isin(null))
+ checkAnswer(in, Seq())
+ checkAnswer(notIn, Seq())
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]