This is an automated email from the ASF dual-hosted git repository.
huaxingao pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new cf7e3574efc [SPARK-38825][SQL][TEST] Add a test to cover parquet notIn filter
cf7e3574efc is described below
commit cf7e3574efc1d4bb7233f18fcf344e94d26c2ac1
Author: huaxingao <[email protected]>
AuthorDate: Thu Apr 7 16:08:45 2022 -0700
[SPARK-38825][SQL][TEST] Add a test to cover parquet notIn filter
### What changes were proposed in this pull request?
Currently we don't have a test for parquet `notIn` filter, so add a test
for this
### Why are the changes needed?
to make tests more complete
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
new test
Closes #36109 from huaxingao/inFilter.
Authored-by: huaxingao <[email protected]>
Signed-off-by: huaxingao <[email protected]>
(cherry picked from commit d6fd0405b60875ac5e2c9daee1ec785f74e9b7a3)
Signed-off-by: huaxingao <[email protected]>
---
.../datasources/parquet/ParquetFilterSuite.scala | 21 +++++++++++++++++++++
1 file changed, 21 insertions(+)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 64a2ec6308c..71ea474409c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -1901,6 +1901,27 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
}
}
}
+
+ test("SPARK-38825: in and notIn filters") {
+ import testImplicits._
+ withTempPath { file =>
+      Seq(1, 2, 0, -1, 99, 1000, 3, 7, 2).toDF("id").coalesce(1).write.mode("overwrite")
+ .parquet(file.getCanonicalPath)
+ var df = spark.read.parquet(file.getCanonicalPath)
+ var in = df.filter(col("id").isin(100, 3, 11, 12, 13))
+ var notIn = df.filter(!col("id").isin(100, 3, 11, 12, 13))
+ checkAnswer(in, Seq(Row(3)))
+      checkAnswer(notIn, Seq(Row(1), Row(2), Row(0), Row(-1), Row(99), Row(1000), Row(7), Row(2)))
+
+      Seq("mary", "martin", "lucy", "alex", "mary", "dan").toDF("name").coalesce(1)
+ .write.mode("overwrite").parquet(file.getCanonicalPath)
+ df = spark.read.parquet(file.getCanonicalPath)
+ in = df.filter(col("name").isin("mary", "victor", "leo", "alex"))
+ notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex"))
+ checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary")))
+ checkAnswer(notIn, Seq(Row("martin"), Row("lucy"), Row("dan")))
+ }
+ }
}
@ExtendedSQLTest
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]