This is an automated email from the ASF dual-hosted git repository.

huaxingao pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new cf7e3574efc [SPARK-38825][SQL][TEST] Add a test to cover parquet notIn filter
cf7e3574efc is described below

commit cf7e3574efc1d4bb7233f18fcf344e94d26c2ac1
Author: huaxingao <huaxin_...@apple.com>
AuthorDate: Thu Apr 7 16:08:45 2022 -0700

    [SPARK-38825][SQL][TEST] Add a test to cover parquet notIn filter
    
    ### What changes were proposed in this pull request?
    Currently we don't have a test for parquet `notIn` filter, so add a test for this
    
    ### Why are the changes needed?
    to make tests more complete
    
    ### Does this PR introduce _any_ user-facing change?
    no
    
    ### How was this patch tested?
    new test
    
    Closes #36109 from huaxingao/inFilter.
    
    Authored-by: huaxingao <huaxin_...@apple.com>
    Signed-off-by: huaxingao <huaxin_...@apple.com>
    (cherry picked from commit d6fd0405b60875ac5e2c9daee1ec785f74e9b7a3)
    Signed-off-by: huaxingao <huaxin_...@apple.com>
---
 .../datasources/parquet/ParquetFilterSuite.scala    | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
index 64a2ec6308c..71ea474409c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
@@ -1901,6 +1901,27 @@ abstract class ParquetFilterSuite extends QueryTest with ParquetTest with Shared
       }
     }
   }
+
+  test("SPARK-38825: in and notIn filters") {
+    import testImplicits._
+    withTempPath { file =>
+      Seq(1, 2, 0, -1, 99, 1000, 3, 7, 2).toDF("id").coalesce(1).write.mode("overwrite")
+        .parquet(file.getCanonicalPath)
+      var df = spark.read.parquet(file.getCanonicalPath)
+      var in = df.filter(col("id").isin(100, 3, 11, 12, 13))
+      var notIn = df.filter(!col("id").isin(100, 3, 11, 12, 13))
+      checkAnswer(in, Seq(Row(3)))
+      checkAnswer(notIn, Seq(Row(1), Row(2), Row(0), Row(-1), Row(99), Row(1000), Row(7), Row(2)))
+
+      Seq("mary", "martin", "lucy", "alex", "mary", "dan").toDF("name").coalesce(1)
+        .write.mode("overwrite").parquet(file.getCanonicalPath)
+      df = spark.read.parquet(file.getCanonicalPath)
+      in = df.filter(col("name").isin("mary", "victor", "leo", "alex"))
+      notIn = df.filter(!col("name").isin("mary", "victor", "leo", "alex"))
+      checkAnswer(in, Seq(Row("mary"), Row("alex"), Row("mary")))
+      checkAnswer(notIn, Seq(Row("martin"), Row("lucy"), Row("dan")))
+    }
+  }
 }
 
 @ExtendedSQLTest


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to