Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21603#discussion_r202256602
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala
---
@@ -747,6 +748,66 @@ class ParquetFilterSuite extends QueryTest with
ParquetTest with SharedSQLContex
// Test inverseCanDrop() has taken effect
testStringStartsWith(spark.range(1024).map(c => "100").toDF(), "value
not like '10%'")
}
+
+ test("SPARK-17091: Convert IN predicate to Parquet filter push-down") {
+ val schema = StructType(Seq(
+ StructField("a", IntegerType, nullable = false)
+ ))
+
+ val parquetSchema = new
SparkToParquetSchemaConverter(conf).convert(schema)
+
+ assertResult(Some(FilterApi.eq(intColumn("a"), null: Integer))) {
+ parquetFilters.createFilter(parquetSchema, sources.In("a",
Array(null)))
+ }
+
+ assertResult(Some(FilterApi.eq(intColumn("a"), 10: Integer))) {
+ parquetFilters.createFilter(parquetSchema, sources.In("a",
Array(10)))
+ }
+
+ // Remove duplicates
+ assertResult(Some(FilterApi.eq(intColumn("a"), 10: Integer))) {
+ parquetFilters.createFilter(parquetSchema, sources.In("a", Array(10,
10)))
+ }
+
+ assertResult(Some(or(
--- End diff --
I think you can remove this test because it is basically a duplicate of
the one below.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]