This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 0663797 [SPARK-26930][SQL] Tests in ParquetFilterSuite don't verify filter class 0663797 is described below commit 066379783af154f1c9e2fae6daaf444b6e383ab0 Author: nandorKollar <nandorkol...@users.noreply.github.com> AuthorDate: Fri Feb 22 14:07:55 2019 +0800 [SPARK-26930][SQL] Tests in ParquetFilterSuite don't verify filter class ## What changes were proposed in this pull request? Add assert to verify predicate class in ParquetFilterSuite ## How was this patch tested? Ran ParquetFilterSuite, tests passed Closes #23855 from nandorKollar/SPARK-26930. Lead-authored-by: nandorKollar <nandorkol...@users.noreply.github.com> Co-authored-by: Hyukjin Kwon <gurwls...@gmail.com> Co-authored-by: Nandor Kollar <nkol...@cloudera.com> Signed-off-by: Hyukjin Kwon <gurwls...@apache.org> --- .../datasources/parquet/ParquetFilterSuite.scala | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala index 9cfc943..255f7db 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/parquet/ParquetFilterSuite.scala @@ -29,6 +29,7 @@ import org.apache.spark.SparkException import org.apache.spark.sql._ import org.apache.spark.sql.catalyst.dsl.expressions._ import org.apache.spark.sql.catalyst.expressions._ +import org.apache.spark.sql.catalyst.optimizer.InferFiltersFromConstraints import org.apache.spark.sql.catalyst.planning.PhysicalOperation import org.apache.spark.sql.execution.datasources.{DataSourceStrategy, HadoopFsRelation, LogicalRelation} import org.apache.spark.sql.functions._ @@ -91,6 +92,10 @@ class ParquetFilterSuite extends QueryTest with ParquetTest 
with SharedSQLContex SQLConf.PARQUET_FILTER_PUSHDOWN_TIMESTAMP_ENABLED.key -> "true", SQLConf.PARQUET_FILTER_PUSHDOWN_DECIMAL_ENABLED.key -> "true", SQLConf.PARQUET_FILTER_PUSHDOWN_STRING_STARTSWITH_ENABLED.key -> "true", + // Disable adding filters from constraints because it adds, for instance, + // is-not-null to pushed filters, which makes it hard to test if the pushed + // filter is expected or not (this had to be fixed with SPARK-13495). + SQLConf.OPTIMIZER_EXCLUDED_RULES.key -> InferFiltersFromConstraints.ruleName, SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") { val query = df .select(output.map(e => Column(e)): _*) @@ -109,13 +114,16 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex DataSourceStrategy.selectFilters(maybeRelation.get, maybeAnalyzedPredicate.toSeq) assert(selectedFilters.nonEmpty, "No filter is pushed down") - selectedFilters.foreach { pred => + val pushedParquetFilters = selectedFilters.map { pred => val maybeFilter = parquetFilters.createFilter( new SparkToParquetSchemaConverter(conf).convert(df.schema), pred) assert(maybeFilter.isDefined, s"Couldn't generate filter predicate for $pred") - // Doesn't bother checking type parameters here (e.g. `Eq[Integer]`) - maybeFilter.exists(_.getClass === filterClass) + maybeFilter.get } + // Doesn't bother checking type parameters here (e.g. 
`Eq[Integer]`) + assert(pushedParquetFilters.exists(_.getClass === filterClass), + s"${pushedParquetFilters.map(_.getClass).toList} did not contain ${filterClass}.") + checker(stripSparkFilter(query), expected) } } @@ -1073,20 +1081,20 @@ class ParquetFilterSuite extends QueryTest with ParquetTest with SharedSQLContex checkFilterPredicate( !'_1.startsWith("").asInstanceOf[Predicate], - classOf[UserDefinedByInstance[_, _]], + classOf[Operators.Not], Seq().map(Row(_))) Seq("2", "2s", "2st", "2str", "2str2").foreach { prefix => checkFilterPredicate( !'_1.startsWith(prefix).asInstanceOf[Predicate], - classOf[UserDefinedByInstance[_, _]], + classOf[Operators.Not], Seq("1str1", "3str3", "4str4").map(Row(_))) } Seq("2S", "null", "2str22").foreach { prefix => checkFilterPredicate( !'_1.startsWith(prefix).asInstanceOf[Predicate], - classOf[UserDefinedByInstance[_, _]], + classOf[Operators.Not], Seq("1str1", "2str2", "3str3", "4str4").map(Row(_))) } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org