[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...
Github user asfgit closed the pull request at: https://github.com/apache/spark/pull/21795
[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...
Github user mn-mikke commented on a diff in the pull request: https://github.com/apache/spark/pull/21795#discussion_r203379244

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
```
@@ -1147,65 +1149,66 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
     val nseqi : Seq[Int] = null
     val nseqs : Seq[String] = null
     val df = Seq(
-      (Seq(1), Seq(2, 3), Seq(5L, 6L), nseqi, Seq("a", "b", "c"), Seq("d", "e"), Seq("f"), nseqs),
       (Seq(1, 0), Seq.empty[Int], Seq(2L), nseqi, Seq("a"), Seq.empty[String], Seq(null), nseqs)
     ).toDF("i1", "i2", "i3", "in", "s1", "s2", "s3", "sn")
-    val dummyFilter = (c: Column) => c.isNull || c.isNotNull // switch codeGen on

     // Simple test cases
-    checkAnswer(
```
--- End diff --

Good catch!
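For readers skimming the archive: the `dummyFilter` removed in this diff is a predicate that is true for every row, added to queries only so the plan contains a `Filter` node and whole-stage codegen kicks in. A minimal sketch of the old trick, assuming it runs inside the suite (where `spark.implicits._` is in scope) against the `df` from the diff above; the `concat` usage is illustrative:

```scala
import org.apache.spark.sql.Column
import org.apache.spark.sql.functions.concat

// Trivially true predicate: every value is either NULL or not NULL, so the
// Filter keeps all rows while still changing the physical plan.
val dummyFilter = (c: Column) => c.isNull || c.isNotNull

// Hypothetical usage: logically a no-op, but the extra Filter node made the
// query run through whole-stage codegen instead of being optimized into a
// codegen-free local projection.
val withCodegen = df.filter(dummyFilter('i1)).select(concat('i1, 'i2))
```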
[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...
Github user mn-mikke commented on a diff in the pull request: https://github.com/apache/spark/pull/21795#discussion_r203378508

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
```
@@ -924,26 +926,26 @@ class DataFrameFunctionsSuite extends QueryTest with SharedSQLContext {
       null
     ).toDF("i")

-    checkAnswer(
-      idf.select(reverse('i)),
-      Seq(Row(Seq(7, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5)), Row(Seq.empty), Row(null))
-    )
-    checkAnswer(
-      idf.filter(dummyFilter('i)).select(reverse('i)),
-      Seq(Row(Seq(7, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5)), Row(Seq.empty), Row(null))
-    )
-    checkAnswer(
-      idf.selectExpr("reverse(i)"),
-      Seq(Row(Seq(7, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5)), Row(Seq.empty), Row(null))
-    )
-    checkAnswer(
-      oneRowDF.selectExpr("reverse(array(1, null, 2, null))"),
-      Seq(Row(Seq(null, 2, null, 1)))
-    )
-    checkAnswer(
-      oneRowDF.filter(dummyFilter('i)).selectExpr("reverse(array(1, null, 2, null))"),
-      Seq(Row(Seq(null, 2, null, 1)))
-    )
+    def checkResult2(): Unit = {
```
--- End diff --

What about using more specific names for the functions ```checkResult2```, ```checkResult3```, etc.? Maybe ```checkStringTestCases```, ```checkCasesWithArraysOfComplexTypes``` or something like that?
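A quick sketch of the suggestion, with the assertions taken from the diff above; the name is just an example, and `idf`/`oneRowDF` are assumed to be the fixtures defined earlier in the test:

```scala
// Hypothetical descriptive name in place of the numbered checkResult2.
def checkArrayTestCases(): Unit = {
  checkAnswer(
    idf.select(reverse('i)),
    Seq(Row(Seq(7, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5)), Row(Seq.empty), Row(null)))
  checkAnswer(
    oneRowDF.selectExpr("reverse(array(1, null, 2, null))"),
    Seq(Row(Seq(null, 2, null, 1))))
}

// The test body then reads as a sequence of intention-revealing calls.
checkArrayTestCases()
```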
[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...
Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21795#discussion_r203315415

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala ---
```
@@ -2336,46 +2336,40 @@ class DataFrameSuite extends QueryTest with SharedSQLContext {
     val sourceDF = spark.createDataFrame(rows, schema)

-    val structWhenDF = sourceDF
+    def structWhenDF: DataFrame = sourceDF
       .select(when('cond, struct(lit("a").as("val1"), lit(10).as("val2"))).otherwise('s) as "res")
       .select('res.getField("val1"))
-    val arrayWhenDF = sourceDF
+    def arrayWhenDF: DataFrame = sourceDF
       .select(when('cond, array(lit("a"), lit("b"))).otherwise('a) as "res")
       .select('res.getItem(0))
-    val mapWhenDF = sourceDF
+    def mapWhenDF: DataFrame = sourceDF
       .select(when('cond, map(lit(0), lit("a"))).otherwise('m) as "res")
       .select('res.getItem(0))
-    val structIfDF = sourceDF
+    def structIfDF: DataFrame = sourceDF
       .select(expr("if(cond, struct('a' as val1, 10 as val2), s)") as "res")
       .select('res.getField("val1"))
-    val arrayIfDF = sourceDF
+    def arrayIfDF: DataFrame = sourceDF
       .select(expr("if(cond, array('a', 'b'), a)") as "res")
       .select('res.getItem(0))
-    val mapIfDF = sourceDF
+    def mapIfDF: DataFrame = sourceDF
       .select(expr("if(cond, map(0, 'a'), m)") as "res")
       .select('res.getItem(0))

-    def checkResult(df: DataFrame, codegenExpected: Boolean): Unit = {
-      assert(df.queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec] == codegenExpected)
-      checkAnswer(df, Seq(Row("a"), Row(null)))
+    def checkResult(): Unit = {
+      checkAnswer(structWhenDF, Seq(Row("a"), Row(null)))
+      checkAnswer(arrayWhenDF, Seq(Row("a"), Row(null)))
+      checkAnswer(mapWhenDF, Seq(Row("a"), Row(null)))
+      checkAnswer(structIfDF, Seq(Row("a"), Row(null)))
+      checkAnswer(arrayIfDF, Seq(Row("a"), Row(null)))
+      checkAnswer(mapIfDF, Seq(Row("a"), Row(null)))
     }

-    // without codegen
-    checkResult(structWhenDF, false)
-    checkResult(arrayWhenDF, false)
-    checkResult(mapWhenDF, false)
-    checkResult(structIfDF, false)
-    checkResult(arrayIfDF, false)
-    checkResult(mapIfDF, false)
-
-    // with codegen
-    checkResult(structWhenDF.filter('cond.isNotNull), true)
```
--- End diff --

It would be very hard to write a general `checkAnswer` here, because the local-relation optimization can only handle `Project`. I'd rather wait for the general codegen config.
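For context, a minimal sketch of what driving this test through a configuration switch could look like; `withSQLConf` and `SQLConf.WHOLESTAGE_CODEGEN_ENABLED` exist in Spark's test utilities, but wiring them into this particular test is an assumption, not what the PR does:

```scala
import org.apache.spark.sql.internal.SQLConf

// Run the same assertions with whole-stage codegen both on and off,
// instead of using a dummy Filter to flip the physical plan.
Seq("true", "false").foreach { codegenEnabled =>
  withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled) {
    checkResult()
  }
}
```

Note that the diff above turns the `*DF` values into `def`s, so each call re-derives its plan under whatever config is active at that point.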
[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...
Github user mn-mikke commented on a diff in the pull request: https://github.com/apache/spark/pull/21795#discussion_r203305670

--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala --- (the same hunk as quoted in the previous message) --- End diff --

@cloud-fan Thanks for the clarification and this PR! Btw, there are many tests in ```DataFrameFunctionsSuite``` that test only the scenarios without codegen. WDYT about adding a generic ```checkAnswer``` method to ```QueryTest``` that would evaluate a DataFrame for both cases, similarly to how ```ExpressionEvalHelper.checkEvaluation``` does for expressions? If it's possible, of course.
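A rough sketch of the proposed helper, assuming it lived in `QueryTest`; the name and the by-name parameter are illustrative, not an existing API:

```scala
import org.apache.spark.sql.internal.SQLConf

// Hypothetical helper mirroring ExpressionEvalHelper.checkEvaluation:
// evaluate the same DataFrame once per whole-stage codegen mode.
protected def checkAnswerWithAndWithoutCodegen(
    df: => DataFrame,
    expectedAnswer: Seq[Row]): Unit = {
  Seq("true", "false").foreach { codegenEnabled =>
    withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> codegenEnabled) {
      // By-name parameter: the plan is rebuilt under each active config.
      checkAnswer(df, expectedAnswer)
    }
  }
}
```

As cloud-fan notes in the reply above, a config switch alone is not enough while the local-relation optimization handles only `Project`, which is why the thread defers this to the general codegen config.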