[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...

2018-07-18 Thread asfgit
Github user asfgit closed the pull request at:

https://github.com/apache/spark/pull/21795


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...

2018-07-18 Thread mn-mikke
Github user mn-mikke commented on a diff in the pull request:

https://github.com/apache/spark/pull/21795#discussion_r203379244
  
--- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -1147,65 +1149,66 @@ class DataFrameFunctionsSuite extends QueryTest 
with SharedSQLContext {
 val nseqi : Seq[Int] = null
 val nseqs : Seq[String] = null
 val df = Seq(
-
   (Seq(1), Seq(2, 3), Seq(5L, 6L), nseqi, Seq("a", "b", "c"), Seq("d", 
"e"), Seq("f"), nseqs),
   (Seq(1, 0), Seq.empty[Int], Seq(2L), nseqi, Seq("a"), 
Seq.empty[String], Seq(null), nseqs)
 ).toDF("i1", "i2", "i3", "in", "s1", "s2", "s3", "sn")
 
-val dummyFilter = (c: Column) => c.isNull || c.isNotNull // switch 
codeGen on
-
 // Simple test cases
-checkAnswer(
--- End diff --

Good catch!


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...

2018-07-18 Thread mn-mikke
Github user mn-mikke commented on a diff in the pull request:

https://github.com/apache/spark/pull/21795#discussion_r203378508
  
--- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/DataFrameFunctionsSuite.scala ---
@@ -924,26 +926,26 @@ class DataFrameFunctionsSuite extends QueryTest with 
SharedSQLContext {
   null
 ).toDF("i")
 
-checkAnswer(
-  idf.select(reverse('i)),
-  Seq(Row(Seq(7, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5)), Row(Seq.empty), 
Row(null))
-)
-checkAnswer(
-  idf.filter(dummyFilter('i)).select(reverse('i)),
-  Seq(Row(Seq(7, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5)), Row(Seq.empty), 
Row(null))
-)
-checkAnswer(
-  idf.selectExpr("reverse(i)"),
-  Seq(Row(Seq(7, 8, 9, 1)), Row(Seq(2, 7, 9, 8, 5)), Row(Seq.empty), 
Row(null))
-)
-checkAnswer(
-  oneRowDF.selectExpr("reverse(array(1, null, 2, null))"),
-  Seq(Row(Seq(null, 2, null, 1)))
-)
-checkAnswer(
-  oneRowDF.filter(dummyFilter('i)).selectExpr("reverse(array(1, null, 
2, null))"),
-  Seq(Row(Seq(null, 2, null, 1)))
-)
+def checkResult2(): Unit = {
--- End diff --

What about using more specific names for functions ```checkResult2```, 
```checkResult3``` etc.? Maybe ```checkStringTestCases```, 
```checkCasesWithArraysOfComplexTypes``` or something like that? 


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...

2018-07-18 Thread cloud-fan
Github user cloud-fan commented on a diff in the pull request:

https://github.com/apache/spark/pull/21795#discussion_r203315415
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
---
@@ -2336,46 +2336,40 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
 
 val sourceDF = spark.createDataFrame(rows, schema)
 
-val structWhenDF = sourceDF
+def structWhenDF: DataFrame = sourceDF
   .select(when('cond, struct(lit("a").as("val1"), 
lit(10).as("val2"))).otherwise('s) as "res")
   .select('res.getField("val1"))
-val arrayWhenDF = sourceDF
+def arrayWhenDF: DataFrame = sourceDF
   .select(when('cond, array(lit("a"), lit("b"))).otherwise('a) as 
"res")
   .select('res.getItem(0))
-val mapWhenDF = sourceDF
+def mapWhenDF: DataFrame = sourceDF
   .select(when('cond, map(lit(0), lit("a"))).otherwise('m) as "res")
   .select('res.getItem(0))
 
-val structIfDF = sourceDF
+def structIfDF: DataFrame = sourceDF
   .select(expr("if(cond, struct('a' as val1, 10 as val2), s)") as 
"res")
   .select('res.getField("val1"))
-val arrayIfDF = sourceDF
+def arrayIfDF: DataFrame = sourceDF
   .select(expr("if(cond, array('a', 'b'), a)") as "res")
   .select('res.getItem(0))
-val mapIfDF = sourceDF
+def mapIfDF: DataFrame = sourceDF
   .select(expr("if(cond, map(0, 'a'), m)") as "res")
   .select('res.getItem(0))
 
-def checkResult(df: DataFrame, codegenExpected: Boolean): Unit = {
-  
assert(df.queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec] == 
codegenExpected)
-  checkAnswer(df, Seq(Row("a"), Row(null)))
+def checkResult(): Unit = {
+  checkAnswer(structWhenDF, Seq(Row("a"), Row(null)))
+  checkAnswer(arrayWhenDF, Seq(Row("a"), Row(null)))
+  checkAnswer(mapWhenDF, Seq(Row("a"), Row(null)))
+  checkAnswer(structIfDF, Seq(Row("a"), Row(null)))
+  checkAnswer(arrayIfDF, Seq(Row("a"), Row(null)))
+  checkAnswer(mapIfDF, Seq(Row("a"), Row(null)))
 }
 
-// without codegen
-checkResult(structWhenDF, false)
-checkResult(arrayWhenDF, false)
-checkResult(mapWhenDF, false)
-checkResult(structIfDF, false)
-checkResult(arrayIfDF, false)
-checkResult(mapIfDF, false)
-
-// with codegen
-checkResult(structWhenDF.filter('cond.isNotNull), true)
--- End diff --

it will be very hard to write a general `checkAnswer`, because the local 
relation optimization can only handle `Project`. I'd like to wait for the 
general codegen config.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org



[GitHub] spark pull request #21795: [SPARK-24840][SQL] do not use dummy filter to swi...

2018-07-18 Thread mn-mikke
Github user mn-mikke commented on a diff in the pull request:

https://github.com/apache/spark/pull/21795#discussion_r203305670
  
--- Diff: sql/core/src/test/scala/org/apache/spark/sql/DataFrameSuite.scala 
---
@@ -2336,46 +2336,40 @@ class DataFrameSuite extends QueryTest with 
SharedSQLContext {
 
 val sourceDF = spark.createDataFrame(rows, schema)
 
-val structWhenDF = sourceDF
+def structWhenDF: DataFrame = sourceDF
   .select(when('cond, struct(lit("a").as("val1"), 
lit(10).as("val2"))).otherwise('s) as "res")
   .select('res.getField("val1"))
-val arrayWhenDF = sourceDF
+def arrayWhenDF: DataFrame = sourceDF
   .select(when('cond, array(lit("a"), lit("b"))).otherwise('a) as 
"res")
   .select('res.getItem(0))
-val mapWhenDF = sourceDF
+def mapWhenDF: DataFrame = sourceDF
   .select(when('cond, map(lit(0), lit("a"))).otherwise('m) as "res")
   .select('res.getItem(0))
 
-val structIfDF = sourceDF
+def structIfDF: DataFrame = sourceDF
   .select(expr("if(cond, struct('a' as val1, 10 as val2), s)") as 
"res")
   .select('res.getField("val1"))
-val arrayIfDF = sourceDF
+def arrayIfDF: DataFrame = sourceDF
   .select(expr("if(cond, array('a', 'b'), a)") as "res")
   .select('res.getItem(0))
-val mapIfDF = sourceDF
+def mapIfDF: DataFrame = sourceDF
   .select(expr("if(cond, map(0, 'a'), m)") as "res")
   .select('res.getItem(0))
 
-def checkResult(df: DataFrame, codegenExpected: Boolean): Unit = {
-  
assert(df.queryExecution.executedPlan.isInstanceOf[WholeStageCodegenExec] == 
codegenExpected)
-  checkAnswer(df, Seq(Row("a"), Row(null)))
+def checkResult(): Unit = {
+  checkAnswer(structWhenDF, Seq(Row("a"), Row(null)))
+  checkAnswer(arrayWhenDF, Seq(Row("a"), Row(null)))
+  checkAnswer(mapWhenDF, Seq(Row("a"), Row(null)))
+  checkAnswer(structIfDF, Seq(Row("a"), Row(null)))
+  checkAnswer(arrayIfDF, Seq(Row("a"), Row(null)))
+  checkAnswer(mapIfDF, Seq(Row("a"), Row(null)))
 }
 
-// without codegen
-checkResult(structWhenDF, false)
-checkResult(arrayWhenDF, false)
-checkResult(mapWhenDF, false)
-checkResult(structIfDF, false)
-checkResult(arrayIfDF, false)
-checkResult(mapIfDF, false)
-
-// with codegen
-checkResult(structWhenDF.filter('cond.isNotNull), true)
--- End diff --

@cloud-fan Thanks for the clarification and this PR!

Btw, there are many tests in ```DataFrameFunctionsSuite``` that test only 
the scenarios without codegen. WDYT about adding a generic ```checkAnswer``` 
method to ```QueryTest``` that would evaluate a dataframe for both cases 
similarly to how ```ExpressionEvalHelper.checkEvaluation``` does for expressions? 
If it's possible, of course.


---

-
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org