[spark] branch branch-2.3 updated: [SPARK-28361][SQL][TEST] Test equality of generated code with id in class name

dongjoon Fri, 12 Jul 2019 16:29:19 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-2.3
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-2.3 by this push:
     new 53c903c  [SPARK-28361][SQL][TEST] Test equality of generated code with id in class name
53c903c is described below

commit 53c903c9383162022160bb31d0baf43fded90f12
Author: gatorsmile <[email protected]>
AuthorDate: Fri Jul 12 16:06:44 2019 -0700

    [SPARK-28361][SQL][TEST] Test equality of generated code with id in class name
    
    A codegen test in WholeStageCodegenSuite was flaky because it used the
    codegen metrics class to test if the generated code for equivalent plans was
    identical under a particular flag. This patch switches the test to compare the
    generated code directly.
    
    N/A
    
    Closes #25131 from gatorsmile/WholeStageCodegenSuite.
    
    Authored-by: gatorsmile <[email protected]>
    Signed-off-by: Dongjoon Hyun <[email protected]>
    (cherry picked from commit 60b89cf8097ff583a29a6a19f1db4afa780f3109)
    Signed-off-by: Dongjoon Hyun <[email protected]>
---
 .../sql/execution/WholeStageCodegenSuite.scala     | 37 +++++++++++-----------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
index d0e6ec4..eee226d 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala
@@ -17,8 +17,7 @@
 
 package org.apache.spark.sql.execution
 
-import org.apache.spark.metrics.source.CodegenMetrics
-import org.apache.spark.sql.{QueryTest, Row, SaveMode}
+import org.apache.spark.sql.{Dataset, QueryTest, Row, SaveMode}
 import org.apache.spark.sql.catalyst.expressions.codegen.{CodeAndComment, 
CodeGenerator}
 import org.apache.spark.sql.execution.aggregate.HashAggregateExec
 import org.apache.spark.sql.execution.columnar.InMemoryTableScanExec
@@ -168,10 +167,10 @@ class WholeStageCodegenSuite extends QueryTest with 
SharedSQLContext {
         .select("int")
 
       val plan = df.queryExecution.executedPlan
-      assert(!plan.find(p =>
+      assert(plan.find(p =>
         p.isInstanceOf[WholeStageCodegenExec] &&
           p.asInstanceOf[WholeStageCodegenExec].child.children(0)
-            .isInstanceOf[SortMergeJoinExec]).isDefined)
+            .isInstanceOf[SortMergeJoinExec]).isEmpty)
       assert(df.collect() === Array(Row(1), Row(2)))
     }
   }
@@ -204,6 +203,13 @@ class WholeStageCodegenSuite extends QueryTest with 
SharedSQLContext {
     
wholeStageCodeGenExec.get.asInstanceOf[WholeStageCodegenExec].doCodeGen()._2
   }
 
+  def genCode(ds: Dataset[_]): Seq[CodeAndComment] = {
+    val plan = ds.queryExecution.executedPlan
+    val wholeStageCodeGenExecs = plan.collect { case p: WholeStageCodegenExec 
=> p }
+    assert(wholeStageCodeGenExecs.nonEmpty, "WholeStageCodegenExec is 
expected")
+    wholeStageCodeGenExecs.map(_.doCodeGen()._2)
+  }
+
   ignore("SPARK-21871 check if we can get large code size when compiling too 
long functions") {
     val codeWithShortFunctions = genGroupByCode(3)
     val (_, maxCodeSize1) = CodeGenerator.compile(codeWithShortFunctions)
@@ -283,9 +289,9 @@ class WholeStageCodegenSuite extends QueryTest with 
SharedSQLContext {
       val df = spark.range(100)
       val join = df.join(df, "id")
       val plan = join.queryExecution.executedPlan
-      assert(!plan.find(p =>
+      assert(plan.find(p =>
         p.isInstanceOf[WholeStageCodegenExec] &&
-          p.asInstanceOf[WholeStageCodegenExec].codegenStageId == 0).isDefined,
+          p.asInstanceOf[WholeStageCodegenExec].codegenStageId == 0).isEmpty,
         "codegen stage IDs should be preserved through ReuseExchange")
       checkAnswer(join, df.toDF)
     }
@@ -295,18 +301,13 @@ class WholeStageCodegenSuite extends QueryTest with 
SharedSQLContext {
     import testImplicits._
 
     withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_USE_ID_IN_CLASS_NAME.key -> "true") 
{
-      val bytecodeSizeHisto = 
CodegenMetrics.METRIC_GENERATED_METHOD_BYTECODE_SIZE
-
-      // the same query run twice should hit the codegen cache
-      spark.range(3).select('id + 2).collect
-      val after1 = bytecodeSizeHisto.getCount
-      spark.range(3).select('id + 2).collect
-      val after2 = bytecodeSizeHisto.getCount // same query shape as above, 
deliberately
-      // bytecodeSizeHisto's count is always monotonically increasing if new 
compilation to
-      // bytecode had occurred. If the count stayed the same that means we've 
got a cache hit.
-      assert(after1 == after2, "Should hit codegen cache. No new compilation 
to bytecode expected")
-
-      // a different query can result in codegen cache miss, that's by design
+      // the same query run twice should produce identical code, which would 
imply a hit in
+      // the generated code cache.
+      val ds1 = spark.range(3).select('id + 2)
+      val code1 = genCode(ds1)
+      val ds2 = spark.range(3).select('id + 2)
+      val code2 = genCode(ds2) // same query shape as above, deliberately
+      assert(code1 == code2, "Should produce same code")
     }
   }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[spark] branch branch-2.3 updated: [SPARK-28361][SQL][TEST] Test equality of generated code with id in class name

Reply via email to