Repository: spark Updated Branches: refs/heads/master 4e7f07e25 -> 7f2e62ee6
[SPARK-22501][SQL] Fix 64KB JVM bytecode limit problem with in ## What changes were proposed in this pull request? This PR changes `In` code generation to place the generated code for the argument expressions into separate methods if their total size could be large. ## How was this patch tested? Added new test cases to `PredicateSuite` Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com> Closes #19733 from kiszk/SPARK-22501. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f2e62ee Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f2e62ee Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f2e62ee Branch: refs/heads/master Commit: 7f2e62ee6b9d1f32772a18d626fb9fd907aa7733 Parents: 4e7f07e Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com> Authored: Thu Nov 16 18:24:49 2017 +0100 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Thu Nov 16 18:24:49 2017 +0100 ---------------------------------------------------------------------- .../sql/catalyst/expressions/predicates.scala | 20 +++++++++++++++----- .../catalyst/expressions/PredicateSuite.scala | 6 ++++++ 2 files changed, 21 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/7f2e62ee/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 61df5e0..5d75c60 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -236,24 +236,34 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate 
{ override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val valueGen = value.genCode(ctx) val listGen = list.map(_.genCode(ctx)) + ctx.addMutableState("boolean", ev.value, "") + ctx.addMutableState("boolean", ev.isNull, "") + val valueArg = ctx.freshName("valueArg") val listCode = listGen.map(x => s""" if (!${ev.value}) { ${x.code} if (${x.isNull}) { ${ev.isNull} = true; - } else if (${ctx.genEqual(value.dataType, valueGen.value, x.value)}) { + } else if (${ctx.genEqual(value.dataType, valueArg, x.value)}) { ${ev.isNull} = false; ${ev.value} = true; } } - """).mkString("\n") + """) + val listCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { + val args = ("InternalRow", ctx.INPUT_ROW) :: (ctx.javaType(value.dataType), valueArg) :: Nil + ctx.splitExpressions(listCode, "valueIn", args) + } else { + listCode.mkString("\n") + } ev.copy(code = s""" ${valueGen.code} - boolean ${ev.value} = false; - boolean ${ev.isNull} = ${valueGen.isNull}; + ${ev.value} = false; + ${ev.isNull} = ${valueGen.isNull}; if (!${ev.isNull}) { - $listCode + ${ctx.javaType(value.dataType)} $valueArg = ${valueGen.value}; + $listCodes } """) } http://git-wip-us.apache.org/repos/asf/spark/blob/7f2e62ee/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 1438a88..865092a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -239,6 +239,12 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-22501: In should not generate codes beyond 64KB") { + val N = 3000 + val sets = (1 to 
N).map(i => Literal(i.toDouble)) + checkEvaluation(In(Literal(1.0D), sets), true) + } + test("INSET") { val hS = HashSet[Any]() + 1 + 2 val nS = HashSet[Any]() + 1 + 2 + null --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org