Repository: spark
Updated Branches:
  refs/heads/branch-2.0 1f573855f -> dc61ed406


[SPARK-18091][SQL] Deep if expressions cause Generated SpecificUnsafeProjection 
code to exceed JVM code size limit

## What changes were proposed in this pull request?

Fix for SPARK-18091 which is a bug related to large if expressions causing 
generated SpecificUnsafeProjection code to exceed JVM code size limit.

This PR changes if expression's code generation to place its predicate, true 
value and false value expressions' generated code in separate methods in 
context so as to never generate too long combined code.
## How was this patch tested?

Added a unit test and also tested manually with the application (having 
transformations similar to the unit test) which caused the issue to be 
identified in the first place.

Author: Kapil Singh <[email protected]>

Closes #15620 from kapilsingh5050/SPARK-18091-IfCodegenFix.

(cherry picked from commit e463678b194e08be4a8bc9d1d45461d6c77a15ee)
Signed-off-by: Wenchen Fan <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dc61ed40
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dc61ed40
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dc61ed40

Branch: refs/heads/branch-2.0
Commit: dc61ed40680cccea6bf49b306c642a8289558e35
Parents: 1f57385
Author: Kapil Singh <[email protected]>
Authored: Sun Dec 4 17:16:40 2016 +0800
Committer: Wenchen Fan <[email protected]>
Committed: Sun Dec 4 20:36:54 2016 +0800

----------------------------------------------------------------------
 .../expressions/conditionalExpressions.scala    | 82 ++++++++++++++++----
 .../expressions/CodeGenerationSuite.scala       | 21 +++++
 2 files changed, 90 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/dc61ed40/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
index f9499cf..e8325ae 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/conditionalExpressions.scala
@@ -60,19 +60,75 @@ case class If(predicate: Expression, trueValue: Expression, 
falseValue: Expressi
     val trueEval = trueValue.genCode(ctx)
     val falseEval = falseValue.genCode(ctx)
 
-    ev.copy(code = s"""
-      ${condEval.code}
-      boolean ${ev.isNull} = false;
-      ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
-      if (!${condEval.isNull} && ${condEval.value}) {
-        ${trueEval.code}
-        ${ev.isNull} = ${trueEval.isNull};
-        ${ev.value} = ${trueEval.value};
-      } else {
-        ${falseEval.code}
-        ${ev.isNull} = ${falseEval.isNull};
-        ${ev.value} = ${falseEval.value};
-      }""")
+    // place generated code of condition, true value and false value in 
separate methods if
+    // their code combined is large
+    val combinedLength = condEval.code.length + trueEval.code.length + 
falseEval.code.length
+    val generatedCode = if (combinedLength > 1024 &&
+      // Split these expressions only if they are created from a row object
+      (ctx.INPUT_ROW != null && ctx.currentVars == null)) {
+
+      val (condFuncName, condGlobalIsNull, condGlobalValue) =
+        createAndAddFunction(ctx, condEval, predicate.dataType, 
"evalIfCondExpr")
+      val (trueFuncName, trueGlobalIsNull, trueGlobalValue) =
+        createAndAddFunction(ctx, trueEval, trueValue.dataType, 
"evalIfTrueExpr")
+      val (falseFuncName, falseGlobalIsNull, falseGlobalValue) =
+        createAndAddFunction(ctx, falseEval, falseValue.dataType, 
"evalIfFalseExpr")
+      s"""
+        $condFuncName(${ctx.INPUT_ROW});
+        boolean ${ev.isNull} = false;
+        ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
+        if (!$condGlobalIsNull && $condGlobalValue) {
+          $trueFuncName(${ctx.INPUT_ROW});
+          ${ev.isNull} = $trueGlobalIsNull;
+          ${ev.value} = $trueGlobalValue;
+        } else {
+          $falseFuncName(${ctx.INPUT_ROW});
+          ${ev.isNull} = $falseGlobalIsNull;
+          ${ev.value} = $falseGlobalValue;
+        }
+      """
+    }
+    else {
+      s"""
+        ${condEval.code}
+        boolean ${ev.isNull} = false;
+        ${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
+        if (!${condEval.isNull} && ${condEval.value}) {
+          ${trueEval.code}
+          ${ev.isNull} = ${trueEval.isNull};
+          ${ev.value} = ${trueEval.value};
+        } else {
+          ${falseEval.code}
+          ${ev.isNull} = ${falseEval.isNull};
+          ${ev.value} = ${falseEval.value};
+        }
+      """
+    }
+
+    ev.copy(code = generatedCode)
+  }
+
+  private def createAndAddFunction(
+      ctx: CodegenContext,
+      ev: ExprCode,
+      dataType: DataType,
+      baseFuncName: String): (String, String, String) = {
+    val globalIsNull = ctx.freshName("isNull")
+    ctx.addMutableState("boolean", globalIsNull, s"$globalIsNull = false;")
+    val globalValue = ctx.freshName("value")
+    ctx.addMutableState(ctx.javaType(dataType), globalValue,
+      s"$globalValue = ${ctx.defaultValue(dataType)};")
+    val funcName = ctx.freshName(baseFuncName)
+    val funcBody =
+      s"""
+         |private void $funcName(InternalRow ${ctx.INPUT_ROW}) {
+         |  ${ev.code.trim}
+         |  $globalIsNull = ${ev.isNull};
+         |  $globalValue = ${ev.value};
+         |}
+         """.stripMargin
+    ctx.addNewFunction(funcName, funcBody)
+    (funcName, globalIsNull, globalValue)
   }
 
   override def toString: String = s"if ($predicate) $trueValue else 
$falseValue"

http://git-wip-us.apache.org/repos/asf/spark/blob/dc61ed40/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
index 45dcfca..5a15f46 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/CodeGenerationSuite.scala
@@ -95,6 +95,27 @@ class CodeGenerationSuite extends SparkFunSuite with 
ExpressionEvalHelper {
     assert(actual(0) == cases)
   }
 
+  test("SPARK-18091: split large if expressions into blocks due to JVM code 
size limit") {
+    val inStr = "StringForTesting"
+    val row = create_row(inStr)
+    val inputStrAttr = 'a.string.at(0)
+
+    var strExpr: Expression = inputStrAttr
+    for (_ <- 1 to 13) {
+      strExpr = If(EqualTo(Decode(Encode(strExpr, "utf-8"), "utf-8"), 
inputStrAttr),
+        strExpr, strExpr)
+    }
+
+    val expressions = Seq(strExpr)
+    val plan = GenerateUnsafeProjection.generate(expressions, true)
+    val actual = plan(row).toSeq(expressions.map(_.dataType))
+    val expected = Seq(UTF8String.fromString(inStr))
+
+    if (!checkResult(actual, expected)) {
+      fail(s"Incorrect Evaluation: expressions: $expressions, actual: $actual, 
expected: $expected")
+    }
+  }
+
   test("SPARK-14793: split wide array creation into blocks due to JVM code 
size limit") {
     val length = 5000
     val expressions = Seq(CreateArray(List.fill(length)(EqualTo(Literal(1), 
Literal(1)))))


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to