Repository: spark
Updated Branches:
  refs/heads/branch-2.0 c7e7b042d -> 0cc992c89


[SPARK-16845][SQL][BRANCH-2.0] GeneratedClass$SpecificOrdering` grows beyond 64 
KB

## What changes were proposed in this pull request?

This is a backport pr of #15480 into `branch-2.0`.

## How was this patch tested?

Existing tests.

Author: Liwei Lin <lwl...@gmail.com>

Closes #17157 from ueshin/issues/SPARK-16845_2.0.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0cc992c8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0cc992c8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0cc992c8

Branch: refs/heads/branch-2.0
Commit: 0cc992c893829cd16bcf0324c1fed3ff3c88addb
Parents: c7e7b04
Author: Liwei Lin <lwl...@gmail.com>
Authored: Mon Mar 6 13:11:29 2017 -0800
Committer: Wenchen Fan <wenc...@databricks.com>
Committed: Mon Mar 6 13:11:29 2017 -0800

----------------------------------------------------------------------
 .../expressions/codegen/CodeGenerator.scala     | 32 +++++++++++++++++---
 .../expressions/codegen/GenerateOrdering.scala  | 27 +++++++++++++++--
 .../catalyst/expressions/OrderingSuite.scala    | 10 ++++++
 3 files changed, 62 insertions(+), 7 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0cc992c8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
index d1d7c31..23a890d 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala
@@ -620,6 +620,27 @@ class CodegenContext {
       // Cannot split these expressions because they are not created from a 
row object.
       return expressions.mkString("\n")
     }
+    splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil)
+  }
+
+  /**
+   * Splits the generated code of expressions into multiple functions, because 
function has
+   * 64kb code size limit in JVM
+   *
+   * @param expressions the codes to evaluate expressions.
+   * @param funcName the split function name base.
+   * @param arguments the list of (type, name) of the arguments of the split 
function.
+   * @param returnType the return type of the split function.
+   * @param makeSplitFunction makes split function body, e.g. add preparation 
or cleanup.
+   * @param foldFunctions folds the split function calls.
+   */
+  def splitExpressions(
+      expressions: Seq[String],
+      funcName: String,
+      arguments: Seq[(String, String)],
+      returnType: String = "void",
+      makeSplitFunction: String => String = identity,
+      foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): 
String = {
     val blocks = new ArrayBuffer[String]()
     val blockBuilder = new StringBuilder()
     for (code <- expressions) {
@@ -639,19 +660,20 @@ class CodegenContext {
       // inline execution if only one block
       blocks.head
     } else {
-      val apply = freshName("apply")
+      val func = freshName(funcName)
+      val argString = arguments.map { case (t, name) => s"$t $name" 
}.mkString(", ")
       val functions = blocks.zipWithIndex.map { case (body, i) =>
-        val name = s"${apply}_$i"
+        val name = s"${func}_$i"
         val code = s"""
-           |private void $name(InternalRow $row) {
-           |  $body
+           |private $returnType $name($argString) {
+           |  ${makeSplitFunction(body)}
            |}
          """.stripMargin
         addNewFunction(name, code)
         name
       }
 
-      functions.map(name => s"$name($row);").mkString("\n")
+      foldFunctions(functions.map(name => 
s"$name(${arguments.map(_._2).mkString(", ")})"))
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/0cc992c8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
index f4d35d2..94aefe4 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala
@@ -109,8 +109,31 @@ object GenerateOrdering extends 
CodeGenerator[Seq[SortOrder], Ordering[InternalR
             }
           }
       """
-    }.mkString("\n")
-    comparisons
+    }
+
+    ctx.splitExpressions(
+      expressions = comparisons,
+      funcName = "compare",
+      arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")),
+      returnType = "int",
+      makeSplitFunction = { body =>
+        s"""
+          InternalRow ${ctx.INPUT_ROW} = null;  // Holds current row being 
evaluated.
+          $body
+          return 0;
+        """
+      },
+      foldFunctions = { funCalls =>
+        funCalls.zipWithIndex.map { case (funCall, i) =>
+          val comp = ctx.freshName("comp")
+          s"""
+            int $comp = $funCall;
+            if ($comp != 0) {
+              return $comp;
+            }
+          """
+        }.mkString
+      })
   }
 
   protected def create(ordering: Seq[SortOrder]): BaseOrdering = {

http://git-wip-us.apache.org/repos/asf/spark/blob/0cc992c8/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
index 8cc2ab4..190fab5 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala
@@ -127,4 +127,14 @@ class OrderingSuite extends SparkFunSuite with 
ExpressionEvalHelper {
       }
     }
   }
+
+  test("SPARK-16845: GeneratedClass$SpecificOrdering grows beyond 64 KB") {
+    val sortOrder = Literal("abc").asc
+
+    // this is passing prior to SPARK-16845, and it should also be passing 
after SPARK-16845
+    GenerateOrdering.generate(Array.fill(40)(sortOrder))
+
+    // verify that we can support up to 5000 ordering comparisons, which 
should be sufficient
+    GenerateOrdering.generate(Array.fill(5000)(sortOrder))
+  }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to