Repository: spark Updated Branches: refs/heads/branch-2.0 c7e7b042d -> 0cc992c89
[SPARK-16845][SQL][BRANCH-2.0] `GeneratedClass$SpecificOrdering` grows beyond 64 KB ## What changes were proposed in this pull request? This is a backport PR of #15480 into `branch-2.0`. ## How was this patch tested? Existing tests. Author: Liwei Lin <lwl...@gmail.com> Closes #17157 from ueshin/issues/SPARK-16845_2.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0cc992c8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0cc992c8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0cc992c8 Branch: refs/heads/branch-2.0 Commit: 0cc992c893829cd16bcf0324c1fed3ff3c88addb Parents: c7e7b04 Author: Liwei Lin <lwl...@gmail.com> Authored: Mon Mar 6 13:11:29 2017 -0800 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Mon Mar 6 13:11:29 2017 -0800 ---------------------------------------------------------------------- .../expressions/codegen/CodeGenerator.scala | 32 +++++++++++++++++--- .../expressions/codegen/GenerateOrdering.scala | 27 +++++++++++++++-- .../catalyst/expressions/OrderingSuite.scala | 10 ++++++ 3 files changed, 62 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/0cc992c8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index d1d7c31..23a890d 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -620,6 +620,27 @@ class CodegenContext { // Cannot split these expressions because they are 
not created from a row object. return expressions.mkString("\n") } + splitExpressions(expressions, "apply", ("InternalRow", row) :: Nil) + } + + /** + * Splits the generated code of expressions into multiple functions, because function has + * 64kb code size limit in JVM + * + * @param expressions the codes to evaluate expressions. + * @param funcName the split function name base. + * @param arguments the list of (type, name) of the arguments of the split function. + * @param returnType the return type of the split function. + * @param makeSplitFunction makes split function body, e.g. add preparation or cleanup. + * @param foldFunctions folds the split function calls. + */ + def splitExpressions( + expressions: Seq[String], + funcName: String, + arguments: Seq[(String, String)], + returnType: String = "void", + makeSplitFunction: String => String = identity, + foldFunctions: Seq[String] => String = _.mkString("", ";\n", ";")): String = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() for (code <- expressions) { @@ -639,19 +660,20 @@ class CodegenContext { // inline execution if only one block blocks.head } else { - val apply = freshName("apply") + val func = freshName(funcName) + val argString = arguments.map { case (t, name) => s"$t $name" }.mkString(", ") val functions = blocks.zipWithIndex.map { case (body, i) => - val name = s"${apply}_$i" + val name = s"${func}_$i" val code = s""" - |private void $name(InternalRow $row) { - | $body + |private $returnType $name($argString) { + | ${makeSplitFunction(body)} |} """.stripMargin addNewFunction(name, code) name } - functions.map(name => s"$name($row);").mkString("\n") + foldFunctions(functions.map(name => s"$name(${arguments.map(_._2).mkString(", ")})")) } } http://git-wip-us.apache.org/repos/asf/spark/blob/0cc992c8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala ---------------------------------------------------------------------- 
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index f4d35d2..94aefe4 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -109,8 +109,31 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR } } """ - }.mkString("\n") - comparisons + } + + ctx.splitExpressions( + expressions = comparisons, + funcName = "compare", + arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")), + returnType = "int", + makeSplitFunction = { body => + s""" + InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. + $body + return 0; + """ + }, + foldFunctions = { funCalls => + funCalls.zipWithIndex.map { case (funCall, i) => + val comp = ctx.freshName("comp") + s""" + int $comp = $funCall; + if ($comp != 0) { + return $comp; + } + """ + }.mkString + }) } protected def create(ordering: Seq[SortOrder]): BaseOrdering = { http://git-wip-us.apache.org/repos/asf/spark/blob/0cc992c8/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala index 8cc2ab4..190fab5 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/OrderingSuite.scala @@ -127,4 +127,14 @@ class OrderingSuite extends SparkFunSuite with ExpressionEvalHelper { } } } + + test("SPARK-16845: GeneratedClass$SpecificOrdering grows 
beyond 64 KB") { + val sortOrder = Literal("abc").asc + + // this is passing prior to SPARK-16845, and it should also be passing after SPARK-16845 + GenerateOrdering.generate(Array.fill(40)(sortOrder)) + + // verify that we can support up to 5000 ordering comparisons, which should be sufficient + GenerateOrdering.generate(Array.fill(5000)(sortOrder)) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org