Repository: spark Updated Branches: refs/heads/master 0bdb4e516 -> ba0e79f57
[SPARK-22772][SQL] Use splitExpressionsWithCurrentInputs to split codes in elt ## What changes were proposed in this pull request? In SPARK-22550, which fixes the 64KB JVM bytecode limit problem with elt, `buildCodeBlocks` is used to split the code. However, we should use `splitExpressionsWithCurrentInputs` because it considers both normal and whole-stage codegen (splitting under whole-stage codegen is not supported yet, so in that case it simply doesn't split the code). ## How was this patch tested? Existing tests. Author: Liang-Chi Hsieh <vii...@gmail.com> Closes #19964 from viirya/SPARK-22772. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ba0e79f5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ba0e79f5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ba0e79f5 Branch: refs/heads/master Commit: ba0e79f57caa279773fb014b7883ee5d69dd0a68 Parents: 0bdb4e5 Author: Liang-Chi Hsieh <vii...@gmail.com> Authored: Wed Dec 13 13:54:16 2017 -0800 Committer: gatorsmile <gatorsm...@gmail.com> Committed: Wed Dec 13 13:54:16 2017 -0800 ---------------------------------------------------------------------- .../expressions/codegen/CodeGenerator.scala | 2 +- .../expressions/stringExpressions.scala | 81 ++++++++++---------- 2 files changed, 43 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ba0e79f5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 257c3f1..b1d9311 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -878,7 +878,7 @@ class CodegenContext { * * @param expressions the codes to evaluate expressions. */ - def buildCodeBlocks(expressions: Seq[String]): Seq[String] = { + private def buildCodeBlocks(expressions: Seq[String]): Seq[String] = { val blocks = new ArrayBuffer[String]() val blockBuilder = new StringBuilder() var length = 0 http://git-wip-us.apache.org/repos/asf/spark/blob/ba0e79f5/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala index 47f0b57..8c4d2fd 100755 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/stringExpressions.scala @@ -289,53 +289,56 @@ case class Elt(children: Seq[Expression]) val index = indexExpr.genCode(ctx) val strings = stringExprs.map(_.genCode(ctx)) val indexVal = ctx.freshName("index") + val indexMatched = ctx.freshName("eltIndexMatched") + val stringVal = ctx.freshName("stringVal") + ctx.addMutableState(ctx.javaType(dataType), stringVal) + val assignStringValue = strings.zipWithIndex.map { case (eval, index) => s""" - case ${index + 1}: - ${eval.code} - $stringVal = ${eval.isNull} ? null : ${eval.value}; - break; - """ + |if ($indexVal == ${index + 1}) { + | ${eval.code} + | $stringVal = ${eval.isNull} ? 
null : ${eval.value}; + | $indexMatched = true; + | continue; + |} + """.stripMargin } - val cases = ctx.buildCodeBlocks(assignStringValue) - val codes = if (cases.length == 1) { - s""" - UTF8String $stringVal = null; - switch ($indexVal) { - ${cases.head} - } - """ - } else { - var prevFunc = "null" - for (c <- cases.reverse) { - val funcName = ctx.freshName("eltFunc") - val funcBody = s""" - private UTF8String $funcName(InternalRow ${ctx.INPUT_ROW}, int $indexVal) { - UTF8String $stringVal = null; - switch ($indexVal) { - $c - default: - return $prevFunc; - } - return $stringVal; - } - """ - val fullFuncName = ctx.addNewFunction(funcName, funcBody) - prevFunc = s"$fullFuncName(${ctx.INPUT_ROW}, $indexVal)" - } - s"UTF8String $stringVal = $prevFunc;" - } + val codes = ctx.splitExpressionsWithCurrentInputs( + expressions = assignStringValue, + funcName = "eltFunc", + extraArguments = ("int", indexVal) :: Nil, + returnType = ctx.JAVA_BOOLEAN, + makeSplitFunction = body => + s""" + |${ctx.JAVA_BOOLEAN} $indexMatched = false; + |do { + | $body + |} while (false); + |return $indexMatched; + """.stripMargin, + foldFunctions = _.map { funcCall => + s""" + |$indexMatched = $funcCall; + |if ($indexMatched) { + | continue; + |} + """.stripMargin + }.mkString) ev.copy( s""" - ${index.code} - final int $indexVal = ${index.value}; - $codes - UTF8String ${ev.value} = $stringVal; - final boolean ${ev.isNull} = ${ev.value} == null; - """) + |${index.code} + |final int $indexVal = ${index.value}; + |${ctx.JAVA_BOOLEAN} $indexMatched = false; + |$stringVal = null; + |do { + | $codes + |} while (false); + |final UTF8String ${ev.value} = $stringVal; + |final boolean ${ev.isNull} = ${ev.value} == null; + """.stripMargin) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org