Repository: spark Updated Branches: refs/heads/master 4e7f07e25 -> 7f2e62ee6
[SPARK-22501][SQL] Fix 64KB JVM bytecode limit problem with in ## What changes were proposed in this pull request? This PR changes `In` code generation to place the generated code for the argument expressions into separate methods if their total size could be large. ## How was this patch tested? Added new test cases to `PredicateSuite` Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com> Closes #19733 from kiszk/SPARK-22501. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7f2e62ee Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7f2e62ee Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7f2e62ee Branch: refs/heads/master Commit: 7f2e62ee6b9d1f32772a18d626fb9fd907aa7733 Parents: 4e7f07e Author: Kazuaki Ishizaki <ishiz...@jp.ibm.com> Authored: Thu Nov 16 18:24:49 2017 +0100 Committer: Wenchen Fan <wenc...@databricks.com> Committed: Thu Nov 16 18:24:49 2017 +0100 ---------------------------------------------------------------------- .../sql/catalyst/expressions/predicates.scala | 20 +++++++++++++++----- .../catalyst/expressions/PredicateSuite.scala | 6 ++++++ 2 files changed, 21 insertions(+), 5 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/7f2e62ee/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala index 61df5e0..5d75c60 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/predicates.scala @@ -236,24 +236,34 @@ case class In(value: Expression, list: Seq[Expression]) extends Predicate 
{ override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = { val valueGen = value.genCode(ctx) val listGen = list.map(_.genCode(ctx)) + ctx.addMutableState("boolean", ev.value, "") + ctx.addMutableState("boolean", ev.isNull, "") + val valueArg = ctx.freshName("valueArg") val listCode = listGen.map(x => s""" if (!${ev.value}) { ${x.code} if (${x.isNull}) { ${ev.isNull} = true; - } else if (${ctx.genEqual(value.dataType, valueGen.value, x.value)}) { + } else if (${ctx.genEqual(value.dataType, valueArg, x.value)}) { ${ev.isNull} = false; ${ev.value} = true; } } - """).mkString("\n") + """) + val listCodes = if (ctx.INPUT_ROW != null && ctx.currentVars == null) { + val args = ("InternalRow", ctx.INPUT_ROW) :: (ctx.javaType(value.dataType), valueArg) :: Nil + ctx.splitExpressions(listCode, "valueIn", args) + } else { + listCode.mkString("\n") + } ev.copy(code = s""" ${valueGen.code} - boolean ${ev.value} = false; - boolean ${ev.isNull} = ${valueGen.isNull}; + ${ev.value} = false; + ${ev.isNull} = ${valueGen.isNull}; if (!${ev.isNull}) { - $listCode + ${ctx.javaType(value.dataType)} $valueArg = ${valueGen.value}; + $listCodes } """) } http://git-wip-us.apache.org/repos/asf/spark/blob/7f2e62ee/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala index 1438a88..865092a 100644 --- a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala +++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/PredicateSuite.scala @@ -239,6 +239,12 @@ class PredicateSuite extends SparkFunSuite with ExpressionEvalHelper { } } + test("SPARK-22501: In should not generate codes beyond 64KB") { + val N = 3000 + val sets = (1 to 
N).map(i => Literal(i.toDouble)) + checkEvaluation(In(Literal(1.0D), sets), true) + } + test("INSET") { val hS = HashSet[Any]() + 1 + 2 val nS = HashSet[Any]() + 1 + 2 + null --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org