Repository: spark Updated Branches: refs/heads/branch-2.1 a3d5300a0 -> ff5818b8c
[SPARK-19512][BACKPORT-2.1][SQL] codegen for compare structs fails #16852 ## What changes were proposed in this pull request? Set currentVars to null in GenerateOrdering.genComparisons before genCode is called. genCode ignores INPUT_ROW if currentVars is not null and in genComparisons we want it to use INPUT_ROW. ## How was this patch tested? Added test with 2 queries in WholeStageCodegenSuite Author: Bogdan Raducanu <bogdan....@gmail.com> Closes #16875 from bogdanrdc/SPARK-19512-2.1. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/ff5818b8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/ff5818b8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/ff5818b8 Branch: refs/heads/branch-2.1 Commit: ff5818b8cee7c718ef5bdef125c8d6971d64acde Parents: a3d5300 Author: Bogdan Raducanu <bog...@databricks.com> Authored: Fri Feb 10 10:50:07 2017 +0100 Committer: Reynold Xin <r...@databricks.com> Committed: Fri Feb 10 10:50:07 2017 +0100 ---------------------------------------------------------------------- .../catalyst/expressions/codegen/CodeGenerator.scala | 2 -- .../expressions/codegen/GenerateOrdering.scala | 14 ++++++++++++-- .../spark/sql/execution/WholeStageCodegenSuite.scala | 12 ++++++++++++ 3 files changed, 24 insertions(+), 4 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/ff5818b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala index 891c1aa..683b9cb 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/CodeGenerator.scala @@ -555,7 +555,6 @@ class CodegenContext { addNewFunction(compareFunc, funcCode) s"this.$compareFunc($c1, $c2)" case schema: StructType => - INPUT_ROW = "i" val comparisons = GenerateOrdering.genComparisons(this, schema) val compareFunc = freshName("compareStruct") val funcCode: String = @@ -566,7 +565,6 @@ class CodegenContext { if (a instanceof UnsafeRow && b instanceof UnsafeRow && a.equals(b)) { return 0; } - InternalRow i = null; $comparisons return 0; } http://git-wip-us.apache.org/repos/asf/spark/blob/ff5818b8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala index b7335f1..f7fc2d5 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/codegen/GenerateOrdering.scala @@ -73,7 +73,12 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR */ def genComparisons(ctx: CodegenContext, ordering: Seq[SortOrder]): String = { val comparisons = ordering.map { order => + val oldCurrentVars = ctx.currentVars + ctx.INPUT_ROW = "i" + // to use INPUT_ROW we must make sure currentVars is null + ctx.currentVars = null val eval = order.child.genCode(ctx) + ctx.currentVars = oldCurrentVars val asc = order.isAscending val isNullA = ctx.freshName("isNullA") val primitiveA = ctx.freshName("primitiveA") @@ -119,7 +124,7 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR """ } - ctx.splitExpressions( + val code = ctx.splitExpressions( expressions = comparisons, funcName = "compare", arguments = Seq(("InternalRow", "a"), ("InternalRow", "b")), @@ -142,6 +147,12 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR """ }.mkString }) + // make sure INPUT_ROW is declared even if splitExpressions + // returns an inlined block + s""" + |InternalRow ${ctx.INPUT_ROW} = null; + |$code + """.stripMargin } protected def create(ordering: Seq[SortOrder]): BaseOrdering = { @@ -165,7 +176,6 @@ object GenerateOrdering extends CodeGenerator[Seq[SortOrder], Ordering[InternalR ${ctx.declareAddedFunctions()} public int compare(InternalRow a, InternalRow b) { - InternalRow ${ctx.INPUT_ROW} = null; // Holds current row being evaluated. $comparisons return 0; } http://git-wip-us.apache.org/repos/asf/spark/blob/ff5818b8/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala index f26e5e7..9f6ef03 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/WholeStageCodegenSuite.scala @@ -113,4 +113,16 @@ class WholeStageCodegenSuite extends SparkPlanTest with SharedSQLContext { p.asInstanceOf[WholeStageCodegenExec].child.isInstanceOf[HashAggregateExec]).isDefined) assert(ds.collect() === Array(("a", 10.0), ("b", 3.0), ("c", 1.0))) } + + test("SPARK-19512 codegen for comparing structs is incorrect") { + // this would raise CompileException before the fix + spark.range(10) + .selectExpr("named_struct('a', id) as col1", "named_struct('a', id+2) as col2") + .filter("col1 = col2").count() + // this would raise java.lang.IndexOutOfBoundsException before the fix + spark.range(10) + .selectExpr("named_struct('a', id, 'b', id) as col1", + "named_struct('a',id+2, 'b',id+2) as col2") + .filter("col1 = col2").count() + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org