Repository: spark
Updated Branches:
  refs/heads/branch-2.1 12c937ede -> 50f28dfe4


[SPARK-17685][SQL] Make SortMergeJoinExec's currentVars is null when calling 
createJoinKey

## What changes were proposed in this pull request?

The following SQL query cause `IndexOutOfBoundsException` issue when `LIMIT > 
1310720`:
```sql
CREATE TABLE tab1(int int, int2 int, str string);
CREATE TABLE tab2(int int, int2 int, str string);
INSERT INTO tab1 values(1,1,'str');
INSERT INTO tab1 values(2,2,'str');
INSERT INTO tab2 values(1,1,'str');
INSERT INTO tab2 values(2,3,'str');

SELECT
  count(*)
FROM
  (
    SELECT t1.int, t2.int2
    FROM (SELECT * FROM tab1 LIMIT 1310721) t1
    INNER JOIN (SELECT * FROM tab2 LIMIT 1310721) t2
    ON (t1.int = t2.int AND t1.int2 = t2.int2)
  ) t;
```

This pull request fix this issue.

## How was this patch tested?

unit tests

Author: Yuming Wang <wgy...@gmail.com>

Closes #17920 from wangyum/SPARK-17685.

(cherry picked from commit 771abeb46f637592aba2e63db2ed05b6cabfd0be)
Signed-off-by: Herman van Hovell <hvanhov...@databricks.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50f28dfe
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50f28dfe
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50f28dfe

Branch: refs/heads/branch-2.1
Commit: 50f28dfe43410dadabbecc62e34fde8bacc8aee6
Parents: 12c937e
Author: Yuming Wang <wgy...@gmail.com>
Authored: Tue May 9 19:45:00 2017 -0700
Committer: Herman van Hovell <hvanhov...@databricks.com>
Committed: Tue May 9 19:45:22 2017 -0700

----------------------------------------------------------------------
 .../spark/sql/execution/joins/SortMergeJoinExec.scala     |  1 +
 .../scala/org/apache/spark/sql/DataFrameJoinSuite.scala   | 10 ++++++++++
 2 files changed, 11 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/50f28dfe/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
index a1f9416..89a9b38 100644
--- 
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
+++ 
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
@@ -342,6 +342,7 @@ case class SortMergeJoinExec(
       keys: Seq[Expression],
       input: Seq[Attribute]): Seq[ExprCode] = {
     ctx.INPUT_ROW = row
+    ctx.currentVars = null
     keys.map(BindReferences.bindReference(_, input).genCode(ctx))
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/50f28dfe/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala 
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index 541ffb5..9383e83 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -248,4 +248,14 @@ class DataFrameJoinSuite extends QueryTest with 
SharedSQLContext {
     val ab = a.join(b, Seq("a"), "fullouter")
     checkAnswer(ab.join(c, "a"), Row(3, null, 4, 1) :: Nil)
   }
+
+  test("SPARK-17685: WholeStageCodegenExec throws IndexOutOfBoundsException") {
+    val df = Seq((1, 1, "1"), (2, 2, "3")).toDF("int", "int2", "str")
+    val df2 = Seq((1, 1, "1"), (2, 3, "5")).toDF("int", "int2", "str")
+    val limit = 1310721
+    val innerJoin = df.limit(limit).join(df2.limit(limit), Seq("int", "int2"), 
"inner")
+      .agg(count($"int"))
+    checkAnswer(innerJoin, Row(1) :: Nil)
+  }
+
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to