Repository: spark
Updated Branches:
refs/heads/branch-2.2 7600a7ab6 -> 6a996b362
[SPARK-17685][SQL] Make SortMergeJoinExec's currentVars null when calling
createJoinKey
## What changes were proposed in this pull request?
The following SQL query causes an `IndexOutOfBoundsException` when `LIMIT >
1310720`:
```sql
CREATE TABLE tab1(int int, int2 int, str string);
CREATE TABLE tab2(int int, int2 int, str string);
INSERT INTO tab1 values(1,1,'str');
INSERT INTO tab1 values(2,2,'str');
INSERT INTO tab2 values(1,1,'str');
INSERT INTO tab2 values(2,3,'str');
SELECT
count(*)
FROM
(
SELECT t1.int, t2.int2
FROM (SELECT * FROM tab1 LIMIT 1310721) t1
INNER JOIN (SELECT * FROM tab2 LIMIT 1310721) t2
ON (t1.int = t2.int AND t1.int2 = t2.int2)
) t;
```
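This pull request fixes the issue by setting `ctx.currentVars` to null in
`SortMergeJoinExec.createJoinKey` before generating code for the join keys.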
## How was this patch tested?
unit tests
Author: Yuming Wang <[email protected]>
Closes #17920 from wangyum/SPARK-17685.
(cherry picked from commit 771abeb46f637592aba2e63db2ed05b6cabfd0be)
Signed-off-by: Herman van Hovell <[email protected]>
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/6a996b36
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/6a996b36
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/6a996b36
Branch: refs/heads/branch-2.2
Commit: 6a996b36283dcd22ff7aa38968a80f575d2f151e
Parents: 7600a7a
Author: Yuming Wang <[email protected]>
Authored: Tue May 9 19:45:00 2017 -0700
Committer: Herman van Hovell <[email protected]>
Committed: Tue May 9 19:45:09 2017 -0700
----------------------------------------------------------------------
.../spark/sql/execution/joins/SortMergeJoinExec.scala | 1 +
.../scala/org/apache/spark/sql/DataFrameJoinSuite.scala | 10 ++++++++++
2 files changed, 11 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/6a996b36/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
index c6aae1a..26fb610 100644
---
a/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
+++
b/sql/core/src/main/scala/org/apache/spark/sql/execution/joins/SortMergeJoinExec.scala
@@ -371,6 +371,7 @@ case class SortMergeJoinExec(
keys: Seq[Expression],
input: Seq[Attribute]): Seq[ExprCode] = {
ctx.INPUT_ROW = row
+ ctx.currentVars = null
keys.map(BindReferences.bindReference(_, input).genCode(ctx))
}
http://git-wip-us.apache.org/repos/asf/spark/blob/6a996b36/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
----------------------------------------------------------------------
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
index 4a52af6..aef0d7f 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFrameJoinSuite.scala
@@ -264,4 +264,14 @@ class DataFrameJoinSuite extends QueryTest with
SharedSQLContext {
val ab = a.join(b, Seq("a"), "fullouter")
checkAnswer(ab.join(c, "a"), Row(3, null, 4, 1) :: Nil)
}
+
+ test("SPARK-17685: WholeStageCodegenExec throws IndexOutOfBoundsException") {
+ val df = Seq((1, 1, "1"), (2, 2, "3")).toDF("int", "int2", "str")
+ val df2 = Seq((1, 1, "1"), (2, 3, "5")).toDF("int", "int2", "str")
+ val limit = 1310721
+ val innerJoin = df.limit(limit).join(df2.limit(limit), Seq("int", "int2"),
"inner")
+ .agg(count($"int"))
+ checkAnswer(innerJoin, Row(1) :: Nil)
+ }
+
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]