Repository: spark Updated Branches: refs/heads/master ac70c972a -> d6e555244
[SPARK-4292][SQL] Result set iterator bug in JDBC/ODBC select * from src, get the wrong result set as follows: ``` ... | 309 | val_309 | | 309 | val_309 | | 309 | val_309 | | 309 | val_309 | | 309 | val_309 | | 309 | val_309 | | 309 | val_309 | | 309 | val_309 | | 309 | val_309 | | 309 | val_309 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | | 97 | val_97 | ... ``` Author: wangfei <[email protected]> Closes #3149 from scwf/SPARK-4292 and squashes the following commits: 1574a43 [wangfei] using result.collect 8b2d845 [wangfei] adding test f64eddf [wangfei] result set iter bug Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/d6e55524 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/d6e55524 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/d6e55524 Branch: refs/heads/master Commit: d6e55524437026c0c76addeba8f99249a8316716 Parents: ac70c97 Author: wangfei <[email protected]> Authored: Fri Nov 7 12:55:11 2014 -0800 Committer: Michael Armbrust <[email protected]> Committed: Fri Nov 7 12:55:11 2014 -0800 ---------------------------------------------------------------------- .../thriftserver/HiveThriftServer2Suite.scala | 23 ++++++++++++++++++++ .../spark/sql/hive/thriftserver/Shim12.scala | 5 ++--- .../spark/sql/hive/thriftserver/Shim13.scala | 5 ++--- 3 files changed, 27 insertions(+), 6 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/d6e55524/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala ---------------------------------------------------------------------- diff --git a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala index 65d910a..bba29b2 100644 --- a/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala +++ b/sql/hive-thriftserver/src/test/scala/org/apache/spark/sql/hive/thriftserver/HiveThriftServer2Suite.scala @@ -267,4 +267,27 @@ class HiveThriftServer2Suite extends FunSuite with Logging { assert(resultSet.getString(1) === s"spark.sql.hive.version=${HiveShim.version}") } } + + test("SPARK-4292 regression: result set iterator issue") { + withJdbcStatement() { statement => + val dataFilePath = + Thread.currentThread().getContextClassLoader.getResource("data/files/small_kv.txt") + + val queries = Seq( + "DROP TABLE IF EXISTS test_4292", + "CREATE TABLE test_4292(key INT, val STRING)", + s"LOAD DATA LOCAL INPATH '$dataFilePath' OVERWRITE INTO TABLE test_4292") + + queries.foreach(statement.execute) + + val resultSet = statement.executeQuery("SELECT key FROM test_4292") + + Seq(238, 86, 311, 27, 165).foreach { key => + resultSet.next() + assert(resultSet.getInt(1) == key) + } + + statement.executeQuery("DROP TABLE IF EXISTS test_4292") + } + } } http://git-wip-us.apache.org/repos/asf/spark/blob/d6e55524/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala ---------------------------------------------------------------------- diff --git a/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala b/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala index 8077d0e..e3ba991 100644 --- a/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala +++ b/sql/hive-thriftserver/v0.12.0/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim12.scala @@ -202,13 +202,12 @@ private[hive] class SparkExecuteStatementOperation( hiveContext.sparkContext.setLocalProperty("spark.scheduler.pool", pool) } iter = { - val resultRdd = result.queryExecution.toRdd val useIncrementalCollect = hiveContext.getConf("spark.sql.thriftServer.incrementalCollect", "false").toBoolean if (useIncrementalCollect) { - resultRdd.toLocalIterator + result.toLocalIterator } else { - resultRdd.collect().iterator + result.collect().iterator } } dataTypes = result.queryExecution.analyzed.output.map(_.dataType).toArray http://git-wip-us.apache.org/repos/asf/spark/blob/d6e55524/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala ---------------------------------------------------------------------- diff --git a/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala b/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala index 2c1983d..f2ceba8 100644 --- a/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala +++ b/sql/hive-thriftserver/v0.13.1/src/main/scala/org/apache/spark/sql/hive/thriftserver/Shim13.scala @@ -87,13 +87,12 @@ private[hive] class SparkExecuteStatementOperation( val groupId = round(random * 1000000).toString hiveContext.sparkContext.setJobGroup(groupId, statement) iter = { - val resultRdd = result.queryExecution.toRdd val useIncrementalCollect = hiveContext.getConf("spark.sql.thriftServer.incrementalCollect", "false").toBoolean if (useIncrementalCollect) { - resultRdd.toLocalIterator + result.toLocalIterator } else { - resultRdd.collect().iterator + result.collect().iterator } } dataTypes = result.queryExecution.analyzed.output.map(_.dataType).toArray --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
