spark git commit: [SPARK-18009][SQL] Fix ClassCastException while calling toLocalIterator() on dataframe produced by RunnableCommand
Repository: spark Updated Branches: refs/heads/branch-2.0 ea205e376 -> dcf2f090c [SPARK-18009][SQL] Fix ClassCastException while calling toLocalIterator() on dataframe produced by RunnableCommand A short code snippet that uses toLocalIterator() on a dataframe produced by a RunnableCommand reproduces the problem. toLocalIterator() is called by thriftserver when `spark.sql.thriftServer.incrementalCollect`is set to handle queries producing large result set. **Before** ```SQL scala> spark.sql("show databases") res0: org.apache.spark.sql.DataFrame = [databaseName: string] scala> res0.toLocalIterator() 16/10/26 03:00:24 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0) java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.GenericInternalRow cannot be cast to org.apache.spark.sql.catalyst.expressions.UnsafeRow ``` **After** ```SQL scala> spark.sql("drop database databases") res30: org.apache.spark.sql.DataFrame = [] scala> spark.sql("show databases") res31: org.apache.spark.sql.DataFrame = [databaseName: string] scala> res31.toLocalIterator().asScala foreach println [default] [parquet] ``` Added a test in DDLSuite Author: Dilip Biswal Closes #15642 from dilipbiswal/SPARK-18009. (cherry picked from commit dd4f088c1df6abd728e5544a17ba85322bedfe4c) Signed-off-by: Wenchen Fan Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dcf2f090 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dcf2f090 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dcf2f090 Branch: refs/heads/branch-2.0 Commit: dcf2f090cab768203e9767f050612d2838368c4f Parents: ea205e3 Author: Dilip Biswal Authored: Thu Oct 27 13:12:14 2016 +0800 Committer: Wenchen Fan Committed: Thu Oct 27 13:14:32 2016 +0800 -- .../org/apache/spark/sql/execution/command/commands.scala | 2 ++ .../org/apache/spark/sql/execution/command/DDLSuite.scala | 7 +++ 2 files changed, 9 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dcf2f090/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 698c625..d82e54e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -66,6 +66,8 @@ case class ExecutedCommandExec(cmd: RunnableCommand) extends SparkPlan { override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray + override def executeToIterator: Iterator[InternalRow] = sideEffectResult.toIterator + override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray protected override def doExecute(): RDD[InternalRow] = { http://git-wip-us.apache.org/repos/asf/spark/blob/dcf2f090/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index caa2fca..252064d 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1489,4 +1489,11 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { assert(sql("show user functions").count() === 1L) } } + + test("SPARK-18009 calling toLocalIterator on commands") { +import scala.collection.JavaConverters._ +val df = sql("show databases") +val rows: Seq[Row] = df.toLocalIterator().asScala.toSeq +assert(rows.length > 0) + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org
spark git commit: [SPARK-18009][SQL] Fix ClassCastException while calling toLocalIterator() on dataframe produced by RunnableCommand
Repository: spark Updated Branches: refs/heads/master f1aeed8b0 -> dd4f088c1 [SPARK-18009][SQL] Fix ClassCastException while calling toLocalIterator() on dataframe produced by RunnableCommand ## What changes were proposed in this pull request? A short code snippet that uses toLocalIterator() on a dataframe produced by a RunnableCommand reproduces the problem. toLocalIterator() is called by thriftserver when `spark.sql.thriftServer.incrementalCollect`is set to handle queries producing large result set. **Before** ```SQL scala> spark.sql("show databases") res0: org.apache.spark.sql.DataFrame = [databaseName: string] scala> res0.toLocalIterator() 16/10/26 03:00:24 ERROR Executor: Exception in task 0.0 in stage 0.0 (TID 0) java.lang.ClassCastException: org.apache.spark.sql.catalyst.expressions.GenericInternalRow cannot be cast to org.apache.spark.sql.catalyst.expressions.UnsafeRow ``` **After** ```SQL scala> spark.sql("drop database databases") res30: org.apache.spark.sql.DataFrame = [] scala> spark.sql("show databases") res31: org.apache.spark.sql.DataFrame = [databaseName: string] scala> res31.toLocalIterator().asScala foreach println [default] [parquet] ``` ## How was this patch tested? Added a test in DDLSuite Author: Dilip Biswal Closes #15642 from dilipbiswal/SPARK-18009. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/dd4f088c Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/dd4f088c Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/dd4f088c Branch: refs/heads/master Commit: dd4f088c1df6abd728e5544a17ba85322bedfe4c Parents: f1aeed8 Author: Dilip Biswal Authored: Thu Oct 27 13:12:14 2016 +0800 Committer: Wenchen Fan Committed: Thu Oct 27 13:12:14 2016 +0800 -- .../org/apache/spark/sql/execution/command/commands.scala | 2 ++ .../org/apache/spark/sql/execution/command/DDLSuite.scala | 7 +++ 2 files changed, 9 insertions(+) -- http://git-wip-us.apache.org/repos/asf/spark/blob/dd4f088c/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala -- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala index 698c625..d82e54e 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/command/commands.scala @@ -66,6 +66,8 @@ case class ExecutedCommandExec(cmd: RunnableCommand) extends SparkPlan { override def executeCollect(): Array[InternalRow] = sideEffectResult.toArray + override def executeToIterator: Iterator[InternalRow] = sideEffectResult.toIterator + override def executeTake(limit: Int): Array[InternalRow] = sideEffectResult.take(limit).toArray protected override def doExecute(): RDD[InternalRow] = { http://git-wip-us.apache.org/repos/asf/spark/blob/dd4f088c/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala -- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala index de326f8..b989d01 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/command/DDLSuite.scala @@ -1805,4 +1805,11 @@ class DDLSuite extends QueryTest with SharedSQLContext with BeforeAndAfterEach { } } } + + test("SPARK-18009 calling toLocalIterator on commands") { +import scala.collection.JavaConverters._ +val df = sql("show databases") +val rows: Seq[Row] = df.toLocalIterator().asScala.toSeq +assert(rows.length > 0) + } } - To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org