Repository: spark Updated Branches: refs/heads/branch-2.2 14054ffc5 -> 50f86e1fe
[SPARK-21884][SPARK-21477][BACKPORT-2.2][SQL] Mark LocalTableScanExec's input data transient This PR is to backport https://github.com/apache/spark/pull/18686 for resolving the issue in https://github.com/apache/spark/pull/19094 --- ## What changes were proposed in this pull request? This PR marks the constructor parameter `rows` and the lazy val `unsafeRows` of LocalTableScanExec as transient, which avoids serializing these unneeded objects. ## How was this patch tested? N/A Author: gatorsmile <[email protected]> Closes #19101 from gatorsmile/backport-21477. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/50f86e1f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/50f86e1f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/50f86e1f Branch: refs/heads/branch-2.2 Commit: 50f86e1fe2aad67e4472b24d910ea519b9ad746f Parents: 14054ff Author: gatorsmile <[email protected]> Authored: Fri Sep 1 13:48:50 2017 -0700 Committer: gatorsmile <[email protected]> Committed: Fri Sep 1 13:48:50 2017 -0700 ---------------------------------------------------------------------- .../org/apache/spark/sql/execution/LocalTableScanExec.scala | 4 ++-- .../spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala | 8 ++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/50f86e1f/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala index 19c68c1..514ad70 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/LocalTableScanExec.scala @@ -28,12 +28,12 @@ 
import org.apache.spark.sql.execution.metric.SQLMetrics */ case class LocalTableScanExec( output: Seq[Attribute], - rows: Seq[InternalRow]) extends LeafExecNode { + @transient rows: Seq[InternalRow]) extends LeafExecNode { override lazy val metrics = Map( "numOutputRows" -> SQLMetrics.createMetric(sparkContext, "number of output rows")) - private lazy val unsafeRows: Array[InternalRow] = { + @transient private lazy val unsafeRows: Array[InternalRow] = { if (rows.isEmpty) { Array.empty } else { http://git-wip-us.apache.org/repos/asf/spark/blob/50f86e1f/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala index 58c3105..6c66902 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/OptimizeMetadataOnlyQuerySuite.scala @@ -117,4 +117,12 @@ class OptimizeMetadataOnlyQuerySuite extends QueryTest with SharedSQLContext { "select partcol1, max(partcol2) from srcpart where partcol1 = 0 group by rollup (partcol1)", "select partcol2 from (select partcol2 from srcpart where partcol1 = 0 union all " + "select partcol2 from srcpart where partcol1 = 1) t group by partcol2") + + test("SPARK-21884 Fix StackOverflowError on MetadataOnlyQuery") { + withTable("t_1000") { + sql("CREATE TABLE t_1000 (a INT, p INT) USING PARQUET PARTITIONED BY (p)") + (1 to 1000).foreach(p => sql(s"ALTER TABLE t_1000 ADD PARTITION (p=$p)")) + sql("SELECT COUNT(DISTINCT p) FROM t_1000").collect() + } + } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
