This is an automated email from the ASF dual-hosted git repository.
etudenhoefner pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new fac485c56b Spark: Analyze but don't optimize view body during creation
(#14681)
fac485c56b is described below
commit fac485c56b6e24b9081fed9e001077632945613c
Author: jbewing <[email protected]>
AuthorDate: Wed Dec 3 01:19:41 2025 -0500
Spark: Analyze but don't optimize view body during creation (#14681)
---
.../spark/sql/catalyst/analysis/CheckViews.scala | 1 +
.../spark/sql/catalyst/analysis/ResolveViews.scala | 1 +
.../plans/logical/views/CreateIcebergView.scala | 24 ++++++++++++++--------
.../v2/ExtendedDataSourceV2Strategy.scala | 1 +
.../spark/sql/catalyst/analysis/CheckViews.scala | 1 +
.../spark/sql/catalyst/analysis/ResolveViews.scala | 1 +
.../plans/logical/views/CreateIcebergView.scala | 24 ++++++++++++++--------
.../v2/ExtendedDataSourceV2Strategy.scala | 1 +
.../spark/sql/catalyst/analysis/CheckViews.scala | 1 +
.../spark/sql/catalyst/analysis/ResolveViews.scala | 1 +
.../plans/logical/views/CreateIcebergView.scala | 24 ++++++++++++++--------
.../v2/ExtendedDataSourceV2Strategy.scala | 1 +
12 files changed, 57 insertions(+), 24 deletions(-)
diff --git
a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
index a7c903dfdb..549aefaae2 100644
---
a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
+++
b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
@@ -48,6 +48,7 @@ object CheckViews extends (LogicalPlan => Unit) {
_,
_,
replace,
+ _,
_) =>
verifyColumnCount(resolvedIdent, columnAliases, query)
SchemaUtils.checkColumnNameDuplication(
diff --git
a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
index 1e0e5e8951..4f7e2b4d0f 100644
---
a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
+++
b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
@@ -73,6 +73,7 @@ case class ResolveViews(spark: SparkSession) extends Rule[LogicalPlan] with Look
_,
_,
_,
+ _,
_) if query.resolved && !c.rewritten =>
val aliased = aliasColumns(query, columnAliases, columnComments)
c.copy(
diff --git
a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
index c5dadae701..84a00a4a9a 100644
---
a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
+++
b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
@@ -18,9 +18,12 @@
*/
package org.apache.spark.sql.catalyst.plans.logical.views
-import org.apache.spark.sql.catalyst.plans.logical.BinaryCommand
+import org.apache.spark.sql.catalyst.analysis.AnalysisContext
+import org.apache.spark.sql.catalyst.plans.logical.AnalysisOnlyCommand
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+// Align Iceberg's CreateIcebergView with Spark's CreateViewCommand by extending AnalysisOnlyCommand.
+// The command's children are analyzed then hidden, so the optimizer/planner won't traverse the view body.
case class CreateIcebergView(
child: LogicalPlan,
queryText: String,
@@ -32,14 +35,19 @@ case class CreateIcebergView(
properties: Map[String, String],
allowExisting: Boolean,
replace: Boolean,
- rewritten: Boolean = false)
- extends BinaryCommand {
- override def left: LogicalPlan = child
+ rewritten: Boolean = false,
+ isAnalyzed: Boolean = false)
+ extends AnalysisOnlyCommand {
- override def right: LogicalPlan = query
+ override def childrenToAnalyze: Seq[LogicalPlan] = child :: query :: Nil
+
+  override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = {
+ copy(isAnalyzed = true)
+ }
override protected def withNewChildrenInternal(
- newLeft: LogicalPlan,
- newRight: LogicalPlan): LogicalPlan =
- copy(child = newLeft, query = newRight)
+ newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = {
+ assert(!isAnalyzed)
+ copy(child = newChildren.head, query = newChildren.last)
+ }
}
diff --git
a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
index afdddb7fab..53def0e952 100644
---
a/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
+++
b/spark/v3.4/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
@@ -198,6 +198,7 @@ case class ExtendedDataSourceV2Strategy(spark: SparkSession) extends Strategy wi
properties,
allowExisting,
replace,
+ _,
_) =>
CreateV2ViewExec(
catalog = viewCatalog,
diff --git
a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
index aefc672dc8..319ab78a53 100644
---
a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
+++
b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
@@ -48,6 +48,7 @@ object CheckViews extends (LogicalPlan => Unit) {
_,
_,
replace,
+ _,
_) =>
verifyColumnCount(resolvedIdent, columnAliases, query)
SchemaUtils.checkColumnNameDuplication(
diff --git
a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
index 1e0e5e8951..4f7e2b4d0f 100644
---
a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
+++
b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
@@ -73,6 +73,7 @@ case class ResolveViews(spark: SparkSession) extends Rule[LogicalPlan] with Look
_,
_,
_,
+ _,
_) if query.resolved && !c.rewritten =>
val aliased = aliasColumns(query, columnAliases, columnComments)
c.copy(
diff --git
a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
index c5dadae701..84a00a4a9a 100644
---
a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
+++
b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
@@ -18,9 +18,12 @@
*/
package org.apache.spark.sql.catalyst.plans.logical.views
-import org.apache.spark.sql.catalyst.plans.logical.BinaryCommand
+import org.apache.spark.sql.catalyst.analysis.AnalysisContext
+import org.apache.spark.sql.catalyst.plans.logical.AnalysisOnlyCommand
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+// Align Iceberg's CreateIcebergView with Spark's CreateViewCommand by extending AnalysisOnlyCommand.
+// The command's children are analyzed then hidden, so the optimizer/planner won't traverse the view body.
case class CreateIcebergView(
child: LogicalPlan,
queryText: String,
@@ -32,14 +35,19 @@ case class CreateIcebergView(
properties: Map[String, String],
allowExisting: Boolean,
replace: Boolean,
- rewritten: Boolean = false)
- extends BinaryCommand {
- override def left: LogicalPlan = child
+ rewritten: Boolean = false,
+ isAnalyzed: Boolean = false)
+ extends AnalysisOnlyCommand {
- override def right: LogicalPlan = query
+ override def childrenToAnalyze: Seq[LogicalPlan] = child :: query :: Nil
+
+  override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = {
+ copy(isAnalyzed = true)
+ }
override protected def withNewChildrenInternal(
- newLeft: LogicalPlan,
- newRight: LogicalPlan): LogicalPlan =
- copy(child = newLeft, query = newRight)
+ newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = {
+ assert(!isAnalyzed)
+ copy(child = newChildren.head, query = newChildren.last)
+ }
}
diff --git
a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
index 64b228a424..6b340b7249 100644
---
a/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
+++
b/spark/v3.5/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
@@ -148,6 +148,7 @@ case class ExtendedDataSourceV2Strategy(spark: SparkSession) extends Strategy wi
properties,
allowExisting,
replace,
+ _,
_) =>
CreateV2ViewExec(
catalog = viewCatalog,
diff --git
a/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
b/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
index 343fbe7a91..5ad4b9c014 100644
---
a/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
+++
b/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/CheckViews.scala
@@ -48,6 +48,7 @@ object CheckViews extends (LogicalPlan => Unit) {
_,
_,
replace,
+ _,
_) =>
verifyColumnCount(resolvedIdent, columnAliases, query)
SchemaUtils.checkColumnNameDuplication(
diff --git
a/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
b/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
index 23d33352f1..ff7d20241b 100644
---
a/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
+++
b/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveViews.scala
@@ -73,6 +73,7 @@ case class ResolveViews(spark: SparkSession) extends Rule[LogicalPlan] with Look
_,
_,
_,
+ _,
_) if query.resolved && !c.rewritten =>
val aliased = aliasColumns(query, columnAliases, columnComments)
c.copy(
diff --git
a/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
b/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
index c5dadae701..84a00a4a9a 100644
---
a/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
+++
b/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/views/CreateIcebergView.scala
@@ -18,9 +18,12 @@
*/
package org.apache.spark.sql.catalyst.plans.logical.views
-import org.apache.spark.sql.catalyst.plans.logical.BinaryCommand
+import org.apache.spark.sql.catalyst.analysis.AnalysisContext
+import org.apache.spark.sql.catalyst.plans.logical.AnalysisOnlyCommand
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan
+// Align Iceberg's CreateIcebergView with Spark's CreateViewCommand by extending AnalysisOnlyCommand.
+// The command's children are analyzed then hidden, so the optimizer/planner won't traverse the view body.
case class CreateIcebergView(
child: LogicalPlan,
queryText: String,
@@ -32,14 +35,19 @@ case class CreateIcebergView(
properties: Map[String, String],
allowExisting: Boolean,
replace: Boolean,
- rewritten: Boolean = false)
- extends BinaryCommand {
- override def left: LogicalPlan = child
+ rewritten: Boolean = false,
+ isAnalyzed: Boolean = false)
+ extends AnalysisOnlyCommand {
- override def right: LogicalPlan = query
+ override def childrenToAnalyze: Seq[LogicalPlan] = child :: query :: Nil
+
+  override def markAsAnalyzed(analysisContext: AnalysisContext): LogicalPlan = {
+ copy(isAnalyzed = true)
+ }
override protected def withNewChildrenInternal(
- newLeft: LogicalPlan,
- newRight: LogicalPlan): LogicalPlan =
- copy(child = newLeft, query = newRight)
+ newChildren: IndexedSeq[LogicalPlan]): LogicalPlan = {
+ assert(!isAnalyzed)
+ copy(child = newChildren.head, query = newChildren.last)
+ }
}
diff --git
a/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
b/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
index 898e6f3d41..da540f5891 100644
---
a/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
+++
b/spark/v4.0/spark-extensions/src/main/scala/org/apache/spark/sql/execution/datasources/v2/ExtendedDataSourceV2Strategy.scala
@@ -140,6 +140,7 @@ case class ExtendedDataSourceV2Strategy(spark: SparkSession) extends Strategy wi
properties,
allowExisting,
replace,
+ _,
_) =>
CreateV2ViewExec(
catalog = viewCatalog,