This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.3 by this push:
new 954188104ba Revert "[SPARK-44934][SQL] Use outputSet instead of output
to check if column pruning occurred in
PushdownPredicateAndPruneColumnsForCTEDef"
954188104ba is described below
commit 954188104ba277567dc72bd1d6c81bbdf1558135
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Aug 24 11:02:47 2023 -0700
Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if
column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"
This reverts commit d529d32c5c06d5bbc34c91f70b0f3d8281ed5347.
---
...ushdownPredicatesAndPruneColumnsForCTEDef.scala | 2 +-
.../org/apache/spark/sql/CTEInlineSuite.scala | 33 ----------------------
2 files changed, 1 insertion(+), 34 deletions(-)
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
index ffe897d1764..2195eef2fc9 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
@@ -142,7 +142,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends
Rule[LogicalPlan] {
case cteRef @ CTERelationRef(cteId, _, output, _) =>
val (cteDef, _, _, newAttrSet) = cteMap(cteId)
- if (needsPruning(cteDef.child, newAttrSet)) {
+ if (newAttrSet.size < output.size) {
val indices = newAttrSet.toSeq.map(cteDef.output.indexOf)
val newOutput = indices.map(output)
cteRef.copy(output = newOutput)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
index 88109f25659..e758c6f8df5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
@@ -639,39 +639,6 @@ abstract class CTEInlineSuiteBase
}
}
}
-
- test("SPARK-44934: CTE column pruning handles duplicate exprIds in CTE") {
- withTempView("t") {
- Seq((0, 1, 2), (1, 2, 3)).toDF("c1", "c2",
"c3").createOrReplaceTempView("t")
- val query =
- """
- |with cte as (
- | select c1, c1, c2, c3 from t where random() > 0
- |)
- |select cte.c1, cte2.c1, cte.c2, cte2.c3 from
- | (select c1, c2 from cte) cte
- | inner join
- | (select c1, c3 from cte) cte2
- | on cte.c1 = cte2.c1
- """.stripMargin
-
- val df = sql(query)
- checkAnswer(df, Row(0, 0, 1, 2) :: Row(1, 1, 2, 3) :: Nil)
- assert(
- df.queryExecution.analyzed.collect {
- case WithCTE(_, cteDefs) => cteDefs
- }.head.length == 1,
- "With-CTE should contain 1 CTE def after analysis.")
- val cteRepartitions = df.queryExecution.optimizedPlan.collect {
- case r: RepartitionOperation => r
- }
- assert(cteRepartitions.length == 2,
- "CTE should not be inlined after optimization.")
- assert(cteRepartitions.head.collectFirst {
- case p: Project if p.projectList.length == 4 => p
- }.isDefined, "CTE columns should not be pruned.")
- }
- }
}
class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with
DisableAdaptiveExecutionSuite
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]