[spark] branch branch-3.4 updated: Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"

dongjoon Thu, 24 Aug 2023 10:54:47 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.4
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-3.4 by this push:
     new 6b6a0d501b6 Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"
6b6a0d501b6 is described below

commit 6b6a0d501b65924c836abc1677267cc292566c80
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Aug 24 10:54:19 2023 -0700

    Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"
    
    This reverts commit b359f6ce32cb1a8b9607c773e2534b05ecf4cdc5.
---
 ...ushdownPredicatesAndPruneColumnsForCTEDef.scala |  2 +-
 .../org/apache/spark/sql/CTEInlineSuite.scala      | 33 ----------------------
 2 files changed, 1 insertion(+), 34 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
index e643a1af363..f351ba0b39a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
@@ -143,7 +143,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] {
 
     case cteRef @ CTERelationRef(cteId, _, output, _) =>
       val (cteDef, _, _, newAttrSet) = cteMap(cteId)
-      if (needsPruning(cteDef.child, newAttrSet)) {
+      if (newAttrSet.size < output.size) {
         val indices = newAttrSet.toSeq.map(cteDef.output.indexOf)
         val newOutput = indices.map(output)
         cteRef.copy(output = newOutput)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
index 3400434d832..c7c09bf7c79 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
@@ -645,39 +645,6 @@ abstract class CTEInlineSuiteBase
       }
     }
   }
-
-  test("SPARK-44934: CTE column pruning handles duplicate exprIds in CTE") {
-    withTempView("t") {
-      Seq((0, 1, 2), (1, 2, 3)).toDF("c1", "c2", "c3").createOrReplaceTempView("t")
-      val query =
-        """
-          |with cte as (
-          |  select c1, c1, c2, c3 from t where random() > 0
-          |)
-          |select cte.c1, cte2.c1, cte.c2, cte2.c3 from
-          |  (select c1, c2 from cte) cte
-          |    inner join
-          |  (select c1, c3 from cte) cte2
-          |    on cte.c1 = cte2.c1
-          """.stripMargin
-
-      val df = sql(query)
-      checkAnswer(df, Row(0, 0, 1, 2) :: Row(1, 1, 2, 3) :: Nil)
-      assert(
-        df.queryExecution.analyzed.collect {
-          case WithCTE(_, cteDefs) => cteDefs
-        }.head.length == 1,
-        "With-CTE should contain 1 CTE def after analysis.")
-      val cteRepartitions = df.queryExecution.optimizedPlan.collect {
-        case r: RepartitionOperation => r
-      }
-      assert(cteRepartitions.length == 2,
-        "CTE should not be inlined after optimization.")
-      assert(cteRepartitions.head.collectFirst {
-        case p: Project if p.projectList.length == 4 => p
-      }.isDefined, "CTE columns should not be pruned.")
-    }
-  }
 }
 
 class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with DisableAdaptiveExecutionSuite


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org

[spark] branch branch-3.4 updated: Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"

Reply via email to