[spark] branch branch-3.3 updated: Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"

dongjoon Thu, 24 Aug 2023 11:03:13 -0700

This is an automated email from the ASF dual-hosted git repository.

dongjoon pushed a commit to branch branch-3.3
in repository https://gitbox.apache.org/repos/asf/spark.git



The following commit(s) were added to refs/heads/branch-3.3 by this push:
     new 954188104ba Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"
954188104ba is described below

commit 954188104ba277567dc72bd1d6c81bbdf1558135
Author: Dongjoon Hyun <[email protected]>
AuthorDate: Thu Aug 24 11:02:47 2023 -0700

    Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"
    
    This reverts commit d529d32c5c06d5bbc34c91f70b0f3d8281ed5347.
---
 ...ushdownPredicatesAndPruneColumnsForCTEDef.scala |  2 +-
 .../org/apache/spark/sql/CTEInlineSuite.scala      | 33 ----------------------
 2 files changed, 1 insertion(+), 34 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
index ffe897d1764..2195eef2fc9 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/optimizer/PushdownPredicatesAndPruneColumnsForCTEDef.scala
@@ -142,7 +142,7 @@ object PushdownPredicatesAndPruneColumnsForCTEDef extends Rule[LogicalPlan] {
 
     case cteRef @ CTERelationRef(cteId, _, output, _) =>
       val (cteDef, _, _, newAttrSet) = cteMap(cteId)
-      if (needsPruning(cteDef.child, newAttrSet)) {
+      if (newAttrSet.size < output.size) {
         val indices = newAttrSet.toSeq.map(cteDef.output.indexOf)
         val newOutput = indices.map(output)
         cteRef.copy(output = newOutput)
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
index 88109f25659..e758c6f8df5 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/CTEInlineSuite.scala
@@ -639,39 +639,6 @@ abstract class CTEInlineSuiteBase
       }
     }
   }
-
-  test("SPARK-44934: CTE column pruning handles duplicate exprIds in CTE") {
-    withTempView("t") {
-      Seq((0, 1, 2), (1, 2, 3)).toDF("c1", "c2", "c3").createOrReplaceTempView("t")
-      val query =
-        """
-          |with cte as (
-          |  select c1, c1, c2, c3 from t where random() > 0
-          |)
-          |select cte.c1, cte2.c1, cte.c2, cte2.c3 from
-          |  (select c1, c2 from cte) cte
-          |    inner join
-          |  (select c1, c3 from cte) cte2
-          |    on cte.c1 = cte2.c1
-          """.stripMargin
-
-      val df = sql(query)
-      checkAnswer(df, Row(0, 0, 1, 2) :: Row(1, 1, 2, 3) :: Nil)
-      assert(
-        df.queryExecution.analyzed.collect {
-          case WithCTE(_, cteDefs) => cteDefs
-        }.head.length == 1,
-        "With-CTE should contain 1 CTE def after analysis.")
-      val cteRepartitions = df.queryExecution.optimizedPlan.collect {
-        case r: RepartitionOperation => r
-      }
-      assert(cteRepartitions.length == 2,
-        "CTE should not be inlined after optimization.")
-      assert(cteRepartitions.head.collectFirst {
-        case p: Project if p.projectList.length == 4 => p
-      }.isDefined, "CTE columns should not be pruned.")
-    }
-  }
 }
 
 class CTEInlineSuiteAEOff extends CTEInlineSuiteBase with DisableAdaptiveExecutionSuite


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@spark.apache.org
For additional commands, e-mail: commits-help@spark.apache.org

[spark] branch branch-3.3 updated: Revert "[SPARK-44934][SQL] Use outputSet instead of output to check if column pruning occurred in PushdownPredicateAndPruneColumnsForCTEDef"

Reply via email to