(incubator-gluten) branch main updated: [VL] Remove override of test in GlutenDynamicPartitionPruningSuite (#8575)

yangzy Thu, 23 Jan 2025 22:11:07 -0800

This is an automated email from the ASF dual-hosted git repository.

yangzy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git



The following commit(s) were added to refs/heads/main by this push:
     new e8bb7ee55e [VL] Remove override of test in 
GlutenDynamicPartitionPruningSuite (#8575)
e8bb7ee55e is described below

commit e8bb7ee55e3040d61f6f891e6589bb6b1b803ab5
Author: Ankita Victor <[email protected]>
AuthorDate: Fri Jan 24 11:40:57 2025 +0530

    [VL] Remove override of test in GlutenDynamicPartitionPruningSuite (#8575)
---
 .../sql/GlutenDynamicPartitionPruningSuite.scala   | 105 +--------------------
 1 file changed, 1 insertion(+), 104 deletions(-)

diff --git 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
index 4678f881a2..2382386767 100644
--- 
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
+++ 
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
@@ -21,7 +21,6 @@ import org.apache.gluten.execution.{BatchScanExecTransformer, 
FileSourceScanExec
 
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.catalyst.expressions.{DynamicPruningExpression, 
Expression}
-import 
org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode.{CODEGEN_ONLY,
 NO_CODEGEN}
 import org.apache.spark.sql.catalyst.plans.ExistenceJoin
 import org.apache.spark.sql.connector.catalog.InMemoryTableCatalog
 import org.apache.spark.sql.execution._
@@ -50,9 +49,7 @@ abstract class GlutenDynamicPartitionPruningSuiteBase
     // overwritten with different plan
     "SPARK-38674: Remove useless deduplicate in SubqueryBroadcastExec",
     "Make sure dynamic pruning works on uncorrelated queries",
-    "Subquery reuse across the whole plan",
-    // struct join key not supported, fell-back to Vanilla join
-    "SPARK-32659: Fix the data issue when pruning DPP on non-atomic type"
+    "Subquery reuse across the whole plan"
   )
 
   // === Following cases override super class's cases ===
@@ -182,106 +179,6 @@ abstract class GlutenDynamicPartitionPruningSuiteBase
     }
   }
 
-  testGluten("SPARK-32659: Fix the data issue when pruning DPP on non-atomic 
type") {
-    Seq(NO_CODEGEN, CODEGEN_ONLY).foreach {
-      mode =>
-        Seq(true, false).foreach {
-          pruning =>
-            withSQLConf(
-              SQLConf.CODEGEN_FACTORY_MODE.key -> mode.toString,
-              SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> s"$pruning") {
-              Seq("struct", "array").foreach {
-                dataType =>
-                  val df = sql(
-                    s"""
-                       |SELECT f.date_id, f.product_id, f.units_sold, 
f.store_id FROM fact_stats f
-                       |JOIN dim_stats s
-                       |ON $dataType(f.store_id) = $dataType(s.store_id) WHERE 
s.country = 'DE'
-              """.stripMargin)
-
-                  if (pruning) {
-                    df.collect()
-
-                    val plan = df.queryExecution.executedPlan
-                    val dpExprs = collectDynamicPruningExpressions(plan)
-                    val hasSubquery = dpExprs.exists {
-                      case InSubqueryExec(_, _: SubqueryExec, _, _, _, _) => 
true
-                      case _ => false
-                    }
-                    val subqueryBroadcast = dpExprs.collect {
-                      case InSubqueryExec(_, b: SubqueryBroadcastExec, _, _, 
_, _) => b
-                      case InSubqueryExec(_, b: ColumnarSubqueryBroadcastExec, 
_, _, _, _) => b
-                    }
-
-                    val hasFilter = if (false) "Should" else "Shouldn't"
-                    assert(
-                      !hasSubquery,
-                      s"$hasFilter trigger DPP with a subquery 
duplicate:\n${df.queryExecution}")
-                    val hasBroadcast = if (true) "Should" else "Shouldn't"
-                    assert(
-                      subqueryBroadcast.nonEmpty,
-                      s"$hasBroadcast trigger DPP " +
-                        s"with a reused broadcast 
exchange:\n${df.queryExecution}")
-
-                    subqueryBroadcast.foreach {
-                      s =>
-                        s.child match {
-                          case _: ReusedExchangeExec => // reuse check ok.
-                          case BroadcastQueryStageExec(
-                                _,
-                                _: ReusedExchangeExec,
-                                _
-                              ) => // reuse check ok.
-                          case b: BroadcastExchangeLike =>
-                            val hasReuse = plan.find {
-                              case ReusedExchangeExec(_, e) => e eq b
-                              case _ => false
-                            }.isDefined
-                          // assert(hasReuse, s"$s\nshould have been reused 
in\n$plan")
-                          case a: AdaptiveSparkPlanExec =>
-                            val broadcastQueryStage = collectFirst(a) {
-                              case b: BroadcastQueryStageExec => b
-                            }
-                            val broadcastPlan = 
broadcastQueryStage.get.broadcast
-                            val hasReuse = find(plan) {
-                              case ReusedExchangeExec(_, e) => e eq 
broadcastPlan
-                              case b: BroadcastExchangeLike => b eq 
broadcastPlan
-                              case _ => false
-                            }.isDefined
-                          // assert(hasReuse, s"$s\nshould have been reused 
in\n$plan")
-                          case _ =>
-                            fail(s"Invalid child node found in\n$s")
-                        }
-                    }
-
-                    val isMainQueryAdaptive = 
plan.isInstanceOf[AdaptiveSparkPlanExec]
-                    
subqueriesAll(plan).filterNot(subqueryBroadcast.contains).foreach {
-                      s =>
-                        val subquery = s match {
-                          case r: ReusedSubqueryExec => r.child
-                          case o => o
-                        }
-                        assert(
-                          subquery
-                            .find(_.isInstanceOf[AdaptiveSparkPlanExec])
-                            .isDefined == isMainQueryAdaptive)
-                    }
-                  } else {
-                    checkPartitionPruningPredicate(df, false, false)
-                  }
-
-                  checkAnswer(
-                    df,
-                    Row(1030, 2, 10, 3) ::
-                      Row(1040, 2, 50, 3) ::
-                      Row(1050, 2, 50, 3) ::
-                      Row(1060, 2, 50, 3) :: Nil)
-              }
-            }
-        }
-    }
-  }
-
   testGluten("SPARK-38674: Remove useless deduplicate in 
SubqueryBroadcastExec") {
     withTable("duplicate_keys") {
       withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(incubator-gluten) branch main updated: [VL] Remove override of test in GlutenDynamicPartitionPruningSuite (#8575)

Reply via email to