This is an automated email from the ASF dual-hosted git repository.
yangzy pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new e8bb7ee55e [VL] Remove override of test in GlutenDynamicPartitionPruningSuite (#8575)
e8bb7ee55e is described below
commit e8bb7ee55e3040d61f6f891e6589bb6b1b803ab5
Author: Ankita Victor <[email protected]>
AuthorDate: Fri Jan 24 11:40:57 2025 +0530
[VL] Remove override of test in GlutenDynamicPartitionPruningSuite (#8575)
---
.../sql/GlutenDynamicPartitionPruningSuite.scala | 105 +--------------------
1 file changed, 1 insertion(+), 104 deletions(-)
diff --git a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
index 4678f881a2..2382386767 100644
--- a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
+++ b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala
@@ -21,7 +21,6 @@ import org.apache.gluten.execution.{BatchScanExecTransformer,
FileSourceScanExec
import org.apache.spark.SparkConf
import org.apache.spark.sql.catalyst.expressions.{DynamicPruningExpression,
Expression}
-import
org.apache.spark.sql.catalyst.expressions.CodegenObjectFactoryMode.{CODEGEN_ONLY,
NO_CODEGEN}
import org.apache.spark.sql.catalyst.plans.ExistenceJoin
import org.apache.spark.sql.connector.catalog.InMemoryTableCatalog
import org.apache.spark.sql.execution._
@@ -50,9 +49,7 @@ abstract class GlutenDynamicPartitionPruningSuiteBase
// overwritten with different plan
"SPARK-38674: Remove useless deduplicate in SubqueryBroadcastExec",
"Make sure dynamic pruning works on uncorrelated queries",
- "Subquery reuse across the whole plan",
- // struct join key not supported, fell-back to Vanilla join
- "SPARK-32659: Fix the data issue when pruning DPP on non-atomic type"
+ "Subquery reuse across the whole plan"
)
// === Following cases override super class's cases ===
@@ -182,106 +179,6 @@ abstract class GlutenDynamicPartitionPruningSuiteBase
}
}
- testGluten("SPARK-32659: Fix the data issue when pruning DPP on non-atomic
type") {
- Seq(NO_CODEGEN, CODEGEN_ONLY).foreach {
- mode =>
- Seq(true, false).foreach {
- pruning =>
- withSQLConf(
- SQLConf.CODEGEN_FACTORY_MODE.key -> mode.toString,
- SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> s"$pruning") {
- Seq("struct", "array").foreach {
- dataType =>
- val df = sql(
- s"""
- |SELECT f.date_id, f.product_id, f.units_sold,
f.store_id FROM fact_stats f
- |JOIN dim_stats s
- |ON $dataType(f.store_id) = $dataType(s.store_id) WHERE
s.country = 'DE'
- """.stripMargin)
-
- if (pruning) {
- df.collect()
-
- val plan = df.queryExecution.executedPlan
- val dpExprs = collectDynamicPruningExpressions(plan)
- val hasSubquery = dpExprs.exists {
- case InSubqueryExec(_, _: SubqueryExec, _, _, _, _) =>
true
- case _ => false
- }
- val subqueryBroadcast = dpExprs.collect {
- case InSubqueryExec(_, b: SubqueryBroadcastExec, _, _,
_, _) => b
- case InSubqueryExec(_, b: ColumnarSubqueryBroadcastExec,
_, _, _, _) => b
- }
-
- val hasFilter = if (false) "Should" else "Shouldn't"
- assert(
- !hasSubquery,
- s"$hasFilter trigger DPP with a subquery
duplicate:\n${df.queryExecution}")
- val hasBroadcast = if (true) "Should" else "Shouldn't"
- assert(
- subqueryBroadcast.nonEmpty,
- s"$hasBroadcast trigger DPP " +
- s"with a reused broadcast
exchange:\n${df.queryExecution}")
-
- subqueryBroadcast.foreach {
- s =>
- s.child match {
- case _: ReusedExchangeExec => // reuse check ok.
- case BroadcastQueryStageExec(
- _,
- _: ReusedExchangeExec,
- _
- ) => // reuse check ok.
- case b: BroadcastExchangeLike =>
- val hasReuse = plan.find {
- case ReusedExchangeExec(_, e) => e eq b
- case _ => false
- }.isDefined
- // assert(hasReuse, s"$s\nshould have been reused
in\n$plan")
- case a: AdaptiveSparkPlanExec =>
- val broadcastQueryStage = collectFirst(a) {
- case b: BroadcastQueryStageExec => b
- }
- val broadcastPlan =
broadcastQueryStage.get.broadcast
- val hasReuse = find(plan) {
- case ReusedExchangeExec(_, e) => e eq
broadcastPlan
- case b: BroadcastExchangeLike => b eq
broadcastPlan
- case _ => false
- }.isDefined
- // assert(hasReuse, s"$s\nshould have been reused
in\n$plan")
- case _ =>
- fail(s"Invalid child node found in\n$s")
- }
- }
-
- val isMainQueryAdaptive =
plan.isInstanceOf[AdaptiveSparkPlanExec]
-
subqueriesAll(plan).filterNot(subqueryBroadcast.contains).foreach {
- s =>
- val subquery = s match {
- case r: ReusedSubqueryExec => r.child
- case o => o
- }
- assert(
- subquery
- .find(_.isInstanceOf[AdaptiveSparkPlanExec])
- .isDefined == isMainQueryAdaptive)
- }
- } else {
- checkPartitionPruningPredicate(df, false, false)
- }
-
- checkAnswer(
- df,
- Row(1030, 2, 10, 3) ::
- Row(1040, 2, 50, 3) ::
- Row(1050, 2, 50, 3) ::
- Row(1060, 2, 50, 3) :: Nil)
- }
- }
- }
- }
- }
-
testGluten("SPARK-38674: Remove useless deduplicate in
SubqueryBroadcastExec") {
withTable("duplicate_keys") {
withSQLConf(SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true") {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]