jinchengchenghh commented on code in PR #11212:
URL:
https://github.com/apache/incubator-gluten/pull/11212#discussion_r2573246548
##########
gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDynamicPartitionPruningSuite.scala:
##########
@@ -577,64 +572,6 @@ class GlutenDynamicPartitionPruningV1SuiteAEOff
}
}
}
-
- // TODO: fix in Spark-4.0
- ignoreGluten(
- "Subquery reuse across the whole plan",
- DisableAdaptiveExecution("DPP in AQE must reuse broadcast")) {
- withSQLConf(
- SQLConf.DYNAMIC_PARTITION_PRUNING_ENABLED.key -> "true",
- SQLConf.DYNAMIC_PARTITION_PRUNING_REUSE_BROADCAST_ONLY.key -> "false",
- SQLConf.EXCHANGE_REUSE_ENABLED.key -> "false"
- ) {
- withTable("df1", "df2") {
- spark
- .range(100)
- .select(col("id"), col("id").as("k"))
- .write
- .partitionBy("k")
- .format(tableFormat)
- .mode("overwrite")
- .saveAsTable("df1")
-
- spark
- .range(10)
- .select(col("id"), col("id").as("k"))
- .write
- .partitionBy("k")
- .format(tableFormat)
- .mode("overwrite")
- .saveAsTable("df2")
-
- val df = sql("""
- |SELECT df1.id, df2.k
- |FROM df1 JOIN df2 ON df1.k = df2.k
- |WHERE df2.id < (SELECT max(id) FROM df2 WHERE id <= 2)
- |""".stripMargin)
-
- checkPartitionPruningPredicate(df, true, false)
-
- checkAnswer(df, Row(0, 0) :: Row(1, 1) :: Nil)
-
- val plan = df.queryExecution.executedPlan
-
- val subqueryIds = plan.collectWithSubqueries { case s: SubqueryExec =>
s.id }
- val reusedSubqueryIds = plan.collectWithSubqueries {
- case rs: ReusedSubqueryExec => rs.child.id
- }
-
- // By default Gluten pushes more filters than vanilla Spark.
- //
- // See also
org.apache.gluten.execution.FilterHandler#applyFilterPushdownToScan
- // See also DynamicPartitionPruningSuite.scala:1362
- assert(subqueryIds.size == 3, "Whole plan subquery reusing not working
correctly")
Review Comment:
Maybe the previous version had a bug: the filter should not affect
ReusedSubquery. With Spark 4.0, Gluten's result is the same as JVM (vanilla) Spark's.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]