This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 0b785b3c773 [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list 0b785b3c773 is described below commit 0b785b3c77374fa7736f01bb55e87c796985ae14 Author: Yuming Wang <yumw...@ebay.com> AuthorDate: Tue Jun 14 00:43:20 2022 -0700 [SPARK-39448][SQL] Add `ReplaceCTERefWithRepartition` into `nonExcludableRules` list ### What changes were proposed in this pull request? This PR adds `ReplaceCTERefWithRepartition` into the nonExcludableRules list. ### Why are the changes needed? It will throw an exception if a user runs `set spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition` before running this query: ```sql SELECT (SELECT min(id) FROM range(10)), (SELECT sum(id) FROM range(10)), (SELECT count(distinct id) FROM range(10)) ``` Exception: ``` Caused by: java.lang.AssertionError: assertion failed: No plan for WithCTE :- CTERelationDef 0, true : +- Project [named_struct(min(id), min(id)#223L, sum(id), sum(id)#226L, count(DISTINCT id), count(DISTINCT id)#229L) AS mergedValue#240] : +- Aggregate [min(id#221L) AS min(id)#223L, sum(id#221L) AS sum(id)#226L, count(distinct id#221L) AS count(DISTINCT id)#229L] : +- Range (0, 10, step=1, splits=None) +- Project [scalar-subquery#218 [].min(id) AS scalarsubquery()#230L, scalar-subquery#219 [].sum(id) AS scalarsubquery()#231L, scalar-subquery#220 [].count(DISTINCT id) AS scalarsubquery()#232L] : :- CTERelationRef 0, true, [mergedValue#240] : :- CTERelationRef 0, true, [mergedValue#240] : +- CTERelationRef 0, true, [mergedValue#240] +- OneRowRelation ``` ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Unit test. Closes #36847 from wangyum/SPARK-39448. 
Authored-by: Yuming Wang <yumw...@ebay.com> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> --- .../apache/spark/sql/execution/SparkOptimizer.scala | 3 ++- .../sql-tests/inputs/non-excludable-rule.sql | 6 ++++++ .../sql-tests/results/non-excludable-rule.sql.out | 21 +++++++++++++++++++++ 3 files changed, 29 insertions(+), 1 deletion(-) diff --git a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala index 0e7455009c5..056c16affc2 100644 --- a/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala +++ b/sql/core/src/main/scala/org/apache/spark/sql/execution/SparkOptimizer.scala @@ -87,7 +87,8 @@ class SparkOptimizer( GroupBasedRowLevelOperationScanPlanning.ruleName :+ V2ScanRelationPushDown.ruleName :+ V2ScanPartitioning.ruleName :+ - V2Writes.ruleName + V2Writes.ruleName :+ + ReplaceCTERefWithRepartition.ruleName /** * Optimization batches that are executed before the regular optimization batches (also before diff --git a/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql new file mode 100644 index 00000000000..b238d199cc1 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/inputs/non-excludable-rule.sql @@ -0,0 +1,6 @@ +-- SPARK-39448 +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition; +SELECT + (SELECT min(id) FROM range(10)), + (SELECT sum(id) FROM range(10)), + (SELECT count(distinct id) FROM range(10)); diff --git a/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out new file mode 100644 index 00000000000..c7fa2f04152 --- /dev/null +++ b/sql/core/src/test/resources/sql-tests/results/non-excludable-rule.sql.out @@ -0,0 +1,21 @@ +-- Automatically generated by SQLQueryTestSuite +-- Number of 
queries: 2 + + +-- !query +SET spark.sql.optimizer.excludedRules=org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition +-- !query schema +struct<key:string,value:string> +-- !query output +spark.sql.optimizer.excludedRules org.apache.spark.sql.catalyst.optimizer.ReplaceCTERefWithRepartition + + +-- !query +SELECT + (SELECT min(id) FROM range(10)), + (SELECT sum(id) FROM range(10)), + (SELECT count(distinct id) FROM range(10)) +-- !query schema +struct<scalarsubquery():bigint,scalarsubquery():bigint,scalarsubquery():bigint> +-- !query output +0 45 10 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org