This is an automated email from the ASF dual-hosted git repository. maxgekk pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new 9353d67f929 [SPARK-43851][SQL] Support LCA in grouping expressions 9353d67f929 is described below commit 9353d67f9290bae1e7d7e16a2caf5256cc4e2f92 Author: Jia Fan <fanjiaemi...@qq.com> AuthorDate: Sat Jul 1 08:48:10 2023 +0300 [SPARK-43851][SQL] Support LCA in grouping expressions ### What changes were proposed in this pull request? This PR adds support for lateral column alias (LCA) references in grouping expressions. ### Why are the changes needed? To add a new feature for LCA. ### Does this PR introduce _any_ user-facing change? No ### How was this patch tested? By existing tests. Closes #41804 from Hisoka-X/SPARK-43851_LCA_in_group. Authored-by: Jia Fan <fanjiaemi...@qq.com> Signed-off-by: Max Gekk <max.g...@gmail.com> --- .../src/main/resources/error/error-classes.json | 5 ----- ...r-conditions-unsupported-feature-error-class.md | 4 ---- .../analysis/ResolveReferencesInAggregate.scala | 22 ++++++++---------- .../column-resolution-aggregate.sql.out | 26 +++++++++++++--------- .../results/column-resolution-aggregate.sql.out | 16 ++++--------- 5 files changed, 29 insertions(+), 44 deletions(-) diff --git a/common/utils/src/main/resources/error/error-classes.json b/common/utils/src/main/resources/error/error-classes.json index 3cc35d668e0..eabd5533e13 100644 --- a/common/utils/src/main/resources/error/error-classes.json +++ b/common/utils/src/main/resources/error/error-classes.json @@ -2530,11 +2530,6 @@ "Referencing lateral column alias <lca> in the aggregate query both with window expressions and with having clause. Please rewrite the aggregate query by removing the having clause or removing lateral alias reference in the SELECT list." ] }, - "LATERAL_COLUMN_ALIAS_IN_GROUP_BY" : { - "message" : [ - "Referencing a lateral column alias via GROUP BY alias/ALL is not supported yet." - ] - }, "LATERAL_COLUMN_ALIAS_IN_WINDOW" : { "message" : [ "Referencing a lateral column alias <lca> in window expression <windowExpr>." 
diff --git a/docs/sql-error-conditions-unsupported-feature-error-class.md b/docs/sql-error-conditions-unsupported-feature-error-class.md index 64d7eb347e5..78bf301c49d 100644 --- a/docs/sql-error-conditions-unsupported-feature-error-class.md +++ b/docs/sql-error-conditions-unsupported-feature-error-class.md @@ -65,10 +65,6 @@ Referencing a lateral column alias `<lca>` in the aggregate function `<aggFunc>` Referencing lateral column alias `<lca>` in the aggregate query both with window expressions and with having clause. Please rewrite the aggregate query by removing the having clause or removing lateral alias reference in the SELECT list. -## LATERAL_COLUMN_ALIAS_IN_GROUP_BY - -Referencing a lateral column alias via GROUP BY alias/ALL is not supported yet. - ## LATERAL_COLUMN_ALIAS_IN_WINDOW Referencing a lateral column alias `<lca>` in window expression `<windowExpr>`. diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala index 09ae87b071f..41bcb337c67 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/ResolveReferencesInAggregate.scala @@ -17,9 +17,8 @@ package org.apache.spark.sql.catalyst.analysis -import org.apache.spark.sql.AnalysisException import org.apache.spark.sql.catalyst.SQLConfHelper -import org.apache.spark.sql.catalyst.expressions.{AliasHelper, Attribute, Expression, NamedExpression} +import org.apache.spark.sql.catalyst.expressions.{AliasHelper, Attribute, Expression, LateralColumnAliasReference, NamedExpression} import org.apache.spark.sql.catalyst.expressions.aggregate.AggregateExpression import org.apache.spark.sql.catalyst.plans.logical.{Aggregate, AppendColumns, LogicalPlan} import 
org.apache.spark.sql.catalyst.trees.TreePattern.{LATERAL_COLUMN_ALIAS_REFERENCE, UNRESOLVED_ATTRIBUTE} @@ -74,12 +73,6 @@ object ResolveReferencesInAggregate extends SQLConfHelper resolvedAggExprsWithOuter, resolveGroupByAlias(resolvedAggExprsWithOuter, resolvedGroupExprsNoOuter) ).map(resolveOuterRef) - // TODO: currently we don't support LCA in `groupingExpressions` yet. - if (resolved.exists(_.containsPattern(LATERAL_COLUMN_ALIAS_REFERENCE))) { - throw new AnalysisException( - errorClass = "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY", - messageParameters = Map.empty) - } resolved } else { // Do not resolve columns in grouping expressions to outer references here, as the aggregate @@ -112,9 +105,11 @@ object ResolveReferencesInAggregate extends SQLConfHelper assert(selectList.forall(_.resolved)) if (conf.groupByAliases) { groupExprs.map { g => - g.transformWithPruning(_.containsPattern(UNRESOLVED_ATTRIBUTE)) { - case u: UnresolvedAttribute => - selectList.find(ne => conf.resolver(ne.name, u.name)).getOrElse(u) + g.transformWithPruning(_.containsAnyPattern(UNRESOLVED_ATTRIBUTE, + LATERAL_COLUMN_ALIAS_REFERENCE)) { + case u @ (_: UnresolvedAttribute | _: LateralColumnAliasReference) => + selectList.find(ne => conf.resolver(ne.name, u.asInstanceOf[NamedExpression].name)) + .getOrElse(u) } } } else { @@ -133,8 +128,9 @@ object ResolveReferencesInAggregate extends SQLConfHelper // tell the user in checkAnalysis that we cannot resolve the all in group by. groupExprs } else { - // This is a valid GROUP BY ALL aggregate. 
- expandedGroupExprs.get + // This is a valid GROUP BY ALL aggregate, resolve group by alias again to transform the + // LCA reference + resolveGroupByAlias(selectList, expandedGroupExprs.get) } } else { groupExprs diff --git a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out index eb30443cbae..3dab6c386cb 100644 --- a/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out +++ b/sql/core/src/test/resources/sql-tests/analyzer-results/column-resolution-aggregate.sql.out @@ -94,21 +94,27 @@ org.apache.spark.sql.AnalysisException -- !query SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY k, col -- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY", - "sqlState" : "0A000" -} +Project [lca#x, (lca#x + 1) AS col#x] ++- Project [k#x, k#x AS lca#x] + +- Aggregate [k#x, (k#x + 1)], [k#x] + +- SubqueryAlias v1 + +- View (`v1`, [a#x,b#x,k#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, cast(k#x as int) AS k#x] + +- SubqueryAlias t + +- LocalRelation [a#x, b#x, k#x] -- !query SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY all -- !query analysis -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY", - "sqlState" : "0A000" -} +Project [lca#x, (lca#x + 1) AS col#x] ++- Project [k#x, k#x AS lca#x] + +- Aggregate [k#x, (k#x + 1)], [k#x] + +- SubqueryAlias v1 + +- View (`v1`, [a#x,b#x,k#x]) + +- Project [cast(a#x as int) AS a#x, cast(b#x as int) AS b#x, cast(k#x as int) AS k#x] + +- SubqueryAlias t + +- LocalRelation [a#x, b#x, k#x] -- !query diff --git a/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out b/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out index e8ab766751c..e0bbcae91f1 100644 
--- a/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out +++ b/sql/core/src/test/resources/sql-tests/results/column-resolution-aggregate.sql.out @@ -91,25 +91,17 @@ org.apache.spark.sql.AnalysisException -- !query SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY k, col -- !query schema -struct<> +struct<lca:int,col:int> -- !query output -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY", - "sqlState" : "0A000" -} +1 2 -- !query SELECT k AS lca, lca + 1 AS col FROM v1 GROUP BY all -- !query schema -struct<> +struct<lca:int,col:int> -- !query output -org.apache.spark.sql.AnalysisException -{ - "errorClass" : "UNSUPPORTED_FEATURE.LATERAL_COLUMN_ALIAS_IN_GROUP_BY", - "sqlState" : "0A000" -} +1 2 -- !query --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org