dtenedor commented on code in PR #48649:
URL: https://github.com/apache/spark/pull/48649#discussion_r1824998618
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/parser/AstBuilder.scala:
##########
@@ -5874,53 +5886,72 @@ class AstBuilder extends DataTypeAstBuilder
if (!SQLConf.get.getConf(SQLConf.OPERATOR_PIPE_SYNTAX_ENABLED)) {
operationNotAllowed("Operator pipe SQL syntax using |>", ctx)
}
- // This helper function adds a table subquery boundary between the new
operator to be added
- // (such as a filter or sort) and the input plan if one does not already
exist. This helps the
- // analyzer behave as if we had added the corresponding SQL clause after a
table subquery
- // containing the input plan.
- def withSubqueryAlias(): LogicalPlan = left match {
- case s: SubqueryAlias =>
- s
- case u: UnresolvedRelation =>
- u
- case _ =>
- SubqueryAlias(SubqueryAlias.generateSubqueryName(), left)
- }
- Option(ctx.selectClause).map { c =>
- withSelectQuerySpecification(
- ctx = ctx,
- selectClause = c,
- lateralView = new java.util.ArrayList[LateralViewContext](),
- whereClause = null,
- aggregationClause = null,
- havingClause = null,
- windowClause = null,
- relation = left,
- isPipeOperatorSelect = true)
- }.getOrElse(Option(ctx.whereClause).map { c =>
- withWhereClause(c, withSubqueryAlias())
- }.getOrElse(Option(ctx.pivotClause()).map { c =>
- if (ctx.unpivotClause() != null) {
- throw
QueryParsingErrors.unpivotWithPivotInFromClauseNotAllowedError(ctx)
- }
- withPivot(c, left)
- }.getOrElse(Option(ctx.unpivotClause()).map { c =>
- if (ctx.pivotClause() != null) {
- throw
QueryParsingErrors.unpivotWithPivotInFromClauseNotAllowedError(ctx)
+
+ // Extract the base child and a list of WithWindowDefinition nodes from
the plan
+ var baseChild = left
+ var withWindowDefinitions = List.empty[WithWindowDefinition]
+ while (baseChild.isInstanceOf[WithWindowDefinition]) {
+ val wwd = baseChild.asInstanceOf[WithWindowDefinition]
+ withWindowDefinitions = withWindowDefinitions :+ wwd
+ baseChild = wwd.child
+ }
+
+ // Process the base child
+ val newChild = {
+ // This helper function adds a table subquery boundary between the new
operator to be added
+ // (such as a filter or sort) and the input plan if one does not already
exist. This helps the
+ // analyzer behave as if we had added the corresponding SQL clause after
a table subquery
+ // containing the input plan.
+ def withSubqueryAlias(): LogicalPlan = baseChild match {
+ case s: SubqueryAlias =>
+ s
+ case u: UnresolvedRelation =>
+ u
+ case _ =>
+ SubqueryAlias(SubqueryAlias.generateSubqueryName(), baseChild)
}
- withUnpivot(c, left)
- }.getOrElse(Option(ctx.sample).map { c =>
- withSample(c, left)
- }.getOrElse(Option(ctx.joinRelation()).map { c =>
- withJoinRelation(c, left)
- }.getOrElse(Option(ctx.operator).map { c =>
- val all = Option(ctx.setQuantifier()).exists(_.ALL != null)
- visitSetOperationImpl(left, plan(ctx.right), all, c.getType)
- }.getOrElse(Option(ctx.queryOrganization).map { c =>
- withQueryResultClauses(c, withSubqueryAlias(), forPipeOperators = true)
- }.getOrElse(
- visitOperatorPipeAggregate(ctx, left)
- ))))))))
+
+ Option(ctx.selectClause).map { c =>
+ withSelectQuerySpecification(
+ ctx = ctx,
+ selectClause = c,
+ lateralView = new java.util.ArrayList[LateralViewContext](),
+ whereClause = null,
+ aggregationClause = null,
+ havingClause = null,
+ windowClause = null,
+ relation = baseChild,
+ isPipeOperatorSelect = true)
+ }.getOrElse(Option(ctx.whereClause).map { c =>
+ withWhereClause(c, withSubqueryAlias())
+ }.getOrElse(Option(ctx.pivotClause()).map { c =>
+ if (ctx.unpivotClause() != null) {
+ throw
QueryParsingErrors.unpivotWithPivotInFromClauseNotAllowedError(ctx)
+ }
+ withPivot(c, baseChild)
+ }.getOrElse(Option(ctx.unpivotClause()).map { c =>
+ if (ctx.pivotClause() != null) {
+ throw
QueryParsingErrors.unpivotWithPivotInFromClauseNotAllowedError(ctx)
+ }
+ withUnpivot(c, baseChild)
+ }.getOrElse(Option(ctx.sample).map { c =>
+ withSample(c, baseChild)
+ }.getOrElse(Option(ctx.joinRelation()).map { c =>
+ withJoinRelation(c, baseChild)
+ }.getOrElse(Option(ctx.operator).map { c =>
+ val all = Option(ctx.setQuantifier()).exists(_.ALL != null)
+ visitSetOperationImpl(baseChild, plan(ctx.right), all, c.getType)
+ }.getOrElse(Option(ctx.queryOrganization).map { c =>
+ withQueryResultClauses(c, withSubqueryAlias(), forPipeOperators = true)
+ }.getOrElse(
+ visitOperatorPipeAggregate(ctx, left)
+ ))))))))
+ }
+
+ // Reconstruct the WithWindowDefinition nodes on top of the new child
Review Comment:
Update on this: we talked about this with Jeff Shute from the original
research paper [1]. We find that having `|> WINDOW` as a separate pipe operator
breaks the composability of the operators. Therefore, we should actually add
the `WINDOW` clause as optional at the end of existing operators that support
expressions, like `|> SELECT` and `|> WHERE`. For example:
```
from windowTestData
|> select cate, sum(val) over w
window w as (partition by cate order by val)
|> where cate = "a";
```
Then we don't have this consideration in the parser at all about checking
for multiple `WINDOW` clauses, since the grammar would only allow at most a
single optional one.
Sorry if this is extra work, but the implementation should hopefully become
simpler after this :)
[1]
https://research.google/pubs/sql-has-problems-we-can-fix-them-pipe-syntax-in-sql/
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]