dtenedor commented on code in PR #48649:
URL: https://github.com/apache/spark/pull/48649#discussion_r1828275133
##########
sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out:
##########
@@ -650,6 +650,22 @@ org.apache.spark.sql.AnalysisException
}
+-- !query
+table t
+|> where first_value(x) over w = 1
+ window w as (partition by y)
+-- !query schema
+struct<>
+-- !query output
+org.apache.spark.sql.AnalysisException
+{
+ "errorClass" : "_LEGACY_ERROR_TEMP_1034",
Review Comment:
Do you think we could also add `windowClause?` after the `whereClause` in
`operatorPipeRightSide`, just for the purpose of throwing a better error message
in this case? It could look something like:
```
operatorPipeRightSide
: selectClause windowClause?
// Note that the WINDOW clause is not allowed in the WHERE pipe operator, but we add it
// here in the grammar simply for purposes of catching this invalid syntax and throwing
// a specific dedicated error message.
| whereClause windowClause?
```
and in the AstBuilder:
```
}.getOrElse(Option(ctx.whereClause).map { c =>
if (ctx.windowClause != null) {
throw QueryParsingErrors.windowClauseInOperatorPipeWhereClauseNotAllowedError(ctx)
}
withWhereClause(c, withSubqueryAlias())
```
##########
sql/core/src/test/resources/sql-tests/results/pipe-operators.sql.out:
##########
@@ -2442,6 +2411,238 @@ org.apache.spark.sql.catalyst.ExtendedAnalysisException
}
+-- !query
+table windowTestData
+|> select cate, sum(val) over w
+ window w as (partition by cate order by val)
+-- !query schema
+struct<cate:string,sum(val) OVER (PARTITION BY cate ORDER BY val ASC NULLS
FIRST RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint>
+-- !query output
+NULL 3
+NULL NULL
+a 2
+a 2
+a 4
+a NULL
+b 1
+b 3
+b 6
+
+
+-- !query
+table windowTestData
+|> select cate, sum(val) over w
+ window w as (order by val_timestamp range between unbounded preceding and
current row)
+-- !query schema
+struct<cate:string,sum(val) OVER (ORDER BY val_timestamp ASC NULLS FIRST RANGE
BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW):bigint>
+-- !query output
+NULL 5
+NULL NULL
+a 13
+a 5
+a 5
+a 6
+b 13
+b 5
+b 8
+
+
+-- !query
+table windowTestData
+|> select cate, val
+ window w as (partition by cate order by val)
+-- !query schema
+struct<cate:string,val:int>
+-- !query output
+NULL 3
+NULL NULL
+a 1
+a 1
+a 2
+a NULL
+b 1
+b 2
+b 3
+
+
+-- !query
+table windowTestData
+|> select cate, val, sum(val) over w as sum_val
+ window w as (partition by cate)
+|> select cate, val, sum_val, first_value(cate) over w
+ window w as (partition by val)
+-- !query schema
+struct<cate:string,val:int,sum_val:bigint,first_value(cate) OVER (PARTITION BY
val ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING):string>
+-- !query output
+NULL 3 3 NULL
+NULL NULL 3 NULL
+a 1 4 a
+a 1 4 a
+a 2 4 a
+a NULL 4 NULL
+b 1 6 a
+b 2 6 a
+b 3 6 NULL
+
+
+-- !query
+table windowTestData
+|> select cate, val, sum(val) over w1, first_value(cate) over w2
+ window w1 as (partition by cate), w2 as (partition by val)
Review Comment:
The `first_value` result is nondeterministic since the window `w2` has no
ordering property. Please add an `order by` clause inside the window definition
and regenerate the test result to make the test cases deterministic.
##########
sql/core/src/test/resources/sql-tests/inputs/pipe-operators.sql:
##########
@@ -821,6 +819,84 @@ select 1 x, 2 y, 3 z
table other
|> aggregate b group by a;
+-- WINDOW operators (within SELECT): positive tests.
+---------------------------------------------------
+
+-- SELECT with a WINDOW clause.
+table windowTestData
+|> select cate, sum(val) over w
+ window w as (partition by cate order by val);
+
+-- SELECT with RANGE BETWEEN as part of the window definition.
+table windowTestData
+|> select cate, sum(val) over w
+ window w as (order by val_timestamp range between unbounded preceding and
current row);
+
+-- SELECT with a WINDOW clause not being referred in the SELECT list.
+table windowTestData
+|> select cate, val
+ window w as (partition by cate order by val);
+
+-- multiple SELECT clauses, each with a WINDOW clause (with the same window
definition names).
+table windowTestData
+|> select cate, val, sum(val) over w as sum_val
+ window w as (partition by cate)
+|> select cate, val, sum_val, first_value(cate) over w
Review Comment:
The `first_value` result is nondeterministic since the window `w` has no
ordering property. Please add an `order by` clause inside the window definition
and regenerate the test result to make the test cases deterministic.
The same applies to each of the other test cases below that use `first_value`
over a window definition with no `order by` clause.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]