HyukjinKwon commented on a change in pull request #25195:
[SPARK-28288][SQL][PYTHON][TESTS] Convert and port 'window.sql' into UDF test base
URL: https://github.com/apache/spark/pull/25195#discussion_r306589939
##########
File path: sql/core/src/test/resources/sql-tests/inputs/udf/udf-window.sql
##########
@@ -0,0 +1,118 @@
+--This test file was converted from window.sql.
+-- Test data.
+CREATE OR REPLACE TEMPORARY VIEW testData AS SELECT * FROM VALUES
+(null, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"),
+(1, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), "a"),
+(1, 2L, 2.5D, date("2017-08-02"), timestamp(1502000000), "a"),
+(2, 2147483650L, 100.001D, date("2020-12-31"), timestamp(1609372800), "a"),
+(1, null, 1.0D, date("2017-08-01"), timestamp(1501545600), "b"),
+(2, 3L, 3.3D, date("2017-08-03"), timestamp(1503000000), "b"),
+(3, 2147483650L, 100.001D, date("2020-12-31"), timestamp(1609372800), "b"),
+(null, null, null, null, null, null),
+(3, 1L, 1.0D, date("2017-08-01"), timestamp(1501545600), null)
+AS testData(val, val_long, val_double, val_date, val_timestamp, cate);
+
+-- RowsBetween
+SELECT udf(val), cate, count(val) OVER(PARTITION BY cate ORDER BY val ROWS
CURRENT ROW) FROM testData
+ORDER BY cate, val;
+SELECT udf(val), cate, sum(val) OVER(PARTITION BY cate ORDER BY val
+ROWS BETWEEN UNBOUNDED PRECEDING AND 1 FOLLOWING) FROM testData ORDER BY cate,
val;
+SELECT val_long, udf(cate), sum(val_long) OVER(PARTITION BY cate ORDER BY
val_long
+ROWS BETWEEN CURRENT ROW AND CAST(2147483648 AS int) FOLLOWING) FROM testData
ORDER BY cate, val_long;
+
+-- RangeBetween
+SELECT udf(val), cate, count(val) OVER(PARTITION BY cate ORDER BY val RANGE 1
PRECEDING) FROM testData
+ORDER BY cate, val;
+SELECT val, udf(cate), sum(val) OVER(PARTITION BY cate ORDER BY val
+RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
+SELECT val_long, udf(cate), sum(val_long) OVER(PARTITION BY cate ORDER BY
val_long
+RANGE BETWEEN CURRENT ROW AND 2147483648 FOLLOWING) FROM testData ORDER BY
cate, val_long;
+SELECT val_double, udf(cate), sum(val_double) OVER(PARTITION BY cate ORDER BY
val_double
+RANGE BETWEEN CURRENT ROW AND 2.5 FOLLOWING) FROM testData ORDER BY cate,
val_double;
+SELECT val_date, udf(cate), max(val_date) OVER(PARTITION BY cate ORDER BY
val_date
+RANGE BETWEEN CURRENT ROW AND 2 FOLLOWING) FROM testData ORDER BY cate,
val_date;
+SELECT val_timestamp, udf(cate), avg(val_timestamp) OVER(PARTITION BY cate
ORDER BY val_timestamp
+RANGE BETWEEN CURRENT ROW AND interval 23 days 4 hours FOLLOWING) FROM testData
+ORDER BY cate, val_timestamp;
+
+-- RangeBetween with reverse OrderBy
+SELECT val, udf(cate), sum(val) OVER(PARTITION BY cate ORDER BY val DESC
+RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
+
+-- Invalid window frame
+SELECT udf(val), cate, count(val) OVER(PARTITION BY cate
+ROWS BETWEEN UNBOUNDED FOLLOWING AND CAST(1 as int) FOLLOWING) FROM testData
ORDER BY cate, val;
+SELECT udf(val), cate, count(val) OVER(PARTITION BY cate
+RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate,
udf(val);
+SELECT udf(val), cate, count(val) OVER(PARTITION BY cate ORDER BY val, cate
+RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
+SELECT udf(val), cate, count(val) OVER(PARTITION BY cate ORDER BY
current_timestamp
+RANGE BETWEEN CURRENT ROW AND 1 FOLLOWING) FROM testData ORDER BY cate, val;
+SELECT udf(val), cate, count(val) OVER(PARTITION BY cate ORDER BY val
+RANGE BETWEEN 1 FOLLOWING AND 1 PRECEDING) FROM testData ORDER BY cate, val;
+SELECT udf(val), cate, count(val) OVER(PARTITION BY cate ORDER BY val
+RANGE BETWEEN CURRENT ROW AND current_date PRECEDING) FROM testData ORDER BY
cate, val;
+
+
+-- Window functions
+SELECT udf(val), cate,
+max(val) OVER w AS max,
Review comment:
Shall we wrap these functions with udf as well? For instance, `udf(min(val))`,
`udf(count(udf(val)))`, or `udf(udf(sum(val)))`.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
With regards,
Apache Git Services
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]