jackwener commented on code in PR #5330:
URL: https://github.com/apache/arrow-datafusion/pull/5330#discussion_r1115303018
##########
datafusion/core/tests/sqllogictests/test_files/window.slt:
##########
@@ -398,3 +408,274 @@ WITH _sample_data AS (
----
aa 3 2
bb 7 2
+
+
+# async fn window_in_expression() -> Result<()> {
+query I
+select 1 - lag(amount, 1) over (order by idx) as column1 from (values ('a', 1,
100), ('a', 2, 150)) as t (col1, idx, amount)
+---
+----
+NULL
+-99
+
+
+# async fn window_with_agg_in_expression() -> Result<()> {
+query TIIIII
+select col1, idx, count(*), sum(amount), lag(sum(amount), 1) over (order by
idx) as prev_amount,
+sum(amount) - lag(sum(amount), 1) over (order by idx) as difference from (
+select * from (values ('a', 1, 100), ('a', 2, 150)) as t (col1, idx, amount)
+) a
+group by col1, idx
+----
+a 1 1 100 NULL NULL
+a 2 1 150 100 50
+
+
+# async fn window_frame_empty() -> Result<()> {
+query II
+SELECT
+SUM(c3) OVER() as sum1,
+COUNT(*) OVER () as count1
+FROM aggregate_test_100
+ORDER BY c9
+LIMIT 5
+----
+781 100
+781 100
+781 100
+781 100
+781 100
+
+# async fn window_frame_rows_preceding() -> Result<()> {
+query IRI
+SELECT
+SUM(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+AVG(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+COUNT(*) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
+FROM aggregate_test_100
+ORDER BY c9
+LIMIT 5
+----
+-48302 -16100.666666666666 3
+11243 3747.666666666667 3
+-51311 -17103.666666666668 3
+-2391 -797 3
+46756 15585.333333333334 3
+
+
+# async fn window_frame_rows_preceding_stddev_variance() -> Result<()> {
+query RRRR
+SELECT
+VAR(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+VAR_POP(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+STDDEV(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+STDDEV_POP(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
+FROM aggregate_test_100
+ORDER BY c9
+LIMIT 5
+----
+46721.33333333174 31147.555555554496 216.151181660734 176.486700789477
+2639429.333333332 1759619.5555555548 1624.632060908971 1326.50652299774
+746202.3333333324 497468.2222222216 863.830037295146 705.314271954156
+768422.9999999981 512281.9999999988 876.597399037893 715.738779164577
+66526.3333333288 44350.88888888587 257.926992254259 210.596507304575
+
+# async fn window_frame_rows_preceding_with_partition_unique_order_by() ->
Result<()> {
+query IRI
+SELECT
+SUM(c4) OVER(PARTITION BY c1 ORDER BY c9 ROWS BETWEEN 1 PRECEDING AND 1
FOLLOWING),
+AVG(c4) OVER(PARTITION BY c1 ORDER BY c9 ROWS BETWEEN 1 PRECEDING AND 1
FOLLOWING),
+COUNT(*) OVER(PARTITION BY c2 ORDER BY c9 ROWS BETWEEN 1 PRECEDING AND 1
FOLLOWING)
+FROM aggregate_test_100
+ORDER BY c9
+LIMIT 5
+----
+-38611 -19305.5 2
+17547 8773.5 2
+-1301 -650.5 2
+26638 13319 3
+26861 8953.666666666666 3
+
+# /// The partition by clause conducts sorting according to given partition
column by default. If the
+# /// sorting columns have non unique values, the unstable sorting may produce
indeterminate results.
+# /// Therefore, we are commenting out the following test for now.
+
+#// #[tokio::test]
+#// async fn window_frame_rows_preceding_with_non_unique_partition() ->
Result<()> {
+#// let ctx = SessionContext::new();
+#// register_aggregate_csv(&ctx).await?;
+#// let sql = "SELECT
+#// SUM(c4) OVER(PARTITION BY c1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+#// COUNT(*) OVER(PARTITION BY c2 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
+#// FROM aggregate_test_100
+#// ORDER BY c9
+#// LIMIT 5
+#// let actual = execute_to_batches(&ctx, sql).await;
+#// let expected = vec![
+#// "+----------------------------+-----------------+",
+#// "| SUM(aggregate_test_100.c4) | COUNT(UInt8(1)) |",
+#// "+----------------------------+-----------------+",
+#// "| -33822 | 3|",
+#// "| 20808 | 3|",
+#// "| -29881 | 3|",
+#// "| -47613 | 3|",
+#// "| -13474 | 3|",
+#// "+----------------------------+-----------------+",
+#// ];
+#// assert_batches_eq!(expected, &actual);
+#// Ok(())
+#// }
+
+# async fn window_frame_ranges_preceding_following_desc() -> Result<()> {
+query error DataFusion error: Internal error: Operator + is not implemented
for types Int8(5) and Utf8("1"). This was likely caused by a bug in
DataFusion's code and we would welcome that you file an bug report in our issue
tracker
Review Comment:
I think it's a bug, I also meet it in other code when disable `skip_failed`
##########
datafusion/core/tests/sqllogictests/test_files/window.slt:
##########
@@ -398,3 +408,274 @@ WITH _sample_data AS (
----
aa 3 2
bb 7 2
+
+
+# async fn window_in_expression() -> Result<()> {
+query I
+select 1 - lag(amount, 1) over (order by idx) as column1 from (values ('a', 1,
100), ('a', 2, 150)) as t (col1, idx, amount)
+---
+----
+NULL
+-99
+
+
+# async fn window_with_agg_in_expression() -> Result<()> {
+query TIIIII
+select col1, idx, count(*), sum(amount), lag(sum(amount), 1) over (order by
idx) as prev_amount,
+sum(amount) - lag(sum(amount), 1) over (order by idx) as difference from (
+select * from (values ('a', 1, 100), ('a', 2, 150)) as t (col1, idx, amount)
+) a
+group by col1, idx
+----
+a 1 1 100 NULL NULL
+a 2 1 150 100 50
+
+
+# async fn window_frame_empty() -> Result<()> {
+query II
+SELECT
+SUM(c3) OVER() as sum1,
+COUNT(*) OVER () as count1
+FROM aggregate_test_100
+ORDER BY c9
+LIMIT 5
+----
+781 100
+781 100
+781 100
+781 100
+781 100
+
+# async fn window_frame_rows_preceding() -> Result<()> {
+query IRI
+SELECT
+SUM(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+AVG(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+COUNT(*) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
+FROM aggregate_test_100
+ORDER BY c9
+LIMIT 5
+----
+-48302 -16100.666666666666 3
+11243 3747.666666666667 3
+-51311 -17103.666666666668 3
+-2391 -797 3
+46756 15585.333333333334 3
+
+
+# async fn window_frame_rows_preceding_stddev_variance() -> Result<()> {
+query RRRR
+SELECT
+VAR(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+VAR_POP(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+STDDEV(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+STDDEV_POP(c4) OVER(ORDER BY c4 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
+FROM aggregate_test_100
+ORDER BY c9
+LIMIT 5
+----
+46721.33333333174 31147.555555554496 216.151181660734 176.486700789477
+2639429.333333332 1759619.5555555548 1624.632060908971 1326.50652299774
+746202.3333333324 497468.2222222216 863.830037295146 705.314271954156
+768422.9999999981 512281.9999999988 876.597399037893 715.738779164577
+66526.3333333288 44350.88888888587 257.926992254259 210.596507304575
+
+# async fn window_frame_rows_preceding_with_partition_unique_order_by() ->
Result<()> {
+query IRI
+SELECT
+SUM(c4) OVER(PARTITION BY c1 ORDER BY c9 ROWS BETWEEN 1 PRECEDING AND 1
FOLLOWING),
+AVG(c4) OVER(PARTITION BY c1 ORDER BY c9 ROWS BETWEEN 1 PRECEDING AND 1
FOLLOWING),
+COUNT(*) OVER(PARTITION BY c2 ORDER BY c9 ROWS BETWEEN 1 PRECEDING AND 1
FOLLOWING)
+FROM aggregate_test_100
+ORDER BY c9
+LIMIT 5
+----
+-38611 -19305.5 2
+17547 8773.5 2
+-1301 -650.5 2
+26638 13319 3
+26861 8953.666666666666 3
+
+# /// The partition by clause conducts sorting according to given partition
column by default. If the
+# /// sorting columns have non unique values, the unstable sorting may produce
indeterminate results.
+# /// Therefore, we are commenting out the following test for now.
+
+#// #[tokio::test]
+#// async fn window_frame_rows_preceding_with_non_unique_partition() ->
Result<()> {
+#// let ctx = SessionContext::new();
+#// register_aggregate_csv(&ctx).await?;
+#// let sql = "SELECT
+#// SUM(c4) OVER(PARTITION BY c1 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING),
+#// COUNT(*) OVER(PARTITION BY c2 ROWS BETWEEN 1 PRECEDING AND 1 FOLLOWING)
+#// FROM aggregate_test_100
+#// ORDER BY c9
+#// LIMIT 5
+#// let actual = execute_to_batches(&ctx, sql).await;
+#// let expected = vec![
+#// "+----------------------------+-----------------+",
+#// "| SUM(aggregate_test_100.c4) | COUNT(UInt8(1)) |",
+#// "+----------------------------+-----------------+",
+#// "| -33822 | 3|",
+#// "| 20808 | 3|",
+#// "| -29881 | 3|",
+#// "| -47613 | 3|",
+#// "| -13474 | 3|",
+#// "+----------------------------+-----------------+",
+#// ];
+#// assert_batches_eq!(expected, &actual);
+#// Ok(())
+#// }
+
+# async fn window_frame_ranges_preceding_following_desc() -> Result<()> {
+query error DataFusion error: Internal error: Operator + is not implemented
for types Int8(5) and Utf8("1"). This was likely caused by a bug in
DataFusion's code and we would welcome that you file an bug report in our issue
tracker
Review Comment:
I think it's a bug, I also met it in other code when disable `skip_failed`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]