mustafasrepo commented on code in PR #9221: URL: https://github.com/apache/arrow-datafusion/pull/9221#discussion_r1498974602
########## datafusion/sqllogictest/test_files/window.slt: ########## @@ -4102,3 +4102,110 @@ ProjectionExec: expr=[ROW_NUMBER() PARTITION BY [a.a] ROWS BETWEEN UNBOUNDED PRE ----------CoalesceBatchesExec: target_batch_size=4096 ------------FilterExec: a@0 = 1 --------------MemoryExec: partitions=1, partition_sizes=[1] + +# LAG window function IGNORE/RESPECT NULLS support with ascending order and default offset 1 +query TTTTTT +select lag(a) ignore nulls over (order by id) as x, + lag(a, 1, null) ignore nulls over (order by id) as x1, + lag(a, 1, 'def') ignore nulls over (order by id) as x2, + lag(a) respect nulls over (order by id) as x3, + lag(a, 1, null) respect nulls over (order by id) as x4, + lag(a, 1, 'def') respect nulls over (order by id) as x5 +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') +---- +NULL NULL def NULL NULL def +NULL NULL def NULL NULL NULL +b b b b b b +b b b NULL NULL NULL + +# LAG window function IGNORE/RESPECT NULLS support with descending order and default offset 1 +query TTTTTT +select lag(a) ignore nulls over (order by id desc) as x, + lag(a, 1, null) ignore nulls over (order by id desc) as x1, + lag(a, 1, 'def') ignore nulls over (order by id desc) as x2, + lag(a) respect nulls over (order by id desc) as x3, + lag(a, 1, null) respect nulls over (order by id desc) as x4, + lag(a, 1, 'def') respect nulls over (order by id desc) as x5 +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') +---- +NULL NULL def NULL NULL def +x x x x x x +x x x NULL NULL NULL +b b b b b b + +# LAG window function IGNORE/RESPECT NULLS support with ascending order and nondefault offset +query TTTT +select lag(a, 2, null) ignore nulls over (order by id) as x1, + lag(a, 2, 'def') ignore nulls over (order by id) as x2, + lag(a, 2, null) respect nulls over (order by id) as x4, + lag(a, 2, 'def') respect nulls over (order by id) as x5 +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') +---- +NULL def NULL def +NULL def NULL def +NULL def NULL NULL +NULL def b b + +# LAG window function IGNORE/RESPECT NULLS support with descending order and nondefault offset +query TTTT +select lag(a, 2, null) ignore nulls over (order by id desc) as x1, + lag(a, 2, 'def') ignore nulls over (order by id desc) as x2, + lag(a, 2, null) respect nulls over (order by id desc) as x4, + lag(a, 2, 'def') respect nulls over (order by id desc) as x5 +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') +---- +NULL def NULL def +NULL def NULL def +NULL def x x +x x NULL NULL + +# LAG window function IGNORE/RESPECT NULLS support with descending order and nondefault offset. +# To trigger WindowAggExec, we added a sum window function with all of the ranges. +statement error Execution error: IGNORE NULLS mode for LAG and LEAD is not supported for WindowAggExec +select lag(a, 2, null) ignore nulls over (order by id desc) as x1, + lag(a, 2, 'def') ignore nulls over (order by id desc) as x2, + lag(a, 2, null) respect nulls over (order by id desc) as x4, + lag(a, 2, 'def') respect nulls over (order by id desc) as x5, + sum(id) over (order by id desc ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) as sum_id +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') + +# LEAD window function IGNORE/RESPECT NULLS support with descending order and nondefault offset +statement error Execution error: IGNORE NULLS mode for LEAD is not supported for BoundedWindowAggExec +select lead(a, 2, null) ignore nulls over (order by id desc) as x1, + lead(a, 2, 'def') ignore nulls over (order by id desc) as x2, + lead(a, 2, null) respect nulls over (order by id desc) as x4, + lead(a, 2, 'def') respect nulls over (order by id desc) as x5 +from (select 2 id, 'b' a union all select 1 id, null a union all select 3 id, null union all select 4 id, 'x') + +statement ok +set datafusion.execution.batch_size = 1000; + +query I +SELECT LAG(c1, 2) IGNORE NULLS OVER() +FROM null_cases +ORDER BY c2 +LIMIT 5; +---- +78 +63 +3 +24 +14 + +# result should be same with above, when lag algorithm work with pruned data. +# decreasing batch size, causes data to be produced in smaller chunks at the source. +# Hence sliding window algorithm is used during calculations. +statement ok +set datafusion.execution.batch_size = 1; + +query I +SELECT LAG(c1, 2) IGNORE NULLS OVER() Review Comment: This test, triggers pruning internally. previous implementation was producing different result than the above result where data is fed as single chunk (because of large batch size), where no pruning is done. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
