NGA-TRAN commented on code in PR #7566:
URL: https://github.com/apache/arrow-datafusion/pull/7566#discussion_r1328795737
##########
datafusion/sqllogictest/test_files/select.slt:
##########
@@ -888,6 +888,112 @@ physical_plan
ProjectionExec: expr=[a@0 as a, b@1 as b, 2 as Int64(2)]
--CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b],
output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST], has_header=true
+# The source is ordered by a, b, c.
+# When the filter makes column a constant (a = 0), the ordering
+# b, c is still satisfied, so the final plan shouldn't contain
+# a SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0
+ORDER BY b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC
NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0)]
+physical_plan
+SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# The source is ordered by a, b, c.
+# When the filter makes columns a and b constant (a = 0 and b = 0),
+# the ordering c is still satisfied, so the final plan shouldn't
+# contain a SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY c;
+----
+logical_plan
+Sort: annotated_data_finite2.c ASC NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# The source is ordered by a, b, c.
+# When the filter makes columns a and b constant (a = 0 and b = 0),
+# the ordering b, c is still satisfied, so the final plan shouldn't
+# contain a SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.b ASC NULLS LAST, annotated_data_finite2.c ASC
NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [b@2 ASC NULLS LAST,c@3 ASC NULLS LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
+
+# The source is ordered by a, b, c.
+# When the filter makes columns a and b constant (a = 0 and b = 0),
+# the ordering a, b, c is still satisfied, so the final plan shouldn't
+# contain a SortExec.
+query TT
+EXPLAIN SELECT *
+FROM annotated_data_finite2
+WHERE a=0 and b=0
+ORDER BY a, b, c;
+----
+logical_plan
+Sort: annotated_data_finite2.a ASC NULLS LAST, annotated_data_finite2.b ASC
NULLS LAST, annotated_data_finite2.c ASC NULLS LAST
+--Filter: annotated_data_finite2.a = Int32(0) AND annotated_data_finite2.b =
Int32(0)
+----TableScan: annotated_data_finite2 projection=[a0, a, b, c, d],
partial_filters=[annotated_data_finite2.a = Int32(0), annotated_data_finite2.b
= Int32(0)]
+physical_plan
+SortPreservingMergeExec: [a@1 ASC NULLS LAST,b@2 ASC NULLS LAST,c@3 ASC NULLS
LAST]
+--CoalesceBatchesExec: target_batch_size=8192
+----FilterExec: a@1 = 0 AND b@2 = 0
+------RepartitionExec: partitioning=RoundRobinBatch(2), input_partitions=1
+--------CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a0, a,
b, c, d], output_ordering=[a@1 ASC NULLS LAST, b@2 ASC NULLS LAST, c@3 ASC
NULLS LAST], has_header=true
Review Comment:
This plan without SortExec is awesome. Thanks so much @mustafasrepo for
implementing it and @alamb for reviewing it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]