mustafasrepo commented on code in PR #6234:
URL: https://github.com/apache/arrow-datafusion/pull/6234#discussion_r1188481299
##########
datafusion/core/tests/sqllogictests/test_files/groupby.slt:
##########
@@ -1921,3 +1921,96 @@ SELECT DISTINCT + col1 FROM tab2 AS cor0 GROUP BY
cor0.col1
41
59
61
+
+
+
+# Columns in the table are a0, a, b, c, d. Source is CsvExec, which is ordered
+# by columns a, b, c. Column a has cardinality 2, column b has cardinality 4.
+# Column c has cardinality 100 (unique entries). Column d has cardinality 5.
+statement ok
+CREATE EXTERNAL TABLE annotated_data_finite2 (
+ a0 INTEGER,
+ a INTEGER,
+ b INTEGER,
+ c INTEGER,
+ d INTEGER
+)
+STORED AS CSV
+WITH HEADER ROW
+WITH ORDER (a ASC, b ASC, c ASC)
+LOCATION 'tests/data/window_2.csv';
+
+
+# test_source_sorted_groupby
+query TT
+EXPLAIN SELECT a, b,
+ SUM(c) as summation1
+ FROM annotated_data_finite2
+ GROUP BY b, a
+----
+logical_plan
+Projection: annotated_data_finite2.a, annotated_data_finite2.b,
SUM(annotated_data_finite2.c) AS summation1
+ Aggregate: groupBy=[[annotated_data_finite2.b, annotated_data_finite2.a]],
aggr=[[SUM(annotated_data_finite2.c)]]
+ TableScan: annotated_data_finite2 projection=[a, b, c]
+physical_plan
+ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_finite2.c)@2 as
summation1]
+ AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a],
aggr=[SUM(annotated_data_finite2.c)]
+ CoalesceBatchesExec: target_batch_size=8192
+ RepartitionExec: partitioning=Hash([Column { name: "b", index: 0 },
Column { name: "a", index: 1 }], 4), input_partitions=4
+ AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a],
aggr=[SUM(annotated_data_finite2.c)], ordering_mode=FullyOrdered
+ RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+ CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b,
c], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS
LAST], has_header=true
+
+
+query III rowsort
+ SELECT a, b,
+ SUM(c) as summation1
+ FROM annotated_data_finite2
+ GROUP BY b, a
+----
+0 0 300
+0 1 925
+1 2 1550
+1 3 2175
+
+
+# test_source_sorted_groupby2
+
+query TT
+EXPLAIN SELECT a, d,
+ SUM(c) as summation1
+ FROM annotated_data_finite2
+ GROUP BY d, a
+----
+logical_plan
+Projection: annotated_data_finite2.a, annotated_data_finite2.d,
SUM(annotated_data_finite2.c) AS summation1
+ Aggregate: groupBy=[[annotated_data_finite2.d, annotated_data_finite2.a]],
aggr=[[SUM(annotated_data_finite2.c)]]
+ TableScan: annotated_data_finite2 projection=[a, c, d]
+physical_plan
+ProjectionExec: expr=[a@1 as a, d@0 as d, SUM(annotated_data_finite2.c)@2 as
summation1]
+ AggregateExec: mode=FinalPartitioned, gby=[d@0 as d, a@1 as a],
aggr=[SUM(annotated_data_finite2.c)]
+ CoalesceBatchesExec: target_batch_size=8192
+ RepartitionExec: partitioning=Hash([Column { name: "d", index: 0 },
Column { name: "a", index: 1 }], 4), input_partitions=4
+ AggregateExec: mode=Partial, gby=[d@2 as d, a@0 as a],
aggr=[SUM(annotated_data_finite2.c)], ordering_mode=PartiallyOrdered
+ RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+ CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c,
d], output_ordering=[a@0 ASC NULLS LAST], has_header=true
Review Comment:
Similarly, this section of the physical plan would turn into:
```text
AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a],
aggr=[SUM(annotated_data_finite2.c)], ordering_mode=PartiallyOrdered
CsvExec: file_groups={1 group:
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c,
d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]