mustafasrepo commented on code in PR #6234:
URL: https://github.com/apache/arrow-datafusion/pull/6234#discussion_r1188481299


##########
datafusion/core/tests/sqllogictests/test_files/groupby.slt:
##########
@@ -1921,3 +1921,96 @@ SELECT DISTINCT + col1 FROM tab2 AS cor0 GROUP BY 
cor0.col1
 41
 59
 61
+
+
+
+# Columns in the table are a0, a, b, c, d. The source is a CsvExec that is
+# ordered by columns a, b, c. Column a has cardinality 2, column b has cardinality 4.
+# Column c has cardinality 100 (unique entries). Column d has cardinality 5.
+statement ok
+CREATE EXTERNAL TABLE annotated_data_finite2 (
+  a0 INTEGER,
+  a INTEGER,
+  b INTEGER,
+  c INTEGER,
+  d INTEGER
+)
+STORED AS CSV
+WITH HEADER ROW
+WITH ORDER (a ASC, b ASC, c ASC)
+LOCATION 'tests/data/window_2.csv';
+
+
+# test_source_sorted_groupby
+query TT
+EXPLAIN SELECT a, b,
+ SUM(c) as summation1
+ FROM annotated_data_finite2
+ GROUP BY b, a
+----
+logical_plan
+Projection: annotated_data_finite2.a, annotated_data_finite2.b, 
SUM(annotated_data_finite2.c) AS summation1
+  Aggregate: groupBy=[[annotated_data_finite2.b, annotated_data_finite2.a]], 
aggr=[[SUM(annotated_data_finite2.c)]]
+    TableScan: annotated_data_finite2 projection=[a, b, c]
+physical_plan
+ProjectionExec: expr=[a@1 as a, b@0 as b, SUM(annotated_data_finite2.c)@2 as 
summation1]
+  AggregateExec: mode=FinalPartitioned, gby=[b@0 as b, a@1 as a], 
aggr=[SUM(annotated_data_finite2.c)]
+    CoalesceBatchesExec: target_batch_size=8192
+      RepartitionExec: partitioning=Hash([Column { name: "b", index: 0 }, 
Column { name: "a", index: 1 }], 4), input_partitions=4
+        AggregateExec: mode=Partial, gby=[b@1 as b, a@0 as a], 
aggr=[SUM(annotated_data_finite2.c)], ordering_mode=FullyOrdered
+          RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+            CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, b, 
c], output_ordering=[a@0 ASC NULLS LAST, b@1 ASC NULLS LAST, c@2 ASC NULLS 
LAST], has_header=true
+
+
+query III rowsort
+ SELECT a, b,
+ SUM(c) as summation1
+ FROM annotated_data_finite2
+ GROUP BY b, a
+----
+0 0 300
+0 1 925
+1 2 1550
+1 3 2175
+
+
+# test_source_sorted_groupby2
+
+query TT
+EXPLAIN SELECT a, d,
+ SUM(c) as summation1
+ FROM annotated_data_finite2
+ GROUP BY d, a
+----
+logical_plan
+Projection: annotated_data_finite2.a, annotated_data_finite2.d, 
SUM(annotated_data_finite2.c) AS summation1
+  Aggregate: groupBy=[[annotated_data_finite2.d, annotated_data_finite2.a]], 
aggr=[[SUM(annotated_data_finite2.c)]]
+    TableScan: annotated_data_finite2 projection=[a, c, d]
+physical_plan
+ProjectionExec: expr=[a@1 as a, d@0 as d, SUM(annotated_data_finite2.c)@2 as 
summation1]
+  AggregateExec: mode=FinalPartitioned, gby=[d@0 as d, a@1 as a], 
aggr=[SUM(annotated_data_finite2.c)]
+    CoalesceBatchesExec: target_batch_size=8192
+      RepartitionExec: partitioning=Hash([Column { name: "d", index: 0 }, 
Column { name: "a", index: 1 }], 4), input_partitions=4
+        AggregateExec: mode=Partial, gby=[d@2 as d, a@0 as a], 
aggr=[SUM(annotated_data_finite2.c)], ordering_mode=PartiallyOrdered
+          RepartitionExec: partitioning=RoundRobinBatch(4), input_partitions=1
+            CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, 
d], output_ordering=[a@0 ASC NULLS LAST], has_header=true

Review Comment:
   Similarly, this section of the physical plan would turn into:
   ```text
     AggregateExec: mode=Single, gby=[d@2 as d, a@0 as a], 
aggr=[SUM(annotated_data_finite2.c)], ordering_mode=PartiallyOrdered
       CsvExec: file_groups={1 group: 
[[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[a, c, 
d], infinite_source=true, output_ordering=[a@0 ASC NULLS LAST], has_header=true
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to