(arrow-datafusion) branch main updated: fix: single_distinct_aggretation_to_group_by fail (#7997)

alamb Thu, 02 Nov 2023 04:56:15 -0700

This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git



The following commit(s) were added to refs/heads/main by this push:
     new 7f3f465718 fix: single_distinct_aggretation_to_group_by fail (#7997)
7f3f465718 is described below

commit 7f3f4657185c0a7c398ced8d1cfdbaf34c19eeb5
Author: Huaijin <[email protected]>
AuthorDate: Thu Nov 2 19:56:03 2023 +0800

    fix: single_distinct_aggretation_to_group_by fail (#7997)
    
    * fix: single_distinct_aggretation_to_group_by faile
    
    * fix
    
    * move test to groupby.slt
---
 .../optimizer/src/single_distinct_to_groupby.rs    |  2 +-
 datafusion/sqllogictest/test_files/groupby.slt     | 58 ++++++++++++++++++++++
 2 files changed, 59 insertions(+), 1 deletion(-)

diff --git a/datafusion/optimizer/src/single_distinct_to_groupby.rs b/datafusion/optimizer/src/single_distinct_to_groupby.rs
index 8e0f93cb57..be76c069f0 100644
--- a/datafusion/optimizer/src/single_distinct_to_groupby.rs
+++ b/datafusion/optimizer/src/single_distinct_to_groupby.rs
@@ -74,7 +74,7 @@ fn is_single_distinct_agg(plan: &LogicalPlan) -> Result<bool> {
                         distinct_count += 1;
                     }
                     for e in args {
-                        fields_set.insert(e.display_name()?);
+                        fields_set.insert(e.canonical_name());
                     }
                 }
             }
diff --git a/datafusion/sqllogictest/test_files/groupby.slt b/datafusion/sqllogictest/test_files/groupby.slt
index 5cb3ac2f81..ef6fee69ab 100644
--- a/datafusion/sqllogictest/test_files/groupby.slt
+++ b/datafusion/sqllogictest/test_files/groupby.slt
@@ -3733,3 +3733,61 @@ AggregateExec: mode=FinalPartitioned, gby=[c@0 as c, b@1 as b], aggr=[SUM(multip
 --------AggregateExec: mode=Partial, gby=[c@1 as c, b@0 as b], aggr=[SUM(multiple_ordered_table_with_pk.d)], ordering_mode=PartiallyOrdered
 ----------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
 ------------CsvExec: file_groups={1 group: [[WORKSPACE_ROOT/datafusion/core/tests/data/window_2.csv]]}, projection=[b, c, d], output_ordering=[c@1 ASC NULLS LAST], has_header=true
+
+# Tests for single distinct to group by optimization rule
+statement ok
+CREATE TABLE t(x int) AS VALUES (1), (2), (1);
+
+statement ok
+create table t1(x bigint,y int) as values (9223372036854775807,2), (9223372036854775806,2);
+
+query II
+SELECT SUM(DISTINCT x), MAX(DISTINCT x) from t GROUP BY x ORDER BY x;
+----
+1 1
+2 2
+
+query II
+SELECT MAX(DISTINCT x), SUM(DISTINCT x) from t GROUP BY x ORDER BY x;
+----
+1 1
+2 2
+
+query TT
+EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT x) FROM t1 GROUP BY y;
+----
+logical_plan
+Projection: SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)
+--Aggregate: groupBy=[[t1.y]], aggr=[[SUM(DISTINCT CAST(t1.x AS Float64)), MAX(DISTINCT t1.x)]]
+----TableScan: t1 projection=[x, y]
+physical_plan
+ProjectionExec: expr=[SUM(DISTINCT t1.x)@1 as SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)@2 as MAX(DISTINCT t1.x)]
+--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)]
+----CoalesceBatchesExec: target_batch_size=2
+------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8
+--------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
+----------AggregateExec: mode=Partial, gby=[y@1 as y], aggr=[SUM(DISTINCT t1.x), MAX(DISTINCT t1.x)]
+------------MemoryExec: partitions=1, partition_sizes=[1]
+
+query TT
+EXPLAIN SELECT SUM(DISTINCT CAST(x AS DOUBLE)), MAX(DISTINCT CAST(x AS DOUBLE)) FROM t1 GROUP BY y;
+----
+logical_plan
+Projection: SUM(alias1) AS SUM(DISTINCT t1.x), MAX(alias1) AS MAX(DISTINCT t1.x)
+--Aggregate: groupBy=[[t1.y]], aggr=[[SUM(alias1), MAX(alias1)]]
+----Aggregate: groupBy=[[t1.y, CAST(t1.x AS Float64)t1.x AS t1.x AS alias1]], aggr=[[]]
+------Projection: CAST(t1.x AS Float64) AS CAST(t1.x AS Float64)t1.x, t1.y
+--------TableScan: t1 projection=[x, y]
+physical_plan
+ProjectionExec: expr=[SUM(alias1)@1 as SUM(DISTINCT t1.x), MAX(alias1)@2 as MAX(DISTINCT t1.x)]
+--AggregateExec: mode=FinalPartitioned, gby=[y@0 as y], aggr=[SUM(alias1), MAX(alias1)]
+----CoalesceBatchesExec: target_batch_size=2
+------RepartitionExec: partitioning=Hash([y@0], 8), input_partitions=8
+--------AggregateExec: mode=Partial, gby=[y@0 as y], aggr=[SUM(alias1), MAX(alias1)]
+----------AggregateExec: mode=FinalPartitioned, gby=[y@0 as y, alias1@1 as alias1], aggr=[]
+------------CoalesceBatchesExec: target_batch_size=2
+--------------RepartitionExec: partitioning=Hash([y@0, alias1@1], 8), input_partitions=8
+----------------RepartitionExec: partitioning=RoundRobinBatch(8), input_partitions=1
+------------------AggregateExec: mode=Partial, gby=[y@1 as y, CAST(t1.x AS Float64)t1.x@0 as alias1], aggr=[]
+--------------------ProjectionExec: expr=[CAST(x@0 AS Float64) as CAST(t1.x AS Float64)t1.x, y@1 as y]
+----------------------MemoryExec: partitions=1, partition_sizes=[1]

(arrow-datafusion) branch main updated: fix: single_distinct_aggretation_to_group_by fail (#7997)

Reply via email to