This is an automated email from the ASF dual-hosted git repository.

akurmustafa pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 8a4bad4654 Add new test (#8992)
8a4bad4654 is described below

commit 8a4bad46540598c6acdf432bde08c2a4c76c5039
Author: Mustafa Akur <[email protected]>
AuthorDate: Fri Jan 26 09:21:38 2024 +0300

    Add new test (#8992)
---
 datafusion/optimizer/src/optimize_projections.rs | 31 ++++++++++++------------
 datafusion/sqllogictest/test_files/select.slt    | 20 +++++++++++++++
 2 files changed, 36 insertions(+), 15 deletions(-)

diff --git a/datafusion/optimizer/src/optimize_projections.rs b/datafusion/optimizer/src/optimize_projections.rs
index f87f5fdea9..1035995642 100644
--- a/datafusion/optimizer/src/optimize_projections.rs
+++ b/datafusion/optimizer/src/optimize_projections.rs
@@ -218,6 +218,22 @@ fn optimize_projections(
             // Only use the absolutely necessary aggregate expressions required
             // by the parent:
             let mut new_aggr_expr = get_at_indices(&aggregate.aggr_expr, &aggregate_reqs);
+
+            // Aggregations always need at least one aggregate expression.
+            // With a nested count, we don't require any column as input, but
+            // still need to create a correct aggregate, which may be optimized
+            // out later. As an example, consider the following query:
+            //
+            // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...])
+            //
+            // which always returns 1.
+            if new_aggr_expr.is_empty()
+                && new_group_bys.is_empty()
+                && !aggregate.aggr_expr.is_empty()
+            {
+                new_aggr_expr = vec![aggregate.aggr_expr[0].clone()];
+            }
+
             let all_exprs_iter = new_group_bys.iter().chain(new_aggr_expr.iter());
             let schema = aggregate.input.schema();
             let necessary_indices = indices_referred_by_exprs(schema, all_exprs_iter)?;
@@ -238,21 +254,6 @@ fn optimize_projections(
             let (aggregate_input, _) =
                 add_projection_on_top_if_helpful(aggregate_input, necessary_exprs)?;
 
-            // Aggregations always need at least one aggregate expression.
-            // With a nested count, we don't require any column as input, but
-            // still need to create a correct aggregate, which may be optimized
-            // out later. As an example, consider the following query:
-            //
-            // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...])
-            //
-            // which always returns 1.
-            if new_aggr_expr.is_empty()
-                && new_group_bys.is_empty()
-                && !aggregate.aggr_expr.is_empty()
-            {
-                new_aggr_expr = vec![aggregate.aggr_expr[0].clone()];
-            }
-
             // Create a new aggregate plan with the updated input and only the
             // absolutely necessary fields:
             return Aggregate::try_new(
diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt
index 5216b14cb2..50c62eff77 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -1527,3 +1527,23 @@ SELECT to_timestamp('I AM NOT A TIMESTAMP');
 
 query error DataFusion error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type
 SELECT CAST('' AS int);
+
+# See issue: https://github.com/apache/arrow-datafusion/issues/8978
+statement ok
+create table users (id int, name varchar);
+
+statement ok
+insert into users values (1, 'Tom');
+
+statement ok
+create view v as select count(id) from users;
+
+query I
+select * from v;
+----
+1
+
+query I
+select count(1) from v;
+----
+1

Reply via email to