This is an automated email from the ASF dual-hosted git repository.
akurmustafa pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 8a4bad4654 Add new test (#8992)
8a4bad4654 is described below
commit 8a4bad46540598c6acdf432bde08c2a4c76c5039
Author: Mustafa Akur <[email protected]>
AuthorDate: Fri Jan 26 09:21:38 2024 +0300
Add new test (#8992)
---
datafusion/optimizer/src/optimize_projections.rs | 31 ++++++++++++------------
datafusion/sqllogictest/test_files/select.slt | 20 +++++++++++++++
2 files changed, 36 insertions(+), 15 deletions(-)
diff --git a/datafusion/optimizer/src/optimize_projections.rs b/datafusion/optimizer/src/optimize_projections.rs
index f87f5fdea9..1035995642 100644
--- a/datafusion/optimizer/src/optimize_projections.rs
+++ b/datafusion/optimizer/src/optimize_projections.rs
@@ -218,6 +218,22 @@ fn optimize_projections(
// Only use the absolutely necessary aggregate expressions required
// by the parent:
let mut new_aggr_expr = get_at_indices(&aggregate.aggr_expr,
&aggregate_reqs);
+
+ // Aggregations always need at least one aggregate expression.
+ // With a nested count, we don't require any column as input, but
+ // still need to create a correct aggregate, which may be optimized
+ // out later. As an example, consider the following query:
+ //
+ // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...])
+ //
+ // which always returns 1.
+ if new_aggr_expr.is_empty()
+ && new_group_bys.is_empty()
+ && !aggregate.aggr_expr.is_empty()
+ {
+ new_aggr_expr = vec![aggregate.aggr_expr[0].clone()];
+ }
+
let all_exprs_iter =
new_group_bys.iter().chain(new_aggr_expr.iter());
let schema = aggregate.input.schema();
let necessary_indices = indices_referred_by_exprs(schema,
all_exprs_iter)?;
@@ -238,21 +254,6 @@ fn optimize_projections(
let (aggregate_input, _) =
add_projection_on_top_if_helpful(aggregate_input,
necessary_exprs)?;
- // Aggregations always need at least one aggregate expression.
- // With a nested count, we don't require any column as input, but
- // still need to create a correct aggregate, which may be optimized
- // out later. As an example, consider the following query:
- //
- // SELECT COUNT(*) FROM (SELECT COUNT(*) FROM [...])
- //
- // which always returns 1.
- if new_aggr_expr.is_empty()
- && new_group_bys.is_empty()
- && !aggregate.aggr_expr.is_empty()
- {
- new_aggr_expr = vec![aggregate.aggr_expr[0].clone()];
- }
-
// Create a new aggregate plan with the updated input and only the
// absolutely necessary fields:
return Aggregate::try_new(
diff --git a/datafusion/sqllogictest/test_files/select.slt b/datafusion/sqllogictest/test_files/select.slt
index 5216b14cb2..50c62eff77 100644
--- a/datafusion/sqllogictest/test_files/select.slt
+++ b/datafusion/sqllogictest/test_files/select.slt
@@ -1527,3 +1527,23 @@ SELECT to_timestamp('I AM NOT A TIMESTAMP');
query error DataFusion error: Arrow error: Cast error: Cannot cast string '' to value of Int32 type
SELECT CAST('' AS int);
+
+# See issue: https://github.com/apache/arrow-datafusion/issues/8978
+statement ok
+create table users (id int, name varchar);
+
+statement ok
+insert into users values (1, 'Tom');
+
+statement ok
+create view v as select count(id) from users;
+
+query I
+select * from v;
+----
+1
+
+query I
+select count(1) from v;
+----
+1