This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 06d3bcca5b Fix count(null) and count(distinct null) (#8511)
06d3bcca5b is described below

commit 06d3bcca5b4070e41429ddbd01c5d8155a5b6084
Author: Georgi Krastev <[email protected]>
AuthorDate: Thu Dec 14 21:19:23 2023 +0100

    Fix count(null) and count(distinct null) (#8511)
    
    Use `logical_nulls` when the array data type is `Null`.
---
 datafusion/physical-expr/src/aggregate/count.rs         | 10 ++++++----
 .../physical-expr/src/aggregate/count_distinct.rs       |  5 +++++
 datafusion/sqllogictest/test_files/aggregate.slt        | 17 ++++++++++++++++-
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index 738ca4e915..8e9ae5cea3 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -123,7 +123,7 @@ impl GroupsAccumulator for CountGroupsAccumulator {
         self.counts.resize(total_num_groups, 0);
         accumulate_indices(
             group_indices,
-            values.nulls(), // ignore values
+            values.logical_nulls().as_ref(),
             opt_filter,
             |group_index| {
                 self.counts[group_index] += 1;
@@ -198,16 +198,18 @@ fn null_count_for_multiple_cols(values: &[ArrayRef]) -> usize {
     if values.len() > 1 {
         let result_bool_buf: Option<BooleanBuffer> = values
             .iter()
-            .map(|a| a.nulls())
+            .map(|a| a.logical_nulls())
             .fold(None, |acc, b| match (acc, b) {
                 (Some(acc), Some(b)) => Some(acc.bitand(b.inner())),
                 (Some(acc), None) => Some(acc),
-                (None, Some(b)) => Some(b.inner().clone()),
+                (None, Some(b)) => Some(b.into_inner()),
                 _ => None,
             });
         result_bool_buf.map_or(0, |b| values[0].len() - b.count_set_bits())
     } else {
-        values[0].null_count()
+        values[0]
+            .logical_nulls()
+            .map_or(0, |nulls| nulls.null_count())
     }
 }
 
diff --git a/datafusion/physical-expr/src/aggregate/count_distinct.rs b/datafusion/physical-expr/src/aggregate/count_distinct.rs
index f5242d983d..c2fd32a96c 100644
--- a/datafusion/physical-expr/src/aggregate/count_distinct.rs
+++ b/datafusion/physical-expr/src/aggregate/count_distinct.rs
@@ -152,7 +152,12 @@ impl Accumulator for DistinctCountAccumulator {
         if values.is_empty() {
             return Ok(());
         }
+
         let arr = &values[0];
+        if arr.data_type() == &DataType::Null {
+            return Ok(());
+        }
+
         (0..arr.len()).try_for_each(|index| {
             if !arr.is_null(index) {
                 let scalar = ScalarValue::try_from_array(arr, index)?;
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index bcda3464f4..78575c9dff 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -1492,6 +1492,12 @@ SELECT count(c1, c2) FROM test
 ----
 3
 
+# count_null
+query III
+SELECT count(null), count(null, null), count(distinct null) FROM test
+----
+0 0 0
+
 # count_multi_expr_group_by
 query I
 SELECT count(c1, c2) FROM test group by c1 order by c1
@@ -1501,6 +1507,15 @@ SELECT count(c1, c2) FROM test group by c1 order by c1
 2
 0
 
+# count_null_group_by
+query III
+SELECT count(null), count(null, null), count(distinct null) FROM test group by c1 order by c1
+----
+0 0 0
+0 0 0
+0 0 0
+0 0 0
+
 # aggreggte_with_alias
 query II
 select c1, sum(c2) as `Total Salary` from test group by c1 order by c1
@@ -3241,4 +3256,4 @@ select count(*) from (select count(*) from (select 1));
 query I
 select count(*) from (select count(*) a, count(*) b from (select 1));
 ----
-1
\ No newline at end of file
+1

Reply via email to