This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 06d3bcca5b Fix count(null) and count(distinct null) (#8511)
06d3bcca5b is described below
commit 06d3bcca5b4070e41429ddbd01c5d8155a5b6084
Author: Georgi Krastev <[email protected]>
AuthorDate: Thu Dec 14 21:19:23 2023 +0100
Fix count(null) and count(distinct null) (#8511)
Use `logical_nulls` when the array data type is `Null`.
---
datafusion/physical-expr/src/aggregate/count.rs | 10 ++++++----
.../physical-expr/src/aggregate/count_distinct.rs | 5 +++++
datafusion/sqllogictest/test_files/aggregate.slt | 17 ++++++++++++++++-
3 files changed, 27 insertions(+), 5 deletions(-)
diff --git a/datafusion/physical-expr/src/aggregate/count.rs b/datafusion/physical-expr/src/aggregate/count.rs
index 738ca4e915..8e9ae5cea3 100644
--- a/datafusion/physical-expr/src/aggregate/count.rs
+++ b/datafusion/physical-expr/src/aggregate/count.rs
@@ -123,7 +123,7 @@ impl GroupsAccumulator for CountGroupsAccumulator {
self.counts.resize(total_num_groups, 0);
accumulate_indices(
group_indices,
- values.nulls(), // ignore values
+ values.logical_nulls().as_ref(),
opt_filter,
|group_index| {
self.counts[group_index] += 1;
@@ -198,16 +198,18 @@ fn null_count_for_multiple_cols(values: &[ArrayRef]) -> usize {
if values.len() > 1 {
let result_bool_buf: Option<BooleanBuffer> = values
.iter()
- .map(|a| a.nulls())
+ .map(|a| a.logical_nulls())
.fold(None, |acc, b| match (acc, b) {
(Some(acc), Some(b)) => Some(acc.bitand(b.inner())),
(Some(acc), None) => Some(acc),
- (None, Some(b)) => Some(b.inner().clone()),
+ (None, Some(b)) => Some(b.into_inner()),
_ => None,
});
result_bool_buf.map_or(0, |b| values[0].len() - b.count_set_bits())
} else {
- values[0].null_count()
+ values[0]
+ .logical_nulls()
+ .map_or(0, |nulls| nulls.null_count())
}
}
diff --git a/datafusion/physical-expr/src/aggregate/count_distinct.rs b/datafusion/physical-expr/src/aggregate/count_distinct.rs
index f5242d983d..c2fd32a96c 100644
--- a/datafusion/physical-expr/src/aggregate/count_distinct.rs
+++ b/datafusion/physical-expr/src/aggregate/count_distinct.rs
@@ -152,7 +152,12 @@ impl Accumulator for DistinctCountAccumulator {
if values.is_empty() {
return Ok(());
}
+
let arr = &values[0];
+ if arr.data_type() == &DataType::Null {
+ return Ok(());
+ }
+
(0..arr.len()).try_for_each(|index| {
if !arr.is_null(index) {
let scalar = ScalarValue::try_from_array(arr, index)?;
diff --git a/datafusion/sqllogictest/test_files/aggregate.slt b/datafusion/sqllogictest/test_files/aggregate.slt
index bcda3464f4..78575c9dff 100644
--- a/datafusion/sqllogictest/test_files/aggregate.slt
+++ b/datafusion/sqllogictest/test_files/aggregate.slt
@@ -1492,6 +1492,12 @@ SELECT count(c1, c2) FROM test
----
3
+# count_null
+query III
+SELECT count(null), count(null, null), count(distinct null) FROM test
+----
+0 0 0
+
# count_multi_expr_group_by
query I
SELECT count(c1, c2) FROM test group by c1 order by c1
@@ -1501,6 +1507,15 @@ SELECT count(c1, c2) FROM test group by c1 order by c1
2
0
+# count_null_group_by
+query III
+SELECT count(null), count(null, null), count(distinct null) FROM test group by c1 order by c1
+----
+0 0 0
+0 0 0
+0 0 0
+0 0 0
+
# aggreggte_with_alias
query II
select c1, sum(c2) as `Total Salary` from test group by c1 order by c1
@@ -3241,4 +3256,4 @@ select count(*) from (select count(*) from (select 1));
query I
select count(*) from (select count(*) a, count(*) b from (select 1));
----
-1
\ No newline at end of file
+1