This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 057583d5c0 chore: Use an enum to express the different kinds of
nullability in an array (#18048)
057583d5c0 is described below
commit 057583d5c02fb6b555bdd64b87cf596daa05d70f
Author: Martin Grigorov <[email protected]>
AuthorDate: Wed Oct 15 13:37:12 2025 +0300
chore: Use an enum to express the different kinds of nullability in an
array (#18048)
* chore: Use an enum to express the different kinds of nullability in an
array
Follow-up of
https://github.com/apache/datafusion/pull/17726#pullrequestreview-3289115878
* Use the Nulls enum also in .../multi_group_by/primitive.rs
* Use the Nulls enum in .../multi_group_by/bytes[_view].rs as well
---
.../aggregates/group_values/multi_group_by/boolean.rs | 13 +++++++------
.../src/aggregates/group_values/multi_group_by/bytes.rs | 16 +++++++++-------
.../aggregates/group_values/multi_group_by/bytes_view.rs | 16 +++++++++-------
.../src/aggregates/group_values/multi_group_by/mod.rs | 10 ++++++++++
.../aggregates/group_values/multi_group_by/primitive.rs | 16 +++++++++-------
5 files changed, 44 insertions(+), 27 deletions(-)
diff --git
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs
index 91a9e21aeb..03e26446f5 100644
---
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs
+++
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/boolean.rs
@@ -17,6 +17,7 @@
use std::sync::Arc;
+use crate::aggregates::group_values::multi_group_by::Nulls;
use crate::aggregates::group_values::multi_group_by::{nulls_equal_to,
GroupColumn};
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
use arrow::array::{Array as _, ArrayRef, AsArray, BooleanArray,
BooleanBufferBuilder};
@@ -115,15 +116,15 @@ impl<const NULLABLE: bool> GroupColumn for
BooleanGroupValueBuilder<NULLABLE> {
let null_count = array.null_count();
let num_rows = array.len();
let all_null_or_non_null = if null_count == 0 {
- Some(true)
+ Nulls::None
} else if null_count == num_rows {
- Some(false)
+ Nulls::All
} else {
- None
+ Nulls::Some
};
match (NULLABLE, all_null_or_non_null) {
- (true, None) => {
+ (true, Nulls::Some) => {
for &row in rows {
if array.is_null(row) {
self.nulls.append(true);
@@ -135,14 +136,14 @@ impl<const NULLABLE: bool> GroupColumn for
BooleanGroupValueBuilder<NULLABLE> {
}
}
- (true, Some(true)) => {
+ (true, Nulls::None) => {
self.nulls.append_n(rows.len(), false);
for &row in rows {
self.buffer.append(arr.value(row));
}
}
- (true, Some(false)) => {
+ (true, Nulls::All) => {
self.nulls.append_n(rows.len(), true);
self.buffer.append_n(rows.len(), bool::default());
}
diff --git
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs
index ff00b9f0e0..d52721c2ee 100644
---
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs
+++
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes.rs
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-use crate::aggregates::group_values::multi_group_by::{nulls_equal_to,
GroupColumn};
+use crate::aggregates::group_values::multi_group_by::{
+ nulls_equal_to, GroupColumn, Nulls,
+};
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
use arrow::array::{
types::GenericStringType, Array, ArrayRef, AsArray, BufferBuilder,
@@ -138,28 +140,28 @@ where
let null_count = array.null_count();
let num_rows = array.len();
let all_null_or_non_null = if null_count == 0 {
- Some(true)
+ Nulls::None
} else if null_count == num_rows {
- Some(false)
+ Nulls::All
} else {
- None
+ Nulls::Some
};
match all_null_or_non_null {
- None => {
+ Nulls::Some => {
for &row in rows {
self.append_val_inner::<B>(array, row)?
}
}
- Some(true) => {
+ Nulls::None => {
self.nulls.append_n(rows.len(), false);
for &row in rows {
self.do_append_val_inner(arr, row)?;
}
}
- Some(false) => {
+ Nulls::All => {
self.nulls.append_n(rows.len(), true);
let new_len = self.offsets.len() + rows.len();
diff --git
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs
index 599268baec..fde477c2cf 100644
---
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs
+++
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/bytes_view.rs
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-use crate::aggregates::group_values::multi_group_by::{nulls_equal_to,
GroupColumn};
+use crate::aggregates::group_values::multi_group_by::{
+ nulls_equal_to, GroupColumn, Nulls,
+};
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
use arrow::array::{make_view, Array, ArrayRef, AsArray, ByteView,
GenericByteViewArray};
use arrow::buffer::{Buffer, ScalarBuffer};
@@ -145,28 +147,28 @@ impl<B: ByteViewType> ByteViewGroupValueBuilder<B> {
let null_count = array.null_count();
let num_rows = array.len();
let all_null_or_non_null = if null_count == 0 {
- Some(true)
+ Nulls::None
} else if null_count == num_rows {
- Some(false)
+ Nulls::All
} else {
- None
+ Nulls::Some
};
match all_null_or_non_null {
- None => {
+ Nulls::Some => {
for &row in rows {
self.append_val_inner(array, row);
}
}
- Some(true) => {
+ Nulls::None => {
self.nulls.append_n(rows.len(), false);
for &row in rows {
self.do_append_val_inner(arr, row);
}
}
- Some(false) => {
+ Nulls::All => {
self.nulls.append_n(rows.len(), true);
let new_len = self.views.len() + rows.len();
self.views.resize(new_len, 0);
diff --git
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
index 849aac5bb2..58bd35d640 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/mod.rs
@@ -1250,6 +1250,16 @@ fn supported_type(data_type: &DataType) -> bool {
)
}
+///Shows how many `null`s there are in an array
+enum Nulls {
+ /// All array items are `null`s
+ All,
+ /// There are both `null`s and non-`null`s in the array items
+ Some,
+ /// There are no `null`s in the array items
+ None,
+}
+
#[cfg(test)]
mod tests {
use std::{collections::HashMap, sync::Arc};
diff --git
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs
index f2b12a8c95..a586197e50 100644
---
a/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs
+++
b/datafusion/physical-plan/src/aggregates/group_values/multi_group_by/primitive.rs
@@ -15,7 +15,9 @@
// specific language governing permissions and limitations
// under the License.
-use crate::aggregates::group_values::multi_group_by::{nulls_equal_to,
GroupColumn};
+use crate::aggregates::group_values::multi_group_by::{
+ nulls_equal_to, GroupColumn, Nulls,
+};
use crate::aggregates::group_values::null_builder::MaybeNullBufferBuilder;
use arrow::array::ArrowNativeTypeOp;
use arrow::array::{cast::AsArray, Array, ArrayRef, ArrowPrimitiveType,
PrimitiveArray};
@@ -132,15 +134,15 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool>
GroupColumn
let null_count = array.null_count();
let num_rows = array.len();
let all_null_or_non_null = if null_count == 0 {
- Some(true)
+ Nulls::None
} else if null_count == num_rows {
- Some(false)
+ Nulls::All
} else {
- None
+ Nulls::Some
};
match (NULLABLE, all_null_or_non_null) {
- (true, None) => {
+ (true, Nulls::Some) => {
for &row in rows {
if array.is_null(row) {
self.nulls.append(true);
@@ -152,14 +154,14 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool>
GroupColumn
}
}
- (true, Some(true)) => {
+ (true, Nulls::None) => {
self.nulls.append_n(rows.len(), false);
for &row in rows {
self.group_values.push(arr.value(row));
}
}
- (true, Some(false)) => {
+ (true, Nulls::All) => {
self.nulls.append_n(rows.len(), true);
self.group_values
.extend(iter::repeat_n(T::default_value(), rows.len()));
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]