This is an automated email from the ASF dual-hosted git repository.

dheres pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 821e14c5c0 Reduce code duplication in `PrimitiveGroupValueBuilder` 
with const generics (#12703)
821e14c5c0 is described below

commit 821e14c5c0c0524081f1fcea12f357cb8ba86600
Author: Andrew Lamb <[email protected]>
AuthorDate: Wed Oct 2 16:22:21 2024 -0400

    Reduce code duplication in `PrimitiveGroupValueBuilder` with const generics 
(#12703)
    
    * Reduce code duplication in `PrimitiveGroupValueBuilder` with const 
generics
    
    * Fix docs
---
 .../src/aggregates/group_values/column.rs          |  10 +-
 .../src/aggregates/group_values/group_column.rs    | 104 +++++++--------------
 2 files changed, 40 insertions(+), 74 deletions(-)

diff --git a/datafusion/physical-plan/src/aggregates/group_values/column.rs 
b/datafusion/physical-plan/src/aggregates/group_values/column.rs
index 91d87302ce..28f35b2bde 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/column.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/column.rs
@@ -16,8 +16,7 @@
 // under the License.
 
 use crate::aggregates::group_values::group_column::{
-    ByteGroupValueBuilder, GroupColumn, NonNullPrimitiveGroupValueBuilder,
-    PrimitiveGroupValueBuilder,
+    ByteGroupValueBuilder, GroupColumn, PrimitiveGroupValueBuilder,
 };
 use crate::aggregates::group_values::GroupValues;
 use ahash::RandomState;
@@ -124,8 +123,7 @@ impl GroupValuesColumn {
     }
 }
 
-/// instantiates a [`PrimitiveGroupValueBuilder`] or
-/// [`NonNullPrimitiveGroupValueBuilder`]  and pushes it into $v
+/// instantiates a [`PrimitiveGroupValueBuilder`] and pushes it into $v
 ///
 /// Arguments:
 /// `$v`: the vector to push the new builder into
@@ -135,10 +133,10 @@ impl GroupValuesColumn {
 macro_rules! instantiate_primitive {
     ($v:expr, $nullable:expr, $t:ty) => {
         if $nullable {
-            let b = PrimitiveGroupValueBuilder::<$t>::new();
+            let b = PrimitiveGroupValueBuilder::<$t, true>::new();
             $v.push(Box::new(b) as _)
         } else {
-            let b = NonNullPrimitiveGroupValueBuilder::<$t>::new();
+            let b = PrimitiveGroupValueBuilder::<$t, false>::new();
             $v.push(Box::new(b) as _)
         }
     };
diff --git 
a/datafusion/physical-plan/src/aggregates/group_values/group_column.rs 
b/datafusion/physical-plan/src/aggregates/group_values/group_column.rs
index 122d77683c..15c9326296 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/group_column.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/group_column.rs
@@ -60,75 +60,25 @@ pub trait GroupColumn: Send + Sync {
     fn take_n(&mut self, n: usize) -> ArrayRef;
 }
 
-/// An implementation of [`GroupColumn`] for primitive values which are known 
to have no nulls
-#[derive(Debug)]
-pub struct NonNullPrimitiveGroupValueBuilder<T: ArrowPrimitiveType> {
-    group_values: Vec<T::Native>,
-}
-
-impl<T> NonNullPrimitiveGroupValueBuilder<T>
-where
-    T: ArrowPrimitiveType,
-{
-    pub fn new() -> Self {
-        Self {
-            group_values: vec![],
-        }
-    }
-}
-
-impl<T: ArrowPrimitiveType> GroupColumn for 
NonNullPrimitiveGroupValueBuilder<T> {
-    fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> 
bool {
-        // know input has no nulls
-        self.group_values[lhs_row] == array.as_primitive::<T>().value(rhs_row)
-    }
-
-    fn append_val(&mut self, array: &ArrayRef, row: usize) {
-        // input can't possibly have nulls, so don't worry about them
-        self.group_values.push(array.as_primitive::<T>().value(row))
-    }
-
-    fn len(&self) -> usize {
-        self.group_values.len()
-    }
-
-    fn size(&self) -> usize {
-        self.group_values.allocated_size()
-    }
-
-    fn build(self: Box<Self>) -> ArrayRef {
-        let Self { group_values } = *self;
-
-        let nulls = None;
-
-        Arc::new(PrimitiveArray::<T>::new(
-            ScalarBuffer::from(group_values),
-            nulls,
-        ))
-    }
-
-    fn take_n(&mut self, n: usize) -> ArrayRef {
-        let first_n = self.group_values.drain(0..n).collect::<Vec<_>>();
-        let first_n_nulls = None;
-
-        Arc::new(PrimitiveArray::<T>::new(
-            ScalarBuffer::from(first_n),
-            first_n_nulls,
-        ))
-    }
-}
-
-/// An implementation of [`GroupColumn`] for primitive values which may have 
nulls
+/// An implementation of [`GroupColumn`] for primitive values
+///
+/// Optimized to skip null buffer construction if the input is known to be non 
nullable
+///
+/// # Template parameters
+///
+/// `T`: the native Rust type that stores the data
+/// `NULLABLE`: if the data can contain any nulls
 #[derive(Debug)]
-pub struct PrimitiveGroupValueBuilder<T: ArrowPrimitiveType> {
+pub struct PrimitiveGroupValueBuilder<T: ArrowPrimitiveType, const NULLABLE: 
bool> {
     group_values: Vec<T::Native>,
     nulls: MaybeNullBufferBuilder,
 }
 
-impl<T> PrimitiveGroupValueBuilder<T>
+impl<T, const NULLABLE: bool> PrimitiveGroupValueBuilder<T, NULLABLE>
 where
     T: ArrowPrimitiveType,
 {
+    /// Create a new `PrimitiveGroupValueBuilder`
     pub fn new() -> Self {
         Self {
             group_values: vec![],
@@ -137,18 +87,32 @@ where
     }
 }
 
-impl<T: ArrowPrimitiveType> GroupColumn for PrimitiveGroupValueBuilder<T> {
+impl<T: ArrowPrimitiveType, const NULLABLE: bool> GroupColumn
+    for PrimitiveGroupValueBuilder<T, NULLABLE>
+{
     fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> 
bool {
-        self.nulls.is_null(lhs_row) == array.is_null(rhs_row)
+        // Perf: skip null check (by short circuit) if input is not ullable
+        let null_match = if NULLABLE {
+            self.nulls.is_null(lhs_row) == array.is_null(rhs_row)
+        } else {
+            true
+        };
+
+        null_match
             && self.group_values[lhs_row] == 
array.as_primitive::<T>().value(rhs_row)
     }
 
     fn append_val(&mut self, array: &ArrayRef, row: usize) {
-        if array.is_null(row) {
-            self.nulls.append(true);
-            self.group_values.push(T::default_value());
+        // Perf: skip null check if input can't have nulls
+        if NULLABLE {
+            if array.is_null(row) {
+                self.nulls.append(true);
+                self.group_values.push(T::default_value());
+            } else {
+                self.nulls.append(false);
+                self.group_values.push(array.as_primitive::<T>().value(row));
+            }
         } else {
-            self.nulls.append(false);
             self.group_values.push(array.as_primitive::<T>().value(row));
         }
     }
@@ -168,6 +132,9 @@ impl<T: ArrowPrimitiveType> GroupColumn for 
PrimitiveGroupValueBuilder<T> {
         } = *self;
 
         let nulls = nulls.build();
+        if !NULLABLE {
+            assert!(nulls.is_none(), "unexpected nulls in non nullable input");
+        }
 
         Arc::new(PrimitiveArray::<T>::new(
             ScalarBuffer::from(group_values),
@@ -177,7 +144,8 @@ impl<T: ArrowPrimitiveType> GroupColumn for 
PrimitiveGroupValueBuilder<T> {
 
     fn take_n(&mut self, n: usize) -> ArrayRef {
         let first_n = self.group_values.drain(0..n).collect::<Vec<_>>();
-        let first_n_nulls = self.nulls.take_n(n);
+
+        let first_n_nulls = if NULLABLE { self.nulls.take_n(n) } else { None };
 
         Arc::new(PrimitiveArray::<T>::new(
             ScalarBuffer::from(first_n),


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to