alamb commented on code in PR #12623:
URL: https://github.com/apache/datafusion/pull/12623#discussion_r1775413885


##########
datafusion/physical-plan/src/aggregates/group_values/group_value_row.rs:
##########
@@ -62,57 +62,81 @@ pub trait ArrayRowEq: Send + Sync {
 
 pub struct PrimitiveGroupValueBuilder<T: ArrowPrimitiveType> {
     group_values: Vec<T::Native>,
-    nulls: Vec<bool>,
-    // whether the array contains at least one null, for fast non-null path
-    has_null: bool,
+    /// Nulls buffer for the group values --
+    /// * None if we have seen no arrays yet
+    /// * Some if we have seen arrays and have at least one null
+    ///
+    /// Note this is an Arrow *VALIDITY* buffer (so it is false for nulls, true
+    /// for non-nulls)
+    nulls: Option<BooleanBufferBuilder>,
+    /// If true, the input is guaranteed not to have nulls
     nullable: bool,
 }
 
 impl<T> PrimitiveGroupValueBuilder<T>
 where
     T: ArrowPrimitiveType,
 {
+    /// Create a new [`PrimitiveGroupValueBuilder`]
+    ///
+    /// If `nullable` is false, it means the input will never have nulls
     pub fn new(nullable: bool) -> Self {
         Self {
             group_values: vec![],
-            nulls: vec![],
-            has_null: false,
+            nulls: None,
             nullable,
         }
     }
 }
 
 impl<T: ArrowPrimitiveType> ArrayRowEq for PrimitiveGroupValueBuilder<T> {
     fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> 
bool {
-        // non-null fast path
-        // both non-null
+        // fast path when input has no nulls
         if !self.nullable {
+            debug_assert!(self.nulls.is_none());
             return self.group_values[lhs_row]
                 == array.as_primitive::<T>().value(rhs_row);
         }
-
-        // lhs is non-null
-        if self.nulls[lhs_row] {
-            if array.is_null(rhs_row) {
+        // slow path if the input could have nulls
+        if let Some(nulls) = self.nulls.as_ref() {
+            // if lhs_row is valid (non null), but array is null
+            let lhs_is_null = !nulls.get_bit(lhs_row);
+            let rhs_is_null = array.is_null(rhs_row);
+            if lhs_is_null != rhs_is_null {
                 return false;
             }
-
-            return self.group_values[lhs_row]
-                == array.as_primitive::<T>().value(rhs_row);
         }
-
-        array.is_null(rhs_row)
+        self.group_values[lhs_row] == array.as_primitive::<T>().value(rhs_row)
     }
 
     fn append_val(&mut self, array: &ArrayRef, row: usize) {
-        if self.nullable && array.is_null(row) {
-            self.group_values.push(T::default_value());
-            self.nulls.push(false);
-            self.has_null = true;
-        } else {
-            let elem = array.as_primitive::<T>().value(row);
-            self.group_values.push(elem);
-            self.nulls.push(true);

Review Comment:
   it seems like this previous code manages `self.nulls` even when there are 
and have been no nulls at all in the input



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: github-unsubscr...@datafusion.apache.org
For additional commands, e-mail: github-h...@datafusion.apache.org

Reply via email to