This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 18f920135c Fix `equal_to` in `ByteGroupValueBuilder` (#12770)
18f920135c is described below

commit 18f920135c4ff9cfd081a335d20ee675b2916503
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Oct 6 06:40:21 2024 -0400

    Fix `equal_to` in `ByteGroupValueBuilder` (#12770)
    
    * Fix `equal_to` in `ByteGroupValueBuilder`
    
    * refactor null_equal_to
    
    * Update 
datafusion/physical-plan/src/aggregates/group_values/group_column.rs
---
 .../src/aggregates/group_values/group_column.rs    | 108 +++++++++++++++++----
 1 file changed, 88 insertions(+), 20 deletions(-)

diff --git 
a/datafusion/physical-plan/src/aggregates/group_values/group_column.rs 
b/datafusion/physical-plan/src/aggregates/group_values/group_column.rs
index aa246ac95b..5d00f300e9 100644
--- a/datafusion/physical-plan/src/aggregates/group_values/group_column.rs
+++ b/datafusion/physical-plan/src/aggregates/group_values/group_column.rs
@@ -93,21 +93,10 @@ impl<T: ArrowPrimitiveType, const NULLABLE: bool> 
GroupColumn
     fn equal_to(&self, lhs_row: usize, array: &ArrayRef, rhs_row: usize) -> 
bool {
         // Perf: skip null check (by short circuit) if input is not nullable
         if NULLABLE {
-            // In nullable path, we should check if both `exist row` and 
`input row`
-            // are null/not null
-            let is_exist_null = self.nulls.is_null(lhs_row);
-            let null_match = is_exist_null == array.is_null(rhs_row);
-            if !null_match {
-                // If `is_null`s in `exist row` and `input row` don't match, 
return not equal to
-                return false;
-            } else if is_exist_null {
-                // If `is_null`s in `exist row` and `input row` match, and 
they are `null`s,
-                // return equal to
-                //
-                // NOTICE: we should not check their values when they are 
`null`s, because they are
-                // meaningless actually, and not ensured to be same
-                //
-                return true;
+            let exist_null = self.nulls.is_null(lhs_row);
+            let input_null = array.is_null(rhs_row);
+            if let Some(result) = nulls_equal_to(exist_null, input_null) {
+                return result;
             }
             // Otherwise, we need to check their values
         }
@@ -224,9 +213,14 @@ where
     where
         B: ByteArrayType,
     {
-        let arr = array.as_bytes::<B>();
-        self.nulls.is_null(lhs_row) == arr.is_null(rhs_row)
-            && self.value(lhs_row) == (arr.value(rhs_row).as_ref() as &[u8])
+        let array = array.as_bytes::<B>();
+        let exist_null = self.nulls.is_null(lhs_row);
+        let input_null = array.is_null(rhs_row);
+        if let Some(result) = nulls_equal_to(exist_null, input_null) {
+            return result;
+        }
+        // Otherwise, we need to check their values
+        self.value(lhs_row) == (array.value(rhs_row).as_ref() as &[u8])
     }
 
     /// return the current value of the specified row irrespective of null
@@ -382,6 +376,20 @@ where
     }
 }
 
+/// Determines if the nullability of the existing and new input array can be 
used
+/// to short-circuit the comparison of the two values.
+///
+/// Returns `Some(result)` if the result of the comparison can be determined
+/// from the nullness of the two values, and `None` if the comparison must be
+/// done on the values themselves.
+fn nulls_equal_to(lhs_null: bool, rhs_null: bool) -> Option<bool> {
+    match (lhs_null, rhs_null) {
+        (true, true) => Some(true),
+        (false, true) | (true, false) => Some(false),
+        _ => None,
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use std::sync::Arc;
@@ -468,13 +476,14 @@ mod tests {
         builder.append_val(&builder_array, 5);
 
         // Define input array
-        let (_, values, _) =
+        let (_nulls, values, _) =
             Int64Array::from(vec![Some(1), Some(2), None, None, Some(1), 
Some(3)])
                 .into_parts();
 
+        // explicitly build a boolean buffer where one of the null values also 
happens to match
         let mut boolean_buffer_builder = BooleanBufferBuilder::new(6);
         boolean_buffer_builder.append(true);
-        boolean_buffer_builder.append(false);
+        boolean_buffer_builder.append(false); // this sets Some(2) to null 
above
         boolean_buffer_builder.append(false);
         boolean_buffer_builder.append(false);
         boolean_buffer_builder.append(true);
@@ -511,4 +520,63 @@ mod tests {
         assert!(builder.equal_to(0, &input_array, 0));
         assert!(!builder.equal_to(1, &input_array, 1));
     }
+
+    #[test]
+    fn test_byte_array_equal_to() {
+        // Will cover such cases:
+        //   - exist null, input not null
+        //   - exist null, input null; values not equal
+        //   - exist null, input null; values equal
+        //   - exist not null, input null
+        //   - exist not null, input not null; values not equal
+        //   - exist not null, input not null; values equal
+
+        // Define PrimitiveGroupValueBuilder
+        let mut builder = ByteGroupValueBuilder::<i32>::new(OutputType::Utf8);
+        let builder_array = Arc::new(StringArray::from(vec![
+            None,
+            None,
+            None,
+            Some("foo"),
+            Some("bar"),
+            Some("baz"),
+        ])) as ArrayRef;
+        builder.append_val(&builder_array, 0);
+        builder.append_val(&builder_array, 1);
+        builder.append_val(&builder_array, 2);
+        builder.append_val(&builder_array, 3);
+        builder.append_val(&builder_array, 4);
+        builder.append_val(&builder_array, 5);
+
+        // Define input array
+        let (offsets, buffer, _nulls) = StringArray::from(vec![
+            Some("foo"),
+            Some("bar"),
+            None,
+            None,
+            Some("foo"),
+            Some("baz"),
+        ])
+        .into_parts();
+
+        // explicitly build a boolean buffer where one of the null values also 
happens to match
+        let mut boolean_buffer_builder = BooleanBufferBuilder::new(6);
+        boolean_buffer_builder.append(true);
+        boolean_buffer_builder.append(false); // this sets Some("bar") to null 
above
+        boolean_buffer_builder.append(false);
+        boolean_buffer_builder.append(false);
+        boolean_buffer_builder.append(true);
+        boolean_buffer_builder.append(true);
+        let nulls = NullBuffer::new(boolean_buffer_builder.finish());
+        let input_array =
+            Arc::new(StringArray::new(offsets, buffer, Some(nulls))) as 
ArrayRef;
+
+        // Check
+        assert!(!builder.equal_to(0, &input_array, 0));
+        assert!(builder.equal_to(1, &input_array, 1));
+        assert!(builder.equal_to(2, &input_array, 2));
+        assert!(!builder.equal_to(3, &input_array, 3));
+        assert!(!builder.equal_to(4, &input_array, 4));
+        assert!(builder.equal_to(5, &input_array, 5));
+    }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to