Copilot commented on code in PR #19868:
URL: https://github.com/apache/datafusion/pull/19868#discussion_r2700988056


##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -1116,6 +1133,23 @@ mod tests {
         Ok(())
     }
 
+    #[test]
+    fn return_field_preserves_input_nullability() -> Result<()> {
+        let array_agg = ArrayAgg::default();
+
+        // Test with nullable input field
+        let nullable_field = Arc::new(Field::new("input", DataType::Int64, 
true));
+        let result_field = array_agg.return_field(&[nullable_field.clone()])?;
+        assert!(result_field.is_nullable(), "Result should be nullable when 
input is nullable");
+
+        // Test with non-nullable input field
+        let non_nullable_field = Arc::new(Field::new("input", DataType::Int64, 
false));
+        let result_field = array_agg.return_field(&[non_nullable_field])?;
+        assert!(!result_field.is_nullable(), "Result should be non-nullable 
when input is non-nullable");
+
+        Ok(())
+    }

Review Comment:
   This test only verifies the schema metadata (Field nullability) but doesn't 
test the runtime behavior. A more comprehensive test should verify that when 
the input is non-nullable and the result field is marked as non-nullable, the 
accumulator never produces NULL values (even for empty groups). Currently, the 
accumulator returns NULL for empty groups (see lines 402, 504, 773-777), which 
would violate a non-nullable schema contract.



##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -111,6 +111,23 @@ impl AggregateUDFImpl for ArrayAgg {
         ))))
     }
 
+    fn return_field(&self, arg_fields: &[FieldRef]) -> Result<FieldRef> {
+        // Preserve the nullability of the input field in the list field
+        // If the input is non-nullable, the list itself is non-nullable
+        // If the input is nullable, the list is nullable
+        let input_field = &arg_fields[0];
+        let list_field = Field::new_list_field(
+            input_field.data_type().clone(),
+            true, // elements in the list can be null
+        );
+
+        Ok(Arc::new(Field::new(
+            self.name(),
+            DataType::List(Arc::new(list_field)),
+            input_field.is_nullable(), // preserve input field's nullability
+        )))

Review Comment:
   This logic is incorrect. The accumulator's evaluate methods (lines 402, 504, 
and 773-777) return NULL when there are no values in the group, regardless of 
whether the input field is nullable or not. Making the result field 
non-nullable when the input is non-nullable would violate the schema contract, 
as the accumulator can still produce NULL values for empty groups. The result 
field should always be nullable for ARRAY_AGG, or the accumulator 
implementation needs to be changed to return an empty array instead of NULL for 
empty groups when the input is non-nullable.



##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -111,6 +111,23 @@ impl AggregateUDFImpl for ArrayAgg {
         ))))
     }
 
+    fn return_field(&self, arg_fields: &[FieldRef]) -> Result<FieldRef> {
+        // Preserve the nullability of the input field in the list field
+        // If the input is non-nullable, the list itself is non-nullable
+        // If the input is nullable, the list is nullable
+        let input_field = &arg_fields[0];
+        let list_field = Field::new_list_field(
+            input_field.data_type().clone(),
+            true, // elements in the list can be null
+        );
+
+        Ok(Arc::new(Field::new(
+            self.name(),
+            DataType::List(Arc::new(list_field)),
+            input_field.is_nullable(), // preserve input field's nullability

Review Comment:
   The comment "Preserve the nullability of the input field in the list field" 
may be slightly ambiguous. The nullability is preserved in the outer aggregate 
result field (the Field with DataType::List), not in the inner list_field 
(which represents the elements of the list). Consider clarifying this to say 
"Preserve the nullability of the input field in the aggregate result field" or 
"The aggregate result field's nullability matches the input field's 
nullability".
   ```suggestion
           // Preserve the nullability of the input field in the aggregate 
result field
           // If the input is non-nullable, the aggregate result field is 
non-nullable
           // If the input is nullable, the aggregate result field is nullable
           let input_field = &arg_fields[0];
           let list_field = Field::new_list_field(
               input_field.data_type().clone(),
               true, // elements in the list (array elements) can be null
           );
   
           Ok(Arc::new(Field::new(
               self.name(),
               DataType::List(Arc::new(list_field)),
               input_field.is_nullable(), // preserve input field's nullability 
in the aggregate result field
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to