Copilot commented on code in PR #19868:
URL: https://github.com/apache/datafusion/pull/19868#discussion_r2700988056
##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -1116,6 +1133,23 @@ mod tests {
Ok(())
}
+ #[test]
+ fn return_field_preserves_input_nullability() -> Result<()> {
+ let array_agg = ArrayAgg::default();
+
+ // Test with nullable input field
+ let nullable_field = Arc::new(Field::new("input", DataType::Int64,
true));
+ let result_field = array_agg.return_field(&[nullable_field.clone()])?;
+ assert!(result_field.is_nullable(), "Result should be nullable when
input is nullable");
+
+ // Test with non-nullable input field
+ let non_nullable_field = Arc::new(Field::new("input", DataType::Int64,
false));
+ let result_field = array_agg.return_field(&[non_nullable_field])?;
+ assert!(!result_field.is_nullable(), "Result should be non-nullable
when input is non-nullable");
+
+ Ok(())
+ }
Review Comment:
This test only verifies the schema metadata (Field nullability) but doesn't
test the runtime behavior. A more comprehensive test should verify that when
the input is non-nullable and the result field is marked as non-nullable, the
accumulator never produces NULL values (even for empty groups). Currently, the
accumulator returns NULL for empty groups (see lines 402, 504, 773-777), which
would violate a non-nullable schema contract.
##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -111,6 +111,23 @@ impl AggregateUDFImpl for ArrayAgg {
))))
}
+ fn return_field(&self, arg_fields: &[FieldRef]) -> Result<FieldRef> {
+ // Preserve the nullability of the input field in the list field
+ // If the input is non-nullable, the list itself is non-nullable
+ // If the input is nullable, the list is nullable
+ let input_field = &arg_fields[0];
+ let list_field = Field::new_list_field(
+ input_field.data_type().clone(),
+ true, // elements in the list can be null
+ );
+
+ Ok(Arc::new(Field::new(
+ self.name(),
+ DataType::List(Arc::new(list_field)),
+ input_field.is_nullable(), // preserve input field's nullability
+ )))
Review Comment:
This logic is incorrect. The accumulator's evaluate methods (lines 402, 504,
and 773-777) return NULL when there are no values in the group, regardless of
whether the input field is nullable or not. Making the result field
non-nullable when the input is non-nullable would violate the schema contract,
as the accumulator can still produce NULL values for empty groups. The result
field should always be nullable for ARRAY_AGG, or the accumulator
implementation needs to be changed to return an empty array instead of NULL for
empty groups when the input is non-nullable.
##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -111,6 +111,23 @@ impl AggregateUDFImpl for ArrayAgg {
))))
}
+ fn return_field(&self, arg_fields: &[FieldRef]) -> Result<FieldRef> {
+ // Preserve the nullability of the input field in the list field
+ // If the input is non-nullable, the list itself is non-nullable
+ // If the input is nullable, the list is nullable
+ let input_field = &arg_fields[0];
+ let list_field = Field::new_list_field(
+ input_field.data_type().clone(),
+ true, // elements in the list can be null
+ );
+
+ Ok(Arc::new(Field::new(
+ self.name(),
+ DataType::List(Arc::new(list_field)),
+ input_field.is_nullable(), // preserve input field's nullability
Review Comment:
The comment "Preserve the nullability of the input field in the list field"
may be slightly ambiguous. The nullability is preserved in the outer aggregate
result field (the Field with DataType::List), not in the inner list_field
(which represents the elements of the list). Consider clarifying this to say
"Preserve the nullability of the input field in the aggregate result field" or
"The aggregate result field's nullability matches the input field's
nullability".
```suggestion
// Preserve the nullability of the input field in the aggregate
result field
// If the input is non-nullable, the aggregate result field is
non-nullable
// If the input is nullable, the aggregate result field is nullable
let input_field = &arg_fields[0];
let list_field = Field::new_list_field(
input_field.data_type().clone(),
true, // elements in the list (array elements) can be null
);
Ok(Arc::new(Field::new(
self.name(),
DataType::List(Arc::new(list_field)),
input_field.is_nullable(), // preserve input field's nullability
in the aggregate result field
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]