Jefffrey commented on code in PR #19868:
URL: https://github.com/apache/datafusion/pull/19868#discussion_r2703670040


##########
datafusion/functions-nested/src/sort.rs:
##########
@@ -137,10 +137,18 @@ impl ScalarUDFImpl for ArraySort {
         match &arg_types[0] {
             DataType::Null => Ok(DataType::Null),
             DataType::List(field) => {
-                Ok(DataType::new_list(field.data_type().clone(), true))
+                // Preserve the inner field's nullability from the input

Review Comment:
   These changes seem unrelated to the original issue



##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -741,21 +845,47 @@ impl Accumulator for OrderSensitiveArrayAggAccumulator {
             self.sort();
         }
 
-        let mut result = vec![self.evaluate()?];
+        // State uses nullable inner elements to match state_fields() schema

Review Comment:
   Here too



##########
datafusion/spark/src/function/aggregate/collect.rs:
##########
@@ -88,8 +88,9 @@ impl AggregateUDFImpl for SparkCollectList {
         let field = &acc_args.expr_fields[0];
         let data_type = field.data_type().clone();
         let ignore_nulls = true;
+        let input_nullable = field.is_nullable();

Review Comment:
   We haven't fixed the return field of this UDAF so this will be incorrect 
(see `return_type`)



##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -111,6 +111,23 @@ impl AggregateUDFImpl for ArrayAgg {
         ))))
     }
 
+    fn return_field(&self, arg_fields: &[FieldRef]) -> Result<FieldRef> {
+        // Preserve the nullability of the input field in the list's inner 
element
+        // If the input is non-nullable, elements in the list are non-nullable
+        // If the input is nullable, elements in the list are nullable
+        // The list itself is always nullable (returns NULL for empty results)
+        let input_field = &arg_fields[0];
+        let list_field = Field::new_list_field(
+            input_field.data_type().clone(),
+            input_field.is_nullable(), // preserve input field's nullability 
for elements
+        );
+        Ok(Arc::new(Field::new(
+            self.name(),
+            DataType::List(Arc::new(list_field)),
+            true, // list itself is always nullable (NULL for no rows)

Review Comment:
   These comments repeat themselves a lot, especially with the comments below
   
   Just keep them simple, e.g.
   
   > Outer field is always nullable in case of empty groups
   > Inner list field nullability depends on input field



##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -111,6 +111,23 @@ impl AggregateUDFImpl for ArrayAgg {
         ))))
     }
 
+    fn return_field(&self, arg_fields: &[FieldRef]) -> Result<FieldRef> {

Review Comment:
   When we implement `return_field`, `return_type` should be left to return an 
error, for example:
   
   
https://github.com/apache/datafusion/blob/05451da3cfec23553891c76eb6a8656c9dde0869/datafusion/functions-aggregate/src/first_last.rs#L137-L151



##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -420,26 +473,69 @@ pub struct DistinctArrayAggAccumulator {
     datatype: DataType,
     sort_options: Option<SortOptions>,
     ignore_nulls: bool,
+    /// Whether the input field is nullable (preserved in result list elements)
+    input_nullable: bool,
 }
 
 impl DistinctArrayAggAccumulator {
     pub fn try_new(
         datatype: &DataType,
         sort_options: Option<SortOptions>,
         ignore_nulls: bool,
+        input_nullable: bool,
     ) -> Result<Self> {
         Ok(Self {
             values: HashSet::new(),
             datatype: datatype.clone(),
             sort_options,
             ignore_nulls,
+            input_nullable,
         })
     }
 }
 
 impl Accumulator for DistinctArrayAggAccumulator {
     fn state(&mut self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![self.evaluate()?])
+        // State uses nullable inner elements to match state_fields() schema

Review Comment:
   Same here



##########
datafusion/functions-aggregate/src/array_agg.rs:
##########
@@ -373,21 +401,46 @@ impl Accumulator for ArrayAggAccumulator {
     }
 
     fn state(&mut self) -> Result<Vec<ScalarValue>> {
-        Ok(vec![self.evaluate()?])
+        // State uses nullable inner elements to match state_fields() schema

Review Comment:
   This seems like something we should be fixing in `state_fields()` instead of 
having this handling code



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to