This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 04f56bdb17 fix(datafusion-functions-nested): `array_distinct` now works 
with null rows (#13966)
04f56bdb17 is described below

commit 04f56bdb17ac3a652c09f1e49c6a10cfc58bad57
Author: Raz Luvaton <[email protected]>
AuthorDate: Thu Jan 2 20:20:28 2025 +0200

    fix(datafusion-functions-nested): `array_distinct` now works with null rows 
(#13966)
    
    * added failing test
    
    * fix(datafusion-functions-nested): `array_distinct` now works with null rows
    
    * Update datafusion/functions-nested/src/set_ops.rs
    
    Co-authored-by: Andrew Lamb <[email protected]>
    
    * Update set_ops.rs
    
    ---------
    
    Co-authored-by: Andrew Lamb <[email protected]>
---
 datafusion/functions-nested/src/set_ops.rs   | 12 +++++++++---
 datafusion/sqllogictest/test_files/array.slt |  7 +++++++
 2 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/datafusion/functions-nested/src/set_ops.rs 
b/datafusion/functions-nested/src/set_ops.rs
index 202330715b..893fc933d0 100644
--- a/datafusion/functions-nested/src/set_ops.rs
+++ b/datafusion/functions-nested/src/set_ops.rs
@@ -516,11 +516,16 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
     let mut new_arrays = Vec::with_capacity(array.len());
     let converter = RowConverter::new(vec![SortField::new(dt)])?;
     // distinct for each list in ListArray
-    for arr in array.iter().flatten() {
+    for arr in array.iter() {
+        let last_offset: OffsetSize = offsets.last().copied().unwrap();
+        let Some(arr) = arr else {
+            // Add same offset for null
+            offsets.push(last_offset);
+            continue;
+        };
         let values = converter.convert_columns(&[arr])?;
         // sort elements in list and remove duplicates
         let rows = values.iter().sorted().dedup().collect::<Vec<_>>();
-        let last_offset: OffsetSize = offsets.last().copied().unwrap();
         offsets.push(last_offset + OffsetSize::usize_as(rows.len()));
         let arrays = converter.convert_rows(rows)?;
         let array = match arrays.first() {
@@ -538,6 +543,7 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
         Arc::clone(field),
         offsets,
         values,
-        None,
+        // Keep the list nulls
+        array.nulls().cloned(),
     )?))
 }
diff --git a/datafusion/sqllogictest/test_files/array.slt 
b/datafusion/sqllogictest/test_files/array.slt
index a023e06030..dcceeebaf4 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -5674,6 +5674,13 @@ select array_distinct([sum(a)]) from t1 where a > 100 
group by b;
 statement ok
 drop table t1;
 
+query ?
+select array_distinct(a) from values ([1, 2, 3]), (null), ([1, 3, 1]) as X(a);
+----
+[1, 2, 3]
+NULL
+[1, 3]
+
 query ?
 select array_distinct([]);
 ----


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to