This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 04f56bdb17 fix(datafusion-functions-nested): `array_distinct` now works
with null rows (#13966)
04f56bdb17 is described below
commit 04f56bdb17ac3a652c09f1e49c6a10cfc58bad57
Author: Raz Luvaton <[email protected]>
AuthorDate: Thu Jan 2 20:20:28 2025 +0200
fix(datafusion-functions-nested): `array_distinct` now works with null rows
(#13966)
* added failing test
* fix(datafusion-functions-nested): `array_distinct` now works with null rows
* Update datafusion/functions-nested/src/set_ops.rs
Co-authored-by: Andrew Lamb <[email protected]>
* Update set_ops.rs
---------
Co-authored-by: Andrew Lamb <[email protected]>
---
datafusion/functions-nested/src/set_ops.rs | 12 +++++++++---
datafusion/sqllogictest/test_files/array.slt | 7 +++++++
2 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/datafusion/functions-nested/src/set_ops.rs
b/datafusion/functions-nested/src/set_ops.rs
index 202330715b..893fc933d0 100644
--- a/datafusion/functions-nested/src/set_ops.rs
+++ b/datafusion/functions-nested/src/set_ops.rs
@@ -516,11 +516,16 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
let mut new_arrays = Vec::with_capacity(array.len());
let converter = RowConverter::new(vec![SortField::new(dt)])?;
// distinct for each list in ListArray
- for arr in array.iter().flatten() {
+ for arr in array.iter() {
+ let last_offset: OffsetSize = offsets.last().copied().unwrap();
+ let Some(arr) = arr else {
+ // Add same offset for null
+ offsets.push(last_offset);
+ continue;
+ };
let values = converter.convert_columns(&[arr])?;
// sort elements in list and remove duplicates
let rows = values.iter().sorted().dedup().collect::<Vec<_>>();
- let last_offset: OffsetSize = offsets.last().copied().unwrap();
offsets.push(last_offset + OffsetSize::usize_as(rows.len()));
let arrays = converter.convert_rows(rows)?;
let array = match arrays.first() {
@@ -538,6 +543,7 @@ fn general_array_distinct<OffsetSize: OffsetSizeTrait>(
Arc::clone(field),
offsets,
values,
- None,
+ // Keep the list nulls
+ array.nulls().cloned(),
)?))
}
diff --git a/datafusion/sqllogictest/test_files/array.slt
b/datafusion/sqllogictest/test_files/array.slt
index a023e06030..dcceeebaf4 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -5674,6 +5674,13 @@ select array_distinct([sum(a)]) from t1 where a > 100
group by b;
statement ok
drop table t1;
+query ?
+select array_distinct(a) from values ([1, 2, 3]), (null), ([1, 3, 1]) as X(a);
+----
+[1, 2, 3]
+NULL
+[1, 3]
+
query ?
select array_distinct([]);
----
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]