alamb commented on code in PR #7897:
URL: https://github.com/apache/arrow-datafusion/pull/7897#discussion_r1390391943
##########
datafusion/physical-expr/src/array_expressions.rs:
##########
@@ -1358,6 +1361,86 @@ macro_rules! to_string {
}};
}
+fn union_generic_lists<OffsetSize: OffsetSizeTrait>(
+ l: &GenericListArray<OffsetSize>,
+ r: &GenericListArray<OffsetSize>,
+ field: &FieldRef,
+) -> Result<GenericListArray<OffsetSize>> {
+ let converter =
RowConverter::new(vec![SortField::new(l.value_type().clone())])?;
+
+ let nulls = NullBuffer::union(l.nulls(), r.nulls());
+ let l_values = l.values().clone();
+ let r_values = r.values().clone();
+ let l_values = converter.convert_columns(&[l_values])?;
+ let r_values = converter.convert_columns(&[r_values])?;
+
+ // Might be worth adding an upstream OffsetBufferBuilder
+ let mut offsets = Vec::<OffsetSize>::with_capacity(l.len() + 1);
Review Comment:
This is a really neat implementation @edmondop.
##########
datafusion/sqllogictest/test_files/array.slt:
##########
@@ -1752,6 +1752,101 @@ select array_to_string(make_array(), ',')
----
(empty)
+
+## array_union (aliases: `list_union`)
+
+# array_union scalar function #1
+query ?
+select array_union([1, 2, 3, 4], [5, 6, 3, 4]);
+----
+[1, 2, 3, 4, 5, 6]
+
+# array_union scalar function #2
+query ?
+select array_union([1, 2, 3, 4], [5, 6, 7, 8]);
+----
+[1, 2, 3, 4, 5, 6, 7, 8]
+
+# array_union scalar function #3
+query ?
+select array_union([1,2,3], []);
+----
+[1, 2, 3]
+
+# array_union scalar function #4
+query ?
+select array_union([1, 2, 3, 4], [5, 4]);
+----
+[1, 2, 3, 4, 5]
+
+# array_union scalar function #5
+statement ok
+CREATE TABLE arrays_with_repeating_elements_for_union
+AS VALUES
+ ([1], [2]),
+ ([2, 3], [3]),
+ ([3], [3, 4])
+;
+
+query ?
+select array_union(column1, column2) from
arrays_with_repeating_elements_for_union;
+----
+[1, 2]
+[2, 3]
+[3, 4]
+
+statement ok
+drop table arrays_with_repeating_elements_for_union;
+
+# array_union scalar function #6
+query ?
+select array_union([], []);
Review Comment:
> I am even surprised my code work here tbh, do I need to add an additional
> branch to pattern matching where both array have null data type and return an
> empty array?

I don't think you should check the input types (they should still be lists,
they'll just have an offset buffer of `[0, 1]`).
Maybe you just need to handle the case specially in `array_union`.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]