This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 311e8c7c7a Fix `make_array` null handling, update tests (#6900)
311e8c7c7a is described below
commit 311e8c7c7ab8c087f072eeac9335394c09ae8185
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jul 11 05:33:36 2023 -0400
Fix `make_array` null handling, update tests (#6900)
* Fix `make_array` null handling, update tests
* Apply suggestions from code review
---
.../core/tests/sqllogictests/test_files/array.slt | 92 +++++++++++++++-------
datafusion/physical-expr/src/array_expressions.rs | 77 +++++++++++++++++-
2 files changed, 136 insertions(+), 33 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt
b/datafusion/core/tests/sqllogictests/test_files/array.slt
index b46875725f..cf20c14cac 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -29,17 +29,18 @@ CREATE TABLE values(
b INT,
c INT,
d FLOAT,
- e VARCHAR
+ e VARCHAR,
+ f VARCHAR
) AS VALUES
- (1, 1, 2, 1.1, 'Lorem'),
- (2, 3, 4, 2.2, 'ipsum'),
- (3, 5, 6, 3.3, 'dolor'),
- (4, 7, 8, 4.4, 'sit'),
- (NULL, 9, 10, 5.5, 'amet'),
- (5, NULL, 12, 6.6, ','),
- (6, 11, NULL, 7.7, 'consectetur'),
- (7, 13, 14, NULL, 'adipiscing'),
- (8, 15, 16, 8.8, NULL)
+ (1, 1, 2, 1.1, 'Lorem', 'A'),
+ (2, 3, 4, 2.2, 'ipsum', ''),
+ (3, 5, 6, 3.3, 'dolor', 'BB'),
+ (4, 7, 8, 4.4, 'sit', NULL),
+ (NULL, 9, 10, 5.5, 'amet', 'CCC'),
+ (5, NULL, 12, 6.6, ',', 'DD'),
+ (6, 11, NULL, 7.7, 'consectetur', 'E'),
+ (7, 13, 14, NULL, 'adipiscing', 'F'),
+ (8, 15, 16, 8.8, NULL, '')
;
statement ok
@@ -189,21 +190,35 @@ select make_array(NULL), make_array(NULL, NULL, NULL),
make_array(make_array(NUL
----
[] [] [[], []]
-# make_array with columns #1
-query ????
-select make_array(a), make_array(b, c), make_array(d), make_array(e) from
values;
-----
-[1] [1, 2] [1.1] [Lorem]
-[2] [3, 4] [2.2] [ipsum]
-[3] [5, 6] [3.3] [dolor]
-[4] [7, 8] [4.4] [sit]
-[0] [9, 10] [5.5] [amet]
-[5] [0, 12] [6.6] [,]
-[6] [11, 0] [7.7] [consectetur]
-[7] [13, 14] [0.0] [adipiscing]
-[8] [15, 16] [8.8] []
-
-# make_array with columns #2
+# make_array with 1 columns
+query ???
+select make_array(a), make_array(d), make_array(e) from values;
+----
+[1] [1.1] [Lorem]
+[2] [2.2] [ipsum]
+[3] [3.3] [dolor]
+[4] [4.4] [sit]
+[] [5.5] [amet]
+[5] [6.6] [,]
+[6] [7.7] [consectetur]
+[7] [] [adipiscing]
+[8] [8.8] []
+
+# make_array with 2 columns #1
+query ??
+select make_array(b, c), make_array(e, f) from values;
+----
+[1, 2] [Lorem, A]
+[3, 4] [ipsum, ]
+[5, 6] [dolor, BB]
+[7, 8] [sit, ]
+[9, 10] [amet, CCC]
+[, 12] [,, DD]
+[11, ] [consectetur, E]
+[13, 14] [adipiscing, F]
+[15, 16] [, ]
+
+# make_array with 4 columns
query ?
select make_array(a, b, c, d) from values;
----
@@ -211,12 +226,31 @@ select make_array(a, b, c, d) from values;
[2.0, 3.0, 4.0, 2.2]
[3.0, 5.0, 6.0, 3.3]
[4.0, 7.0, 8.0, 4.4]
-[0.0, 9.0, 10.0, 5.5]
-[5.0, 0.0, 12.0, 6.6]
-[6.0, 11.0, 0.0, 7.7]
-[7.0, 13.0, 14.0, 0.0]
+[, 9.0, 10.0, 5.5]
+[5.0, , 12.0, 6.6]
+[6.0, 11.0, , 7.7]
+[7.0, 13.0, 14.0, ]
[8.0, 15.0, 16.0, 8.8]
+# make_array null handling
+query ?B?BB
+select
+ make_array(a), make_array(a)[1] IS NULL,
+ make_array(e, f), make_array(e, f)[1] IS NULL, make_array(e, f)[2] IS NULL
+from values;
+----
+[1] false [Lorem, A] false false
+[2] false [ipsum, ] false false
+[3] false [dolor, BB] false false
+[4] false [sit, ] false true
+[] true [amet, CCC] false false
+[5] false [,, DD] false false
+[6] false [consectetur, E] false false
+[7] false [adipiscing, F] false false
+[8] false [, ] true false
+
+
+
## array_append
# array_append scalar function #2
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 6af87b506e..a0fed8f908 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -40,6 +40,11 @@ macro_rules! downcast_arg {
}};
}
+/// Downcasts multiple arguments into a single concrete type
+/// $ARGS: &[ArrayRef]
+/// $ARRAY_TYPE: type to downcast to
+///
+/// $returns a Vec<$ARRAY_TYPE>
macro_rules! downcast_vec {
($ARGS:expr, $ARRAY_TYPE:ident) => {{
$ARGS
@@ -66,18 +71,38 @@ macro_rules! new_builder {
}};
}
+/// Combines multiple arrays into a single ListArray
+///
+/// $ARGS: slice of arrays, each with $ARRAY_TYPE
+/// $ARRAY_TYPE: the type of the list elements
+/// $BUILDER_TYPE: the type of ArrayBuilder for the list elements
+///
+/// Returns: a ListArray where the elements each have the same type as
+/// $ARRAY_TYPE and each element have a length of $ARGS.len()
macro_rules! array {
($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{
let builder = new_builder!($BUILDER_TYPE, $ARGS[0].len());
let mut builder =
ListBuilder::<$BUILDER_TYPE>::with_capacity(builder, $ARGS.len());
+ let num_rows = $ARGS[0].len();
+ assert!(
+ $ARGS.iter().all(|a| a.len() == num_rows),
+ "all arguments must have the same number of rows"
+ );
+
// for each entry in the array
- for index in 0..$ARGS[0].len() {
+ for index in 0..num_rows {
+ // for each column
for arg in $ARGS {
match arg.as_any().downcast_ref::<$ARRAY_TYPE>() {
+ // Copy the source array value into the target ListArray
Some(arr) => {
- builder.values().append_value(arr.value(index));
+ if arr.is_valid(index) {
+ builder.values().append_value(arr.value(index));
+ } else {
+ builder.values().append_null();
+ }
}
None => match arg.as_any().downcast_ref::<NullArray>() {
Some(arr) => {
@@ -179,6 +204,46 @@ fn compute_array_dims(arr: Option<ArrayRef>) ->
Result<Option<Vec<Option<u64>>>>
}
}
+/// Convert one or more [`ArrayRef`] of the same type into a
+/// `ListArray`
+///
+/// # Example (non nested)
+///
+/// Calling `array(col1, col2)` where col1 and col2 are non nested
+/// would return a single new `ListArray`, where each row was a list
+/// of 2 elements:
+///
+/// ```text
+/// ┌─────────┐ ┌─────────┐ ┌──────────────┐
+/// │ ┌─────┐ │ │ ┌─────┐ │ │ ┌──────────┐ │
+/// │ │ A │ │ │ │ X │ │ │ │ [A, X] │ │
+/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │
+/// │ │NULL │ │ │ │ Y │ │──────────▶│ │[NULL, Y] │ │
+/// │ ├─────┤ │ │ ├─────┤ │ │ ├──────────┤ │
+/// │ │ C │ │ │ │ Z │ │ │ │ [C, Z] │ │
+/// │ └─────┘ │ │ └─────┘ │ │ └──────────┘ │
+/// └─────────┘ └─────────┘ └──────────────┘
+/// col1 col2 output
+/// ```
+///
+/// # Example (nested)
+///
+/// Calling `array(col1, col2)` where col1 and col2 are lists
+/// would return a single new `ListArray`, where each row was a list
+/// of the corresponding elements of col1 and col2 flattened.
+///
+/// ``` text
+/// ┌──────────────┐ ┌──────────────┐ ┌────────────────────────┐
+/// │ ┌──────────┐ │ │ ┌──────────┐ │ │ ┌────────────────────┐ │
+/// │ │ [A, X] │ │ │ │ [] │ │ │ │ [A, X] │ │
+/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────┤ │
+/// │ │[NULL, Y] │ │ │ │[Q, R, S] │ │───────▶│ │ [NULL, Y, Q, R, S] │ │
+/// │ ├──────────┤ │ │ ├──────────┤ │ │ ├────────────────────┤ │
+/// │ │ [C, Z] │ │ │ │ NULL │ │ │ │ [C, Z, NULL] │ │
+/// │ └──────────┘ │ │ └──────────┘ │ │ └────────────────────┘ │
+/// └──────────────┘ └──────────────┘ └────────────────────────┘
+/// col1 col2 output
+/// ```
fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
// do not accept 0 arguments.
if args.is_empty() {
@@ -200,6 +265,7 @@ fn array_array(args: &[ArrayRef], data_type: DataType) ->
Result<ArrayRef> {
let mut mutable =
MutableArrayData::with_capacities(array_data, false, capacity);
+ // Copy over all the child data
for (i, a) in arrays.iter().enumerate() {
mutable.extend(i, 0, a.len())
}
@@ -239,8 +305,11 @@ fn array_array(args: &[ArrayRef], data_type: DataType) ->
Result<ArrayRef> {
Ok(res)
}
-/// put values in an array.
-pub fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
+/// Convert one or more [`ColumnarValue`] of the same type into a
+/// `ListArray`
+///
+/// See [`array_array`] for more details.
+fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
let arrays: Vec<ArrayRef> = values
.iter()
.map(|x| match x {