This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 311e8c7c7a Fix `make_array` null handling, update tests (#6900)
311e8c7c7a is described below

commit 311e8c7c7ab8c087f072eeac9335394c09ae8185
Author: Andrew Lamb <[email protected]>
AuthorDate: Tue Jul 11 05:33:36 2023 -0400

    Fix `make_array` null handling, update tests (#6900)
    
    * Fix `make_array` null handling, update tests
    
    * Apply suggestions from code review
---
 .../core/tests/sqllogictests/test_files/array.slt  | 92 +++++++++++++++-------
 datafusion/physical-expr/src/array_expressions.rs  | 77 +++++++++++++++++-
 2 files changed, 136 insertions(+), 33 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt 
b/datafusion/core/tests/sqllogictests/test_files/array.slt
index b46875725f..cf20c14cac 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -29,17 +29,18 @@ CREATE TABLE values(
   b INT,
   c INT,
   d FLOAT,
-  e VARCHAR
+  e VARCHAR,
+  f VARCHAR
 ) AS VALUES
-  (1, 1, 2, 1.1, 'Lorem'),
-  (2, 3, 4, 2.2, 'ipsum'),
-  (3, 5, 6, 3.3, 'dolor'),
-  (4, 7, 8, 4.4, 'sit'),
-  (NULL, 9, 10, 5.5, 'amet'),
-  (5, NULL, 12, 6.6, ','),
-  (6, 11, NULL, 7.7, 'consectetur'),
-  (7, 13, 14, NULL, 'adipiscing'),
-  (8, 15, 16, 8.8, NULL)
+  (1,    1,    2,    1.1,  'Lorem',       'A'),
+  (2,    3,    4,    2.2,  'ipsum',       ''),
+  (3,    5,    6,    3.3,  'dolor',       'BB'),
+  (4,    7,    8,    4.4,  'sit',          NULL),
+  (NULL, 9,    10,   5.5,  'amet',        'CCC'),
+  (5,    NULL, 12,   6.6,  ',',           'DD'),
+  (6,    11,   NULL, 7.7,  'consectetur', 'E'),
+  (7,    13,   14,   NULL, 'adipiscing',  'F'),
+  (8,    15,   16,   8.8,   NULL,          '')
 ;
 
 statement ok
@@ -189,21 +190,35 @@ select make_array(NULL), make_array(NULL, NULL, NULL), 
make_array(make_array(NUL
 ----
 [] [] [[], []]
 
-# make_array with columns #1
-query ????
-select make_array(a), make_array(b, c), make_array(d), make_array(e) from 
values;
-----
-[1] [1, 2] [1.1] [Lorem]
-[2] [3, 4] [2.2] [ipsum]
-[3] [5, 6] [3.3] [dolor]
-[4] [7, 8] [4.4] [sit]
-[0] [9, 10] [5.5] [amet]
-[5] [0, 12] [6.6] [,]
-[6] [11, 0] [7.7] [consectetur]
-[7] [13, 14] [0.0] [adipiscing]
-[8] [15, 16] [8.8] []
-
-# make_array with columns #2
+# make_array with 1 columns
+query ???
+select make_array(a), make_array(d), make_array(e) from values;
+----
+[1] [1.1] [Lorem]
+[2] [2.2] [ipsum]
+[3] [3.3] [dolor]
+[4] [4.4] [sit]
+[] [5.5] [amet]
+[5] [6.6] [,]
+[6] [7.7] [consectetur]
+[7] [] [adipiscing]
+[8] [8.8] []
+
+# make_array with 2 columns #1
+query ??
+select make_array(b, c), make_array(e, f) from values;
+----
+[1, 2] [Lorem, A]
+[3, 4] [ipsum, ]
+[5, 6] [dolor, BB]
+[7, 8] [sit, ]
+[9, 10] [amet, CCC]
+[, 12] [,, DD]
+[11, ] [consectetur, E]
+[13, 14] [adipiscing, F]
+[15, 16] [, ]
+
+# make_array with 4 columns
 query ?
 select make_array(a, b, c, d) from values;
 ----
@@ -211,12 +226,31 @@ select make_array(a, b, c, d) from values;
 [2.0, 3.0, 4.0, 2.2]
 [3.0, 5.0, 6.0, 3.3]
 [4.0, 7.0, 8.0, 4.4]
-[0.0, 9.0, 10.0, 5.5]
-[5.0, 0.0, 12.0, 6.6]
-[6.0, 11.0, 0.0, 7.7]
-[7.0, 13.0, 14.0, 0.0]
+[, 9.0, 10.0, 5.5]
+[5.0, , 12.0, 6.6]
+[6.0, 11.0, , 7.7]
+[7.0, 13.0, 14.0, ]
 [8.0, 15.0, 16.0, 8.8]
 
+# make_array null handling
+query ?B?BB
+select
+  make_array(a), make_array(a)[1] IS NULL,
+  make_array(e, f), make_array(e, f)[1] IS NULL, make_array(e, f)[2] IS NULL
+from values;
+----
+[1] false [Lorem, A] false false
+[2] false [ipsum, ] false false
+[3] false [dolor, BB] false false
+[4] false [sit, ] false true
+[] true [amet, CCC] false false
+[5] false [,, DD] false false
+[6] false [consectetur, E] false false
+[7] false [adipiscing, F] false false
+[8] false [, ] true false
+
+
+
 ## array_append
 
 # array_append scalar function #2
diff --git a/datafusion/physical-expr/src/array_expressions.rs 
b/datafusion/physical-expr/src/array_expressions.rs
index 6af87b506e..a0fed8f908 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -40,6 +40,11 @@ macro_rules! downcast_arg {
     }};
 }
 
+/// Downcasts multiple arguments into a single concrete type
+/// $ARGS:  &[ArrayRef]
+/// $ARRAY_TYPE: type to downcast to
+///
+/// $returns a Vec<$ARRAY_TYPE>
 macro_rules! downcast_vec {
     ($ARGS:expr, $ARRAY_TYPE:ident) => {{
         $ARGS
@@ -66,18 +71,38 @@ macro_rules! new_builder {
     }};
 }
 
+/// Combines multiple arrays into a single ListArray
+///
+/// $ARGS: slice of arrays, each with $ARRAY_TYPE
+/// $ARRAY_TYPE: the type of the list elements
+/// $BUILDER_TYPE: the type of ArrayBuilder for the list elements
+///
+/// Returns: a ListArray where the elements each have the same type as
+/// $ARRAY_TYPE and each element have a length of $ARGS.len()
 macro_rules! array {
     ($ARGS:expr, $ARRAY_TYPE:ident, $BUILDER_TYPE:ident) => {{
         let builder = new_builder!($BUILDER_TYPE, $ARGS[0].len());
         let mut builder =
             ListBuilder::<$BUILDER_TYPE>::with_capacity(builder, $ARGS.len());
 
+        let num_rows = $ARGS[0].len();
+        assert!(
+            $ARGS.iter().all(|a| a.len() == num_rows),
+            "all arguments must have the same number of rows"
+        );
+
         // for each entry in the array
-        for index in 0..$ARGS[0].len() {
+        for index in 0..num_rows {
+            // for each column
             for arg in $ARGS {
                 match arg.as_any().downcast_ref::<$ARRAY_TYPE>() {
+                    // Copy the source array value into the target ListArray
                     Some(arr) => {
-                        builder.values().append_value(arr.value(index));
+                        if arr.is_valid(index) {
+                            builder.values().append_value(arr.value(index));
+                        } else {
+                            builder.values().append_null();
+                        }
                     }
                     None => match arg.as_any().downcast_ref::<NullArray>() {
                         Some(arr) => {
@@ -179,6 +204,46 @@ fn compute_array_dims(arr: Option<ArrayRef>) -> 
Result<Option<Vec<Option<u64>>>>
     }
 }
 
+/// Convert one or more [`ArrayRef`] of the same type into a
+/// `ListArray`
+///
+/// # Example (non nested)
+///
+/// Calling `array(col1, col2)` where col1 and col2 are non nested
+/// would return a single new `ListArray`, where each row was a list
+/// of 2 elements:
+///
+/// ```text
+/// ┌─────────┐   ┌─────────┐           ┌──────────────┐
+/// │ ┌─────┐ │   │ ┌─────┐ │           │ ┌──────────┐ │
+/// │ │  A  │ │   │ │  X  │ │           │ │  [A, X]  │ │
+/// │ ├─────┤ │   │ ├─────┤ │           │ ├──────────┤ │
+/// │ │NULL │ │   │ │  Y  │ │──────────▶│ │[NULL, Y] │ │
+/// │ ├─────┤ │   │ ├─────┤ │           │ ├──────────┤ │
+/// │ │  C  │ │   │ │  Z  │ │           │ │  [C, Z]  │ │
+/// │ └─────┘ │   │ └─────┘ │           │ └──────────┘ │
+/// └─────────┘   └─────────┘           └──────────────┘
+///   col1           col2                    output
+/// ```
+///
+/// # Example (nested)
+///
+/// Calling `array(col1, col2)` where col1 and col2 are lists
+/// would return a single new `ListArray`, where each row was a list
+/// of the corresponding elements of col1 and col2 flattened.
+///
+/// ``` text
+/// ┌──────────────┐   ┌──────────────┐        ┌────────────────────────┐
+/// │ ┌──────────┐ │   │ ┌──────────┐ │        │ ┌────────────────────┐ │
+/// │ │  [A, X]  │ │   │ │    []    │ │        │ │       [A, X]       │ │
+/// │ ├──────────┤ │   │ ├──────────┤ │        │ ├────────────────────┤ │
+/// │ │[NULL, Y] │ │   │ │[Q, R, S] │ │───────▶│ │ [NULL, Y, Q, R, S] │ │
+/// │ ├──────────┤ │   │ ├──────────┤ │        │ ├────────────────────┤ │
+/// │ │  [C, Z]  │ │   │ │   NULL   │ │        │ │    [C, Z, NULL]    │ │
+/// │ └──────────┘ │   │ └──────────┘ │        │ └────────────────────┘ │
+/// └──────────────┘   └──────────────┘        └────────────────────────┘
+///      col1               col2                         output
+/// ```
 fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
     // do not accept 0 arguments.
     if args.is_empty() {
@@ -200,6 +265,7 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> 
Result<ArrayRef> {
             let mut mutable =
                 MutableArrayData::with_capacities(array_data, false, capacity);
 
+            // Copy over all the child data
             for (i, a) in arrays.iter().enumerate() {
                 mutable.extend(i, 0, a.len())
             }
@@ -239,8 +305,11 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> 
Result<ArrayRef> {
     Ok(res)
 }
 
-/// put values in an array.
-pub fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
+/// Convert one or more [`ColumnarValue`] of the same type into a
+/// `ListArray`
+///
+/// See [`array_array`] for more details.
+fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
     let arrays: Vec<ArrayRef> = values
         .iter()
         .map(|x| match x {

Reply via email to