This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new e02ed51b88 `make_array` with column of list (#7137)
e02ed51b88 is described below

commit e02ed51b880d09f2156bd192d283aef75cab04b7
Author: Jay Zhan <[email protected]>
AuthorDate: Mon Jul 31 19:11:13 2023 +0800

    `make_array` with column of list (#7137)
    
    * first draft
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * done
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    ---------
    
    Signed-off-by: jayzhan211 <[email protected]>
---
 .../core/tests/sqllogictests/test_files/array.slt  | 57 ++++++++++++++++------
 datafusion/physical-expr/src/array_expressions.rs  | 45 ++++++++++-------
 datafusion/physical-expr/src/functions.rs          |  4 +-
 3 files changed, 73 insertions(+), 33 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt
index 2a1add0b13..2c7cfa7e7a 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -143,10 +143,10 @@ AS VALUES
 statement ok
 CREATE TABLE arrays_values_without_nulls
 AS VALUES
-  (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
-  (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'),
-  (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
-  (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok')
+  (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ',', [2,3]),
+  (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.', [4,5]),
+  (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-', [6,7]),
+  (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok', [8,9])
 ;
 
 statement ok
@@ -224,13 +224,13 @@ NULL [7, , 8] 13 [[, , 60]]
 NULL NULL NULL NULL
 
 # arrays_values_without_nulls table
-query ?II
-select column1, column2, column3 from arrays_values_without_nulls;
+query ?IIT
+select column1, column2, column3, column4 from arrays_values_without_nulls;
 ----
-[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1
-[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2
-[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3
-[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 ,
+[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 .
+[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 -
+[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4 ok
 
 # arrays_with_repeating_elements table
 query ?III
@@ -380,7 +380,25 @@ from values;
 [7] false [adipiscing, F] false false
 [8] false [, ] true false
 
+# make_array with column of list
+query ??
+select column1, column5 from arrays_values_without_nulls;
+----
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 3]
+[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] [4, 5]
+[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] [6, 7]
+[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] [8, 9]
 
+query ???
+select make_array(column1),
+       make_array(column1, column5),
+       make_array(column1, make_array(50,51,52))
+from arrays_values_without_nulls;
+----
+[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 3]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [50, 51, 52]]
+[[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [4, 5]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [50, 51, 52]]
+[[21, 22, 23, 24, 25, 26, 27, 28, 29, 30]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [6, 7]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [50, 51, 52]]
+[[31, 32, 33, 34, 35, 26, 37, 38, 39, 40]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [8, 9]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [50, 51, 52]]
 
 ## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`)
 
@@ -764,6 +782,7 @@ select array_concat(column3, make_array('.', '.', '.')) from arrays;
 # [11, 12] NULL NULL NULL
 # NULL NULL NULL NULL
 
+
 # array_concat column-wise #8 (1D + 1D)
 query ?
 select array_concat(column1, column2) from arrays_values_v2;
@@ -1753,10 +1772,8 @@ select make_array(1.0, '2', null)
 ----
 [1.0, 2, ]
 
-
 ### FixedSizeListArray
 
-
 statement ok
 CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION 'tests/data/fixed_size_list_array.parquet';
 
@@ -1787,17 +1804,27 @@ select arrow_cast(f0, 'List(Int64)') from fixed_size_list_array;
 query ?
 select make_array(arrow_cast(f0, 'List(Int64)')) from fixed_size_list_array
 ----
-[[1, 2], [3, 4]]
+[[1, 2]]
+[[3, 4]]
 
 query ?
 select make_array(f0) from fixed_size_list_array
 ----
-[[1, 2], [3, 4]]
+[[1, 2]]
+[[3, 4]]
 
+query ?
+select array_concat(column1, [7]) from arrays_values_v2;
+----
+[, 2, 3, 7]
+[7]
+[9, , 10, 7]
+[, 1, 7]
+[11, 12, 7]
+[7]
 
 ### Delete tables
 
-
 statement ok
 drop table values;
 
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 01b9ac95b4..6940456657 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -264,30 +264,33 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
         DataType::List(..) => {
             let arrays =
                 downcast_vec!(args, ListArray).collect::<Result<Vec<&ListArray>>>()?;
-            let len = arrays.iter().map(|arr| arr.len() as i32).sum();
-            let capacity =
-                Capacities::Array(arrays.iter().map(|a| a.get_array_memory_size()).sum());
-            let array_data: Vec<_> =
-                arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
+
+            // Assume number of rows is the same for all arrays
+            let row_count = arrays[0].len();
+
+            let capacity = Capacities::Array(arrays.iter().map(|a| a.len()).sum());
+            let array_data = arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
             let array_data = array_data.iter().collect();
             let mut mutable =
-                MutableArrayData::with_capacities(array_data, false, capacity);
+                MutableArrayData::with_capacities(array_data, true, capacity);
 
-            // Copy over all the child data
-            for (i, a) in arrays.iter().enumerate() {
-                mutable.extend(i, 0, a.len())
+            for i in 0..row_count {
+                for (j, _) in arrays.iter().enumerate() {
+                    mutable.extend(j, i, i + 1);
+                }
             }
-
             let list_data_type =
                 DataType::List(Arc::new(Field::new("item", data_type, true)));
 
+            let offsets: Vec<i32> = (0..row_count as i32 + 1)
+                .map(|i| i * arrays.len() as i32)
+                .collect();
+
             let list_data = ArrayData::builder(list_data_type)
-                .len(1)
-                .add_buffer(Buffer::from_slice_ref([0, len]))
+                .len(row_count)
+                .buffers(vec![Buffer::from_vec(offsets)])
                 .add_child_data(mutable.freeze())
-                .build()
-                .unwrap();
-
+                .build()?;
             Arc::new(ListArray::from(list_data))
         }
         DataType::Utf8 => array!(args, StringArray, StringBuilder),
@@ -348,8 +351,16 @@ fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
 }
 
 /// `make_array` SQL function
-pub fn make_array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
-    array(values)
+pub fn make_array(arrays: &[ArrayRef]) -> Result<ArrayRef> {
+    let values: Vec<ColumnarValue> = arrays
+        .iter()
+        .map(|x| ColumnarValue::Array(x.clone()))
+        .collect();
+
+    match array(values.as_slice())? {
+        ColumnarValue::Array(array) => Ok(array),
+        ColumnarValue::Scalar(scalar) => Ok(scalar.to_array().clone()),
+    }
 }
 
 macro_rules! append {
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 948cb4ec47..c9683d2cdb 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -468,7 +468,9 @@ pub fn create_physical_fun(
         BuiltinScalarFunction::Cardinality => {
             Arc::new(|args| make_scalar_function(array_expressions::cardinality)(args))
         }
-        BuiltinScalarFunction::MakeArray => Arc::new(array_expressions::make_array),
+        BuiltinScalarFunction::MakeArray => {
+            Arc::new(|args| make_scalar_function(array_expressions::make_array)(args))
+        }
         BuiltinScalarFunction::TrimArray => {
             Arc::new(|args| make_scalar_function(array_expressions::trim_array)(args))
         }

Reply via email to