This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new e02ed51b88 `make_array` with column of list (#7137)
e02ed51b88 is described below
commit e02ed51b880d09f2156bd192d283aef75cab04b7
Author: Jay Zhan <[email protected]>
AuthorDate: Mon Jul 31 19:11:13 2023 +0800
`make_array` with column of list (#7137)
* first draft
Signed-off-by: jayzhan211 <[email protected]>
* done
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
---
.../core/tests/sqllogictests/test_files/array.slt | 57 ++++++++++++++++------
datafusion/physical-expr/src/array_expressions.rs | 45 ++++++++++-------
datafusion/physical-expr/src/functions.rs | 4 +-
3 files changed, 73 insertions(+), 33 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt b/datafusion/core/tests/sqllogictests/test_files/array.slt
index 2a1add0b13..2c7cfa7e7a 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -143,10 +143,10 @@ AS VALUES
statement ok
CREATE TABLE arrays_values_without_nulls
AS VALUES
- (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ','),
- (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.'),
- (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-'),
- (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok')
+ (make_array(1, 2, 3, 4, 5, 6, 7, 8, 9, 10), 1, 1, ',', [2,3]),
+ (make_array(11, 12, 13, 14, 15, 16, 17, 18, 19, 20), 12, 2, '.', [4,5]),
+ (make_array(21, 22, 23, 24, 25, 26, 27, 28, 29, 30), 23, 3, '-', [6,7]),
+ (make_array(31, 32, 33, 34, 35, 26, 37, 38, 39, 40), 34, 4, 'ok', [8,9])
;
statement ok
@@ -224,13 +224,13 @@ NULL [7, , 8] 13 [[, , 60]]
NULL NULL NULL NULL
# arrays_values_without_nulls table
-query ?II
-select column1, column2, column3 from arrays_values_without_nulls;
+query ?IIT
+select column1, column2, column3, column4 from arrays_values_without_nulls;
----
-[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1
-[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2
-[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3
-[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] 1 1 ,
+[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] 12 2 .
+[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] 23 3 -
+[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] 34 4 ok
# arrays_with_repeating_elements table
query ?III
@@ -380,7 +380,25 @@ from values;
[7] false [adipiscing, F] false false
[8] false [, ] true false
+# make_array with column of list
+query ??
+select column1, column5 from arrays_values_without_nulls;
+----
+[1, 2, 3, 4, 5, 6, 7, 8, 9, 10] [2, 3]
+[11, 12, 13, 14, 15, 16, 17, 18, 19, 20] [4, 5]
+[21, 22, 23, 24, 25, 26, 27, 28, 29, 30] [6, 7]
+[31, 32, 33, 34, 35, 26, 37, 38, 39, 40] [8, 9]
+query ???
+select make_array(column1),
+ make_array(column1, column5),
+ make_array(column1, make_array(50,51,52))
+from arrays_values_without_nulls;
+----
+[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [2, 3]] [[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [50, 51, 52]]
+[[11, 12, 13, 14, 15, 16, 17, 18, 19, 20]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [4, 5]] [[11, 12, 13, 14, 15, 16, 17, 18, 19, 20], [50, 51, 52]]
+[[21, 22, 23, 24, 25, 26, 27, 28, 29, 30]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [6, 7]] [[21, 22, 23, 24, 25, 26, 27, 28, 29, 30], [50, 51, 52]]
+[[31, 32, 33, 34, 35, 26, 37, 38, 39, 40]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [8, 9]] [[31, 32, 33, 34, 35, 26, 37, 38, 39, 40], [50, 51, 52]]
## array_append (aliases: `list_append`, `array_push_back`, `list_push_back`)
@@ -764,6 +782,7 @@ select array_concat(column3, make_array('.', '.', '.'))
from arrays;
# [11, 12] NULL NULL NULL
# NULL NULL NULL NULL
+
# array_concat column-wise #8 (1D + 1D)
query ?
select array_concat(column1, column2) from arrays_values_v2;
@@ -1753,10 +1772,8 @@ select make_array(1.0, '2', null)
----
[1.0, 2, ]
-
### FixedSizeListArray
-
statement ok
CREATE EXTERNAL TABLE fixed_size_list_array STORED AS PARQUET LOCATION 'tests/data/fixed_size_list_array.parquet';
@@ -1787,17 +1804,27 @@ select arrow_cast(f0, 'List(Int64)') from fixed_size_list_array;
query ?
select make_array(arrow_cast(f0, 'List(Int64)')) from fixed_size_list_array
----
-[[1, 2], [3, 4]]
+[[1, 2]]
+[[3, 4]]
query ?
select make_array(f0) from fixed_size_list_array
----
-[[1, 2], [3, 4]]
+[[1, 2]]
+[[3, 4]]
+query ?
+select array_concat(column1, [7]) from arrays_values_v2;
+----
+[, 2, 3, 7]
+[7]
+[9, , 10, 7]
+[, 1, 7]
+[11, 12, 7]
+[7]
### Delete tables
-
statement ok
drop table values;
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 01b9ac95b4..6940456657 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -264,30 +264,33 @@ fn array_array(args: &[ArrayRef], data_type: DataType) -> Result<ArrayRef> {
DataType::List(..) => {
let arrays =
downcast_vec!(args, ListArray).collect::<Result<Vec<&ListArray>>>()?;
- let len = arrays.iter().map(|arr| arr.len() as i32).sum();
- let capacity =
- Capacities::Array(arrays.iter().map(|a| a.get_array_memory_size()).sum());
- let array_data: Vec<_> =
- arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
+
+ // Assume number of rows is the same for all arrays
+ let row_count = arrays[0].len();
+
+ let capacity = Capacities::Array(arrays.iter().map(|a| a.len()).sum());
+ let array_data = arrays.iter().map(|a| a.to_data()).collect::<Vec<_>>();
let array_data = array_data.iter().collect();
let mut mutable =
- MutableArrayData::with_capacities(array_data, false, capacity);
+ MutableArrayData::with_capacities(array_data, true, capacity);
- // Copy over all the child data
- for (i, a) in arrays.iter().enumerate() {
- mutable.extend(i, 0, a.len())
+ for i in 0..row_count {
+ for (j, _) in arrays.iter().enumerate() {
+ mutable.extend(j, i, i + 1);
+ }
}
-
let list_data_type =
DataType::List(Arc::new(Field::new("item", data_type, true)));
+ let offsets: Vec<i32> = (0..row_count as i32 + 1)
+ .map(|i| i * arrays.len() as i32)
+ .collect();
+
let list_data = ArrayData::builder(list_data_type)
- .len(1)
- .add_buffer(Buffer::from_slice_ref([0, len]))
+ .len(row_count)
+ .buffers(vec![Buffer::from_vec(offsets)])
.add_child_data(mutable.freeze())
- .build()
- .unwrap();
-
+ .build()?;
Arc::new(ListArray::from(list_data))
}
DataType::Utf8 => array!(args, StringArray, StringBuilder),
@@ -348,8 +351,16 @@ fn array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
}
/// `make_array` SQL function
-pub fn make_array(values: &[ColumnarValue]) -> Result<ColumnarValue> {
- array(values)
+pub fn make_array(arrays: &[ArrayRef]) -> Result<ArrayRef> {
+ let values: Vec<ColumnarValue> = arrays
+ .iter()
+ .map(|x| ColumnarValue::Array(x.clone()))
+ .collect();
+
+ match array(values.as_slice())? {
+ ColumnarValue::Array(array) => Ok(array),
+ ColumnarValue::Scalar(scalar) => Ok(scalar.to_array().clone()),
+ }
}
macro_rules! append {
diff --git a/datafusion/physical-expr/src/functions.rs b/datafusion/physical-expr/src/functions.rs
index 948cb4ec47..c9683d2cdb 100644
--- a/datafusion/physical-expr/src/functions.rs
+++ b/datafusion/physical-expr/src/functions.rs
@@ -468,7 +468,9 @@ pub fn create_physical_fun(
BuiltinScalarFunction::Cardinality => {
Arc::new(|args| make_scalar_function(array_expressions::cardinality)(args))
}
- BuiltinScalarFunction::MakeArray => Arc::new(array_expressions::make_array),
+ BuiltinScalarFunction::MakeArray => {
+ Arc::new(|args| make_scalar_function(array_expressions::make_array)(args))
+ }
BuiltinScalarFunction::TrimArray => {
Arc::new(|args| make_scalar_function(array_expressions::trim_array)(args))
}