This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 8284371cb5 feat: support 'LargeList' in `array_pop_front` and
`array_pop_back` (#8569)
8284371cb5 is described below
commit 8284371cb5dbeb5d0b1d50c420affb9be86b1599
Author: Alex Huang <[email protected]>
AuthorDate: Thu Dec 28 22:08:09 2023 +0100
feat: support 'LargeList' in `array_pop_front` and `array_pop_back` (#8569)
* support largelist in pop back
* support largelist in pop front
* add function comment
* use execution error
* use execution error
* split the general code
---
datafusion/physical-expr/src/array_expressions.rs | 90 +++++++++++++++++------
datafusion/sqllogictest/test_files/array.slt | 75 +++++++++++++++++++
2 files changed, 141 insertions(+), 24 deletions(-)
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 7a986810ba..250250630e 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -743,22 +743,78 @@ where
)?))
}
-/// array_pop_back SQL function
-pub fn array_pop_back(args: &[ArrayRef]) -> Result<ArrayRef> {
- if args.len() != 1 {
- return exec_err!("array_pop_back needs one argument");
- }
+fn general_pop_front_list<O: OffsetSizeTrait>(
+ array: &GenericListArray<O>,
+) -> Result<ArrayRef>
+where
+ i64: TryInto<O>,
+{
+ let from_array = Int64Array::from(vec![2; array.len()]);
+ let to_array = Int64Array::from(
+ array
+ .iter()
+ .map(|arr| arr.map_or(0, |arr| arr.len() as i64))
+ .collect::<Vec<i64>>(),
+ );
+ general_array_slice::<O>(array, &from_array, &to_array)
+}
- let list_array = as_list_array(&args[0])?;
- let from_array = Int64Array::from(vec![1; list_array.len()]);
+fn general_pop_back_list<O: OffsetSizeTrait>(
+ array: &GenericListArray<O>,
+) -> Result<ArrayRef>
+where
+ i64: TryInto<O>,
+{
+ let from_array = Int64Array::from(vec![1; array.len()]);
let to_array = Int64Array::from(
- list_array
+ array
.iter()
.map(|arr| arr.map_or(0, |arr| arr.len() as i64 - 1))
.collect::<Vec<i64>>(),
);
- let args = vec![args[0].clone(), Arc::new(from_array), Arc::new(to_array)];
- array_slice(args.as_slice())
+ general_array_slice::<O>(array, &from_array, &to_array)
+}
+
+/// array_pop_front SQL function
+pub fn array_pop_front(args: &[ArrayRef]) -> Result<ArrayRef> {
+ let array_data_type = args[0].data_type();
+ match array_data_type {
+ DataType::List(_) => {
+ let array = as_list_array(&args[0])?;
+ general_pop_front_list::<i32>(array)
+ }
+ DataType::LargeList(_) => {
+ let array = as_large_list_array(&args[0])?;
+ general_pop_front_list::<i64>(array)
+ }
+ _ => exec_err!(
+ "array_pop_front does not support type: {:?}",
+ array_data_type
+ ),
+ }
+}
+
+/// array_pop_back SQL function
+pub fn array_pop_back(args: &[ArrayRef]) -> Result<ArrayRef> {
+ if args.len() != 1 {
+ return exec_err!("array_pop_back needs one argument");
+ }
+
+ let array_data_type = args[0].data_type();
+ match array_data_type {
+ DataType::List(_) => {
+ let array = as_list_array(&args[0])?;
+ general_pop_back_list::<i32>(array)
+ }
+ DataType::LargeList(_) => {
+ let array = as_large_list_array(&args[0])?;
+ general_pop_back_list::<i64>(array)
+ }
+ _ => exec_err!(
+ "array_pop_back does not support type: {:?}",
+ array_data_type
+ ),
+ }
}
/// Appends or prepends elements to a ListArray.
@@ -882,20 +938,6 @@ pub fn gen_range(args: &[ArrayRef]) -> Result<ArrayRef> {
Ok(arr)
}
-/// array_pop_front SQL function
-pub fn array_pop_front(args: &[ArrayRef]) -> Result<ArrayRef> {
- let list_array = as_list_array(&args[0])?;
- let from_array = Int64Array::from(vec![2; list_array.len()]);
- let to_array = Int64Array::from(
- list_array
- .iter()
- .map(|arr| arr.map_or(0, |arr| arr.len() as i64))
- .collect::<Vec<i64>>(),
- );
- let args = vec![args[0].clone(), Arc::new(from_array), Arc::new(to_array)];
- array_slice(args.as_slice())
-}
-
/// Array_append SQL function
pub fn array_append(args: &[ArrayRef]) -> Result<ArrayRef> {
if args.len() != 2 {
diff --git a/datafusion/sqllogictest/test_files/array.slt
b/datafusion/sqllogictest/test_files/array.slt
index 4c4adbabfd..b8d89edb49 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -994,18 +994,33 @@ select array_pop_back(make_array(1, 2, 3, 4, 5)),
array_pop_back(make_array('h',
----
[1, 2, 3, 4] [h, e, l, l]
+query ??
+select array_pop_back(arrow_cast(make_array(1, 2, 3, 4, 5),
'LargeList(Int64)')), array_pop_back(arrow_cast(make_array('h', 'e', 'l', 'l',
'o'), 'LargeList(Utf8)'));
+----
+[1, 2, 3, 4] [h, e, l, l]
+
# array_pop_back scalar function #2 (after array_pop_back, array is empty)
query ?
select array_pop_back(make_array(1));
----
[]
+query ?
+select array_pop_back(arrow_cast(make_array(1), 'LargeList(Int64)'));
+----
+[]
+
# array_pop_back scalar function #3 (array_pop_back the empty array)
query ?
select array_pop_back(array_pop_back(make_array(1)));
----
[]
+query ?
+select array_pop_back(array_pop_back(arrow_cast(make_array(1),
'LargeList(Int64)')));
+----
+[]
+
# array_pop_back scalar function #4 (array_pop_back the arrays which have NULL)
query ??
select array_pop_back(make_array(1, 2, 3, 4, NULL)),
array_pop_back(make_array(NULL, 'e', 'l', NULL, 'o'));
@@ -1018,24 +1033,44 @@ select array_pop_back(make_array(make_array(1, 2, 3),
make_array(2, 9, 1), make_
----
[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+query ?
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2,
9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4),
make_array(4, 5, 6)), 'LargeList(List(Int64))'));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
# array_pop_back scalar function #6 (array_pop_back the nested arrays with
NULL)
query ?
select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL));
----
[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+query ?
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2,
9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), NULL),
'LargeList(List(Int64))'));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
# array_pop_back scalar function #7 (array_pop_back the nested arrays with
NULL)
query ?
select array_pop_back(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), NULL, make_array(1, 7, 4)));
----
[[1, 2, 3], [2, 9, 1], [7, 8, 9], ]
+query ?
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3), make_array(2,
9, 1), make_array(7, 8, 9), NULL, make_array(1, 7, 4)),
'LargeList(List(Int64))'));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], ]
+
# array_pop_back scalar function #8 (after array_pop_back, nested array is
empty)
query ?
select array_pop_back(make_array(make_array(1, 2, 3)));
----
[]
+query ?
+select array_pop_back(arrow_cast(make_array(make_array(1, 2, 3)),
'LargeList(List(Int64))'));
+----
+[]
+
# array_pop_back with columns
query ?
select array_pop_back(column1) from arrayspop;
@@ -1047,6 +1082,16 @@ select array_pop_back(column1) from arrayspop;
[]
[, 10, 11]
+query ?
+select array_pop_back(arrow_cast(column1, 'LargeList(Int64)')) from arrayspop;
+----
+[1, 2]
+[3, 4, 5]
+[6, 7, 8, ]
+[, ]
+[]
+[, 10, 11]
+
## array_pop_front (aliases: `list_pop_front`)
# array_pop_front scalar function #1
@@ -1055,36 +1100,66 @@ select array_pop_front(make_array(1, 2, 3, 4, 5)),
array_pop_front(make_array('h
----
[2, 3, 4, 5] [e, l, l, o]
+query ??
+select array_pop_front(arrow_cast(make_array(1, 2, 3, 4, 5),
'LargeList(Int64)')), array_pop_front(arrow_cast(make_array('h', 'e', 'l', 'l',
'o'), 'LargeList(Utf8)'));
+----
+[2, 3, 4, 5] [e, l, l, o]
+
# array_pop_front scalar function #2 (after array_pop_front, array is empty)
query ?
select array_pop_front(make_array(1));
----
[]
+query ?
+select array_pop_front(arrow_cast(make_array(1), 'LargeList(Int64)'));
+----
+[]
+
# array_pop_front scalar function #3 (array_pop_front the empty array)
query ?
select array_pop_front(array_pop_front(make_array(1)));
----
[]
+query ?
+select array_pop_front(array_pop_front(arrow_cast(make_array(1),
'LargeList(Int64)')));
+----
+[]
+
# array_pop_front scalar function #5 (array_pop_front the nested arrays)
query ?
select array_pop_front(make_array(make_array(1, 2, 3), make_array(2, 9, 1),
make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4), make_array(4, 5,
6)));
----
[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]]
+query ?
+select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3),
make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7,
4), make_array(4, 5, 6)), 'LargeList(List(Int64))'));
+----
+[[2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4], [4, 5, 6]]
+
# array_pop_front scalar function #6 (array_pop_front the nested arrays with
NULL)
query ?
select array_pop_front(make_array(NULL, make_array(1, 2, 3), make_array(2, 9,
1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7, 4)));
----
[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+query ?
+select array_pop_front(arrow_cast(make_array(NULL, make_array(1, 2, 3),
make_array(2, 9, 1), make_array(7, 8, 9), make_array(1, 2, 3), make_array(1, 7,
4)), 'LargeList(List(Int64))'));
+----
+[[1, 2, 3], [2, 9, 1], [7, 8, 9], [1, 2, 3], [1, 7, 4]]
+
# array_pop_front scalar function #8 (after array_pop_front, nested array is
empty)
query ?
select array_pop_front(make_array(make_array(1, 2, 3)));
----
[]
+query ?
+select array_pop_front(arrow_cast(make_array(make_array(1, 2, 3)),
'LargeList(List(Int64))'));
+----
+[]
+
## array_slice (aliases: list_slice)
# array_slice scalar function #1 (with positive indexes)