This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new b2e8848406 feat: support `largelist` in `array_to_string` (#8729)
b2e8848406 is described below
commit b2e8848406b0884fbc631e130c3e25e35550a1a1
Author: Alex Huang <[email protected]>
AuthorDate: Sat Jan 6 05:09:24 2024 +0800
feat: support `largelist` in `array_to_string` (#8729)
* support largelist in array_to_string
* reduce code duplication
---
datafusion/physical-expr/src/array_expressions.rs | 94 ++++++++++++++++-------
datafusion/sqllogictest/test_files/array.slt | 61 +++++++++++++++
2 files changed, 128 insertions(+), 27 deletions(-)
diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 78c490d5db..cb4ad3ed63 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -2022,8 +2022,21 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
) -> Result<&mut String> {
match arr.data_type() {
DataType::List(..) => {
- let list_array = downcast_arg!(arr, ListArray);
+ let list_array = as_list_array(&arr)?;
+ for i in 0..list_array.len() {
+ compute_array_to_string(
+ arg,
+ list_array.value(i),
+ delimiter.clone(),
+ null_string.clone(),
+ with_null_string,
+ )?;
+ }
+ Ok(arg)
+ }
+ DataType::LargeList(..) => {
+ let list_array = as_large_list_array(&arr)?;
for i in 0..list_array.len() {
compute_array_to_string(
arg,
@@ -2055,35 +2068,61 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
}
}
- let mut arg = String::from("");
- let mut res: Vec<Option<String>> = Vec::new();
-
- match arr.data_type() {
- DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => {
- let list_array = arr.as_list::<i32>();
- for (arr, &delimiter) in list_array.iter().zip(delimiters.iter()) {
- if let (Some(arr), Some(delimiter)) = (arr, delimiter) {
- arg = String::from("");
- let s = compute_array_to_string(
- &mut arg,
- arr,
- delimiter.to_string(),
- null_string.clone(),
- with_null_string,
- )?
- .clone();
-
- if let Some(s) = s.strip_suffix(delimiter) {
- res.push(Some(s.to_string()));
- } else {
- res.push(Some(s));
- }
+ fn generate_string_array<O: OffsetSizeTrait>(
+ list_arr: &GenericListArray<O>,
+ delimiters: Vec<Option<&str>>,
+ null_string: String,
+ with_null_string: bool,
+ ) -> Result<StringArray> {
+ let mut res: Vec<Option<String>> = Vec::new();
+ for (arr, &delimiter) in list_arr.iter().zip(delimiters.iter()) {
+ if let (Some(arr), Some(delimiter)) = (arr, delimiter) {
+ let mut arg = String::from("");
+ let s = compute_array_to_string(
+ &mut arg,
+ arr,
+ delimiter.to_string(),
+ null_string.clone(),
+ with_null_string,
+ )?
+ .clone();
+
+ if let Some(s) = s.strip_suffix(delimiter) {
+ res.push(Some(s.to_string()));
} else {
- res.push(None);
+ res.push(Some(s));
}
+ } else {
+ res.push(None);
}
}
+
+ Ok(StringArray::from(res))
+ }
+
+ let arr_type = arr.data_type();
+ let string_arr = match arr_type {
+ DataType::List(_) | DataType::FixedSizeList(_, _) => {
+ let list_array = as_list_array(&arr)?;
+ generate_string_array::<i32>(
+ list_array,
+ delimiters,
+ null_string,
+ with_null_string,
+ )?
+ }
+ DataType::LargeList(_) => {
+ let list_array = as_large_list_array(&arr)?;
+ generate_string_array::<i64>(
+ list_array,
+ delimiters,
+ null_string,
+ with_null_string,
+ )?
+ }
_ => {
+ let mut arg = String::from("");
+ let mut res: Vec<Option<String>> = Vec::new();
// delimiter length is 1
assert_eq!(delimiters.len(), 1);
let delimiter = delimiters[0].unwrap();
@@ -2102,10 +2141,11 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
} else {
res.push(Some(s));
}
+ StringArray::from(res)
}
- }
+ };
- Ok(Arc::new(StringArray::from(res)))
+ Ok(Arc::new(string_arr))
}
/// Cardinality SQL function
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index 774d67b4fd..083c4ff31b 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -3238,30 +3238,55 @@ select list_to_string(['h', 'e', 'l', 'l', 'o'], ','), list_to_string([1, 2, 3,
----
h,e,l,l,o 1-2-3-4-5 1|2|3
+query TTT
+select list_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
+----
+h,e,l,l,o 1-2-3-4-5 1|2|3
+
# array_join scalar function #5 (function alias `array_to_string`)
query TTT
select array_join(['h', 'e', 'l', 'l', 'o'], ','), array_join([1, 2, 3, 4, 5], '-'), array_join([1.0, 2.0, 3.0], '|');
----
h,e,l,l,o 1-2-3-4-5 1|2|3
+query TTT
+select array_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
+----
+h,e,l,l,o 1-2-3-4-5 1|2|3
+
# list_join scalar function #6 (function alias `list_join`)
query TTT
select list_join(['h', 'e', 'l', 'l', 'o'], ','), list_join([1, 2, 3, 4, 5], '-'), list_join([1.0, 2.0, 3.0], '|');
----
h,e,l,l,o 1-2-3-4-5 1|2|3
+query TTT
+select list_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
+----
+h,e,l,l,o 1-2-3-4-5 1|2|3
+
# array_to_string scalar function with nulls #1
query TTT
select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|');
----
h,l,o 1-3-5 2|3
+query TTT
+select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
+----
+h,e,l,l,o 1-2-3-4-5 1|2|3
+
# array_to_string scalar function with nulls #2
query TTT
select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0');
----
h,-,-,-,o nil-2-nil-4-5 1|0|3
+query TTT
+select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'LargeList(Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'LargeList(Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'LargeList(Float64)'), '|', '0');
+----
+h,-,-,-,o nil-2-nil-4-5 1|0|3
+
# array_to_string with columns #1
# For reference
@@ -3288,6 +3313,18 @@ NULL
51^52^54^55^56^57^58^59^60
NULL
+query T
+select array_to_string(column1, column4) from large_arrays_values;
+----
+2,3,4,5,6,7,8,9,10
+11.12.13.14.15.16.17.18.20
+21-22-23-25-26-27-28-29-30
+31ok32ok33ok34ok35ok37ok38ok39ok40
+NULL
+41$42$43$44$45$46$47$48$49$50
+51^52^54^55^56^57^58^59^60
+NULL
+
query TT
select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values;
----
@@ -3300,6 +3337,18 @@ NULL 1/2/3
51_52_54_55_56_57_58_59_60 1/2/3
61_62_63_64_65_66_67_68_69_70 1/2/3
+query TT
+select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from large_arrays_values;
+----
+2_3_4_5_6_7_8_9_10 1/2/3
+11_12_13_14_15_16_17_18_20 1/2/3
+21_22_23_25_26_27_28_29_30 1/2/3
+31_32_33_34_35_37_38_39_40 1/2/3
+NULL 1/2/3
+41_42_43_44_45_46_47_48_49_50 1/2/3
+51_52_54_55_56_57_58_59_60 1/2/3
+61_62_63_64_65_66_67_68_69_70 1/2/3
+
query TT
select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values;
----
@@ -3312,6 +3361,18 @@ NULL 1.2.3
51_52_*_54_55_56_57_58_59_60 1.2.3
61_62_63_64_65_66_67_68_69_70 1.2.3
+query TT
+select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from large_arrays_values;
+----
+*_2_3_4_5_6_7_8_9_10 1.2.3
+11_12_13_14_15_16_17_18_*_20 1.2.3
+21_22_23_*_25_26_27_28_29_30 1.2.3
+31_32_33_34_35_*_37_38_39_40 1.2.3
+NULL 1.2.3
+41_42_43_44_45_46_47_48_49_50 1.2.3
+51_52_*_54_55_56_57_58_59_60 1.2.3
+61_62_63_64_65_66_67_68_69_70 1.2.3
+
## cardinality
# cardinality scalar function