This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 49583bd501 Column support for array_to_string (#6940)
49583bd501 is described below
commit 49583bd5010282ca126e75100dce958aa346e5ee
Author: Jay Zhan <[email protected]>
AuthorDate: Sun Jul 16 21:17:35 2023 +0800
Column support for array_to_string (#6940)
* revert array.slt that changed by #6595
Signed-off-by: jayzhan211 <[email protected]>
* add test for to string
Signed-off-by: jayzhan211 <[email protected]>
* first draft
Signed-off-by: jayzhan211 <[email protected]>
* cleanup
Signed-off-by: jayzhan211 <[email protected]>
---------
Signed-off-by: jayzhan211 <[email protected]>
---
.../core/tests/sqllogictests/test_files/array.slt | 112 +++++++++++++++------
datafusion/physical-expr/src/array_expressions.rs | 67 +++++++++---
2 files changed, 131 insertions(+), 48 deletions(-)
diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt
b/datafusion/core/tests/sqllogictests/test_files/array.slt
index d9b3449dfe..f0f50ccc93 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -365,22 +365,22 @@ select array_prepend(100.1, column2), array_prepend('.',
column3) from arrays;
## array_fill
# array_fill scalar function #1
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name:
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable:
true, dict_id: 0, dict_is_ordered: [...]
+query ???
select array_fill(11, make_array(1, 2, 3)), array_fill(3, make_array(2, 3)),
array_fill(2, make_array(2));
+----
+[[[11, 11, 11], [11, 11, 11]]] [[3, 3, 3], [3, 3, 3]] [2, 2]
# array_fill scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name:
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable:
true, dict_id: 0, dict_is_ordered: [...]
+query ??
select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2,
2, 2));
+----
+[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2],
[2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2],
[2, 2]]]]]
# array_fill scalar function #3
-query error DataFusion error: SQL error: TokenizerError\("Unterminated string
literal at Line: 2, Column 856"\)
-caused by
-Internal error: Optimizer rule 'simplify_expressions' failed, due to generate
a different schema, original schema: DFSchema \{ fields: \[DFField \{
qualifier: None, field: Field \{ name:
"array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \} \}\], metadata: \{\} \}, new schema: DFSchema \{ fields: \[DF
[...]
+query ?
select array_fill(1, make_array())
+----
+[]
## array_concat
@@ -661,16 +661,16 @@ select array_to_string(['h', 'e', 'l', 'l', 'o'], ','),
array_to_string([1, 2, 3
h,e,l,l,o 1-2-3-4-5 1|2|3
# array_to_string scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Arrow error: Cast error: Cannot cast string '1\+2\+3\+4\+5\+6' to value of
Int64 type
+query TTT
select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5,
6]], '+'), array_to_string(array_fill(3, [3, 2, 2]), '/\');
+----
+11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3
# array_to_string scalar function #3
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert Utf8 to List\(Field \{
name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered:
false, metadata: \{\} \}\)
+query T
select array_to_string(make_array(), ',')
+----
+(empty)
# array_to_string scalar function with nulls #1
query TTT
@@ -684,6 +684,56 @@ select array_to_string(make_array('h', NULL, NULL, NULL,
'o'), ',', '-'), array_
----
h,-,-,-,o nil-2-nil-4-5 1|0|3
+# array_to_string with columns #1
+
+# For reference
+# select column1, column4 from arrays_values;
+# ----
+# [, 2, 3, 4, 5, 6, 7, 8, 9, 10] ,
+# [11, 12, 13, 14, 15, 16, 17, 18, , 20] .
+# [21, 22, 23, , 25, 26, 27, 28, 29, 30] -
+# [31, 32, 33, 34, 35, , 37, 38, 39, 40] ok
+# NULL @
+# [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $
+# [51, 52, , 54, 55, 56, 57, 58, 59, 60] ^
+# [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL
+
+query T
+select array_to_string(column1, column4) from arrays_values;
+----
+2,3,4,5,6,7,8,9,10
+11.12.13.14.15.16.17.18.20
+21-22-23-25-26-27-28-29-30
+31ok32ok33ok34ok35ok37ok38ok39ok40
+NULL
+41$42$43$44$45$46$47$48$49$50
+51^52^54^55^56^57^58^59^60
+NULL
+
+query TT
+select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/')
from arrays_values;
+----
+2_3_4_5_6_7_8_9_10 1/2/3
+11_12_13_14_15_16_17_18_20 1/2/3
+21_22_23_25_26_27_28_29_30 1/2/3
+31_32_33_34_35_37_38_39_40 1/2/3
+NULL 1/2/3
+41_42_43_44_45_46_47_48_49_50 1/2/3
+51_52_54_55_56_57_58_59_60 1/2/3
+61_62_63_64_65_66_67_68_69_70 1/2/3
+
+query TT
+select array_to_string(column1, '_', '*'),
array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values;
+----
+*_2_3_4_5_6_7_8_9_10 1.2.3
+11_12_13_14_15_16_17_18_*_20 1.2.3
+21_22_23_*_25_26_27_28_29_30 1.2.3
+31_32_33_34_35_*_37_38_39_40 1.2.3
+NULL 1.2.3
+41_42_43_44_45_46_47_48_49_50 1.2.3
+51_52_*_54_55_56_57_58_59_60 1.2.3
+61_62_63_64_65_66_67_68_69_70 1.2.3
+
## cardinality
# cardinality scalar function
@@ -693,10 +743,10 @@ select cardinality(make_array(1, 2, 3, 4, 5)),
cardinality([1, 3, 5]), cardinali
5 3 5
# cardinality scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name:
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable:
true, dict_id: 0, dict_is_ordered: [...]
+query II
select cardinality(make_array([1, 2], [3, 4], [5, 6])),
cardinality(array_fill(3, array[3, 2, 3]));
+----
+6 18
# cardinality scalar function #3
query II
@@ -725,10 +775,10 @@ select trim_array(make_array(1, 2, 3, 4, 5), 2),
trim_array(['h', 'e', 'l', 'l',
[1, 2, 3] [h, e] [1.0]
# trim_array scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name:
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable:
true, dict_id: 0, dict_is_ordered: [...]
+query ??
select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3,
4, 2]), 2);
+----
+[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]]
# trim_array scalar function #3
query ?
@@ -763,10 +813,10 @@ select array_length(make_array(1, 2, 3, 4, 5), 2),
array_length(make_array(1, 2,
NULL NULL 2
# array_length scalar function #4
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name:
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable:
true, dict_id: 0, dict_is_ordered: [...]
+query IIII
select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3,
[3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3),
array_length(array_fill(3, [3, 2, 5]), 4);
+----
+3 2 5 NULL
# array_length scalar function #5
query III
@@ -809,10 +859,10 @@ select array_dims(make_array(1, 2, 3)),
array_dims(make_array([1, 2], [3, 4])),
[3] [2, 2] [1, 1, 1, 2, 1]
# array_dims scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name:
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable:
true, dict_id: 0, dict_is_ordered: [...]
+query ??
select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5,
4]));
+----
+[1, 2, 3] [2, 5, 4]
# array_dims scalar function #3
query ??
@@ -841,10 +891,10 @@ select array_ndims(make_array(1, 2, 3)),
array_ndims(make_array([1, 2], [3, 4]))
1 2 5
# array_ndims scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name:
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name:
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false,
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable:
true, dict_id: 0, dict_is_ordered: [...]
+query II
select array_ndims(array_fill(1, [1, 2, 3])),
array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]);
+----
+3 21
# array_ndims scalar function #3
query II
@@ -926,10 +976,8 @@ select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0,
3.0, 4.0), 'h' || make_a
----
[1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
-
### Array casting tests
-
## make_array
# make_array scalar function #1
diff --git a/datafusion/physical-expr/src/array_expressions.rs
b/datafusion/physical-expr/src/array_expressions.rs
index 93c1626daf..d62bcbd119 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -1064,7 +1064,6 @@ macro_rules! to_string {
}
}
}
-
Ok($ARG)
}};
}
@@ -1072,9 +1071,10 @@ macro_rules! to_string {
/// Array_to_string SQL function
pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
let arr = &args[0];
- let delimeter = as_generic_string_array::<i32>(&args[1])?
- .value(0)
- .to_string();
+
+ let delimeters = as_generic_string_array::<i32>(&args[1])?;
+ let delimeters: Vec<Option<&str>> = delimeters.iter().collect();
+
let mut null_string = String::from("");
let mut with_null_string = false;
if args.len() == 3 {
@@ -1219,21 +1219,56 @@ pub fn array_to_string(args: &[ArrayRef]) ->
Result<ArrayRef> {
}
let mut arg = String::from("");
- let mut res = compute_array_to_string(
- &mut arg,
- arr.clone(),
- delimeter.clone(),
- null_string,
- with_null_string,
- )?
- .clone();
- match res.as_str() {
- "" => Ok(Arc::new(StringArray::from(vec![Some(res)]))),
+ let mut res: Vec<Option<String>> = Vec::new();
+
+ match arr.data_type() {
+ DataType::List(_) | DataType::LargeList(_) |
DataType::FixedSizeList(_, _) => {
+ let list_array = arr.as_list::<i32>();
+ for (arr, &delimeter) in list_array.iter().zip(delimeters.iter()) {
+ if let (Some(arr), Some(delimeter)) = (arr, delimeter) {
+ arg = String::from("");
+ let s = compute_array_to_string(
+ &mut arg,
+ arr,
+ delimeter.to_string(),
+ null_string.clone(),
+ with_null_string,
+ )?
+ .clone();
+
+ if let Some(s) = s.strip_suffix(delimeter) {
+ res.push(Some(s.to_string()));
+ } else {
+ res.push(Some(s));
+ }
+ } else {
+ res.push(None);
+ }
+ }
+ }
_ => {
- res.truncate(res.len() - delimeter.len());
- Ok(Arc::new(StringArray::from(vec![Some(res)])))
+ // delimeter length is 1
+ assert_eq!(delimeters.len(), 1);
+ let delimeter = delimeters[0].unwrap();
+ let s = compute_array_to_string(
+ &mut arg,
+ arr.clone(),
+ delimeter.to_string(),
+ null_string,
+ with_null_string,
+ )?
+ .clone();
+
+ if !s.is_empty() {
+ let s = s.strip_suffix(delimeter).unwrap().to_string();
+ res.push(Some(s));
+ } else {
+ res.push(Some(s));
+ }
}
}
+
+ Ok(Arc::new(StringArray::from(res)))
}
/// Trim_array SQL function