This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new 49583bd501 Column support for array_to_string (#6940)
49583bd501 is described below

commit 49583bd5010282ca126e75100dce958aa346e5ee
Author: Jay Zhan <[email protected]>
AuthorDate: Sun Jul 16 21:17:35 2023 +0800

    Column support for array_to_string (#6940)
    
    * revert array.slt that changed by #6595
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * add test for to string
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * first draft
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    * cleanup
    
    Signed-off-by: jayzhan211 <[email protected]>
    
    ---------
    
    Signed-off-by: jayzhan211 <[email protected]>
---
 .../core/tests/sqllogictests/test_files/array.slt  | 112 +++++++++++++++------
 datafusion/physical-expr/src/array_expressions.rs  |  67 +++++++++---
 2 files changed, 131 insertions(+), 48 deletions(-)

diff --git a/datafusion/core/tests/sqllogictests/test_files/array.slt 
b/datafusion/core/tests/sqllogictests/test_files/array.slt
index d9b3449dfe..f0f50ccc93 100644
--- a/datafusion/core/tests/sqllogictests/test_files/array.slt
+++ b/datafusion/core/tests/sqllogictests/test_files/array.slt
@@ -365,22 +365,22 @@ select array_prepend(100.1, column2), array_prepend('.', 
column3) from arrays;
 ## array_fill
 
 # array_fill scalar function #1
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered:  [...]
+query ???
 select array_fill(11, make_array(1, 2, 3)), array_fill(3, make_array(2, 3)), 
array_fill(2, make_array(2));
+----
+[[[11, 11, 11], [11, 11, 11]]] [[3, 3, 3], [3, 3, 3]] [2, 2]
 
 # array_fill scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered:  [...]
+query ??
 select array_fill(1, make_array(1, 1, 1)), array_fill(2, make_array(2, 2, 2, 
2, 2));
+----
+[[[1]]] [[[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], 
[2, 2]]]], [[[[2, 2], [2, 2]], [[2, 2], [2, 2]]], [[[2, 2], [2, 2]], [[2, 2], 
[2, 2]]]]]
 
 # array_fill scalar function #3
-query error DataFusion error: SQL error: TokenizerError\("Unterminated string 
literal at Line: 2, Column 856"\)
-caused by
-Internal error: Optimizer rule 'simplify_expressions' failed, due to generate 
a different schema, original schema: DFSchema \{ fields: \[DFField \{ 
qualifier: None, field: Field \{ name: 
"array_fill\(Int64\(1\),make_array\(\)\)", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \} \}\], metadata: \{\} \}, new schema: DFSchema \{ fields: \[DF 
[...]
+query ?
 select array_fill(1, make_array())
+----
+[]
 
 ## array_concat
 
@@ -661,16 +661,16 @@ select array_to_string(['h', 'e', 'l', 'l', 'o'], ','), 
array_to_string([1, 2, 3
 h,e,l,l,o 1-2-3-4-5 1|2|3
 
 # array_to_string scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Arrow error: Cast error: Cannot cast string '1\+2\+3\+4\+5\+6' to value of 
Int64 type
+query TTT
 select array_to_string([1, 1, 1], '1'), array_to_string([[1, 2], [3, 4], [5, 
6]], '+'), array_to_string(array_fill(3, [3, 2, 2]), '/\');
+----
+11111 1+2+3+4+5+6 3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3/\3
 
 # array_to_string scalar function #3
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert Utf8 to List\(Field \{ 
name: "item", data_type: Null, nullable: true, dict_id: 0, dict_is_ordered: 
false, metadata: \{\} \}\)
+query T
 select array_to_string(make_array(), ',')
+----
+(empty)
 
 # array_to_string scalar function with nulls #1
 query TTT
@@ -684,6 +684,56 @@ select array_to_string(make_array('h', NULL, NULL, NULL, 
'o'), ',', '-'), array_
 ----
 h,-,-,-,o nil-2-nil-4-5 1|0|3
 
+# array_to_string with columns #1
+
+# For reference
+# select column1, column4 from arrays_values;
+# ----
+# [, 2, 3, 4, 5, 6, 7, 8, 9, 10] ,
+# [11, 12, 13, 14, 15, 16, 17, 18, , 20] .
+# [21, 22, 23, , 25, 26, 27, 28, 29, 30] -
+# [31, 32, 33, 34, 35, , 37, 38, 39, 40] ok
+# NULL @
+# [41, 42, 43, 44, 45, 46, 47, 48, 49, 50] $
+# [51, 52, , 54, 55, 56, 57, 58, 59, 60] ^
+# [61, 62, 63, 64, 65, 66, 67, 68, 69, 70] NULL
+
+query T
+select array_to_string(column1, column4) from arrays_values;
+----
+2,3,4,5,6,7,8,9,10
+11.12.13.14.15.16.17.18.20
+21-22-23-25-26-27-28-29-30
+31ok32ok33ok34ok35ok37ok38ok39ok40
+NULL
+41$42$43$44$45$46$47$48$49$50
+51^52^54^55^56^57^58^59^60
+NULL
+
+query TT
+select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') 
from arrays_values;
+----
+2_3_4_5_6_7_8_9_10 1/2/3
+11_12_13_14_15_16_17_18_20 1/2/3
+21_22_23_25_26_27_28_29_30 1/2/3
+31_32_33_34_35_37_38_39_40 1/2/3
+NULL 1/2/3
+41_42_43_44_45_46_47_48_49_50 1/2/3
+51_52_54_55_56_57_58_59_60 1/2/3
+61_62_63_64_65_66_67_68_69_70 1/2/3
+
+query TT
+select array_to_string(column1, '_', '*'), 
array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values;
+----
+*_2_3_4_5_6_7_8_9_10 1.2.3
+11_12_13_14_15_16_17_18_*_20 1.2.3
+21_22_23_*_25_26_27_28_29_30 1.2.3
+31_32_33_34_35_*_37_38_39_40 1.2.3
+NULL 1.2.3
+41_42_43_44_45_46_47_48_49_50 1.2.3
+51_52_*_54_55_56_57_58_59_60 1.2.3
+61_62_63_64_65_66_67_68_69_70 1.2.3
+
 ## cardinality
 
 # cardinality scalar function
@@ -693,10 +743,10 @@ select cardinality(make_array(1, 2, 3, 4, 5)), 
cardinality([1, 3, 5]), cardinali
 5 3 5
 
 # cardinality scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered:  [...]
+query II
 select cardinality(make_array([1, 2], [3, 4], [5, 6])), 
cardinality(array_fill(3, array[3, 2, 3]));
+----
+6 18
 
 # cardinality scalar function #3
 query II
@@ -725,10 +775,10 @@ select trim_array(make_array(1, 2, 3, 4, 5), 2), 
trim_array(['h', 'e', 'l', 'l',
 [1, 2, 3] [h, e] [1.0]
 
 # trim_array scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered:  [...]
+query ??
 select trim_array([[1, 2], [3, 4], [5, 6]], 2), trim_array(array_fill(4, [3, 
4, 2]), 2);
+----
+[[1, 2]] [[[4, 4], [4, 4], [4, 4], [4, 4]]]
 
 # trim_array scalar function #3
 query ?
@@ -763,10 +813,10 @@ select array_length(make_array(1, 2, 3, 4, 5), 2), 
array_length(make_array(1, 2,
 NULL NULL 2
 
 # array_length scalar function #4
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered:  [...]
+query IIII
 select array_length(array_fill(3, [3, 2, 5]), 1), array_length(array_fill(3, 
[3, 2, 5]), 2), array_length(array_fill(3, [3, 2, 5]), 3), 
array_length(array_fill(3, [3, 2, 5]), 4);
+----
+3 2 5 NULL
 
 # array_length scalar function #5
 query III
@@ -809,10 +859,10 @@ select array_dims(make_array(1, 2, 3)), 
array_dims(make_array([1, 2], [3, 4])),
 [3] [2, 2] [1, 1, 1, 2, 1]
 
 # array_dims scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered:  [...]
+query ??
 select array_dims(array_fill(2, [1, 2, 3])), array_dims(array_fill(3, [2, 5, 
4]));
+----
+[1, 2, 3] [2, 5, 4]
 
 # array_dims scalar function #3
 query ??
@@ -841,10 +891,10 @@ select array_ndims(make_array(1, 2, 3)), 
array_ndims(make_array([1, 2], [3, 4]))
 1 2 5
 
 # array_ndims scalar function #2
-query error DataFusion error: SQL error: ParserError\("Expected an SQL 
statement, found: caused"\)
-caused by
-Error during planning: Cannot automatically convert List\(Field \{ name: 
"item", data_type: List\(Field \{ name: "item", data_type: List\(Field \{ name: 
"item", data_type: Int64, nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\), nullable: true, dict_id: 0, dict_is_ordered: false, 
metadata: \{\} \}\) to List\(Field \{ name: "item", data_type: Int64, nullable: 
true, dict_id: 0, dict_is_ordered:  [...]
+query II
 select array_ndims(array_fill(1, [1, 2, 3])), 
array_ndims([[[[[[[[[[[[[[[[[[[[[1]]]]]]]]]]]]]]]]]]]]]);
+----
+3 21
 
 # array_ndims scalar function #3
 query II
@@ -926,10 +976,8 @@ select 1 || make_array(2, 3, 4), 1.0 || make_array(2.0, 
3.0, 4.0), 'h' || make_a
 ----
 [1, 2, 3, 4] [1.0, 2.0, 3.0, 4.0] [h, e, l, l, o]
 
-
 ### Array casting tests
 
-
 ## make_array
 
 # make_array scalar function #1
diff --git a/datafusion/physical-expr/src/array_expressions.rs 
b/datafusion/physical-expr/src/array_expressions.rs
index 93c1626daf..d62bcbd119 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -1064,7 +1064,6 @@ macro_rules! to_string {
                 }
             }
         }
-
         Ok($ARG)
     }};
 }
@@ -1072,9 +1071,10 @@ macro_rules! to_string {
 /// Array_to_string SQL function
 pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
     let arr = &args[0];
-    let delimeter = as_generic_string_array::<i32>(&args[1])?
-        .value(0)
-        .to_string();
+
+    let delimeters = as_generic_string_array::<i32>(&args[1])?;
+    let delimeters: Vec<Option<&str>> = delimeters.iter().collect();
+
     let mut null_string = String::from("");
     let mut with_null_string = false;
     if args.len() == 3 {
@@ -1219,21 +1219,56 @@ pub fn array_to_string(args: &[ArrayRef]) -> 
Result<ArrayRef> {
     }
 
     let mut arg = String::from("");
-    let mut res = compute_array_to_string(
-        &mut arg,
-        arr.clone(),
-        delimeter.clone(),
-        null_string,
-        with_null_string,
-    )?
-    .clone();
-    match res.as_str() {
-        "" => Ok(Arc::new(StringArray::from(vec![Some(res)]))),
+    let mut res: Vec<Option<String>> = Vec::new();
+
+    match arr.data_type() {
+        DataType::List(_) | DataType::LargeList(_) | 
DataType::FixedSizeList(_, _) => {
+            let list_array = arr.as_list::<i32>();
+            for (arr, &delimeter) in list_array.iter().zip(delimeters.iter()) {
+                if let (Some(arr), Some(delimeter)) = (arr, delimeter) {
+                    arg = String::from("");
+                    let s = compute_array_to_string(
+                        &mut arg,
+                        arr,
+                        delimeter.to_string(),
+                        null_string.clone(),
+                        with_null_string,
+                    )?
+                    .clone();
+
+                    if let Some(s) = s.strip_suffix(delimeter) {
+                        res.push(Some(s.to_string()));
+                    } else {
+                        res.push(Some(s));
+                    }
+                } else {
+                    res.push(None);
+                }
+            }
+        }
         _ => {
-            res.truncate(res.len() - delimeter.len());
-            Ok(Arc::new(StringArray::from(vec![Some(res)])))
+            // delimeter length is 1
+            assert_eq!(delimeters.len(), 1);
+            let delimeter = delimeters[0].unwrap();
+            let s = compute_array_to_string(
+                &mut arg,
+                arr.clone(),
+                delimeter.to_string(),
+                null_string,
+                with_null_string,
+            )?
+            .clone();
+
+            if !s.is_empty() {
+                let s = s.strip_suffix(delimeter).unwrap().to_string();
+                res.push(Some(s));
+            } else {
+                res.push(Some(s));
+            }
         }
     }
+
+    Ok(Arc::new(StringArray::from(res)))
 }
 
 /// Trim_array SQL function

Reply via email to