This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new b2e8848406 feat: support `largelist` in `array_to_string` (#8729)
b2e8848406 is described below

commit b2e8848406b0884fbc631e130c3e25e35550a1a1
Author: Alex Huang <[email protected]>
AuthorDate: Sat Jan 6 05:09:24 2024 +0800

    feat: support `largelist` in `array_to_string` (#8729)
    
    * support largelist in array_to_string
    
    * reduce code duplication
---
 datafusion/physical-expr/src/array_expressions.rs | 94 ++++++++++++++++-------
 datafusion/sqllogictest/test_files/array.slt      | 61 +++++++++++++++
 2 files changed, 128 insertions(+), 27 deletions(-)

diff --git a/datafusion/physical-expr/src/array_expressions.rs b/datafusion/physical-expr/src/array_expressions.rs
index 78c490d5db..cb4ad3ed63 100644
--- a/datafusion/physical-expr/src/array_expressions.rs
+++ b/datafusion/physical-expr/src/array_expressions.rs
@@ -2022,8 +2022,21 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
     ) -> Result<&mut String> {
         match arr.data_type() {
             DataType::List(..) => {
-                let list_array = downcast_arg!(arr, ListArray);
+                let list_array = as_list_array(&arr)?;
+                for i in 0..list_array.len() {
+                    compute_array_to_string(
+                        arg,
+                        list_array.value(i),
+                        delimiter.clone(),
+                        null_string.clone(),
+                        with_null_string,
+                    )?;
+                }
 
+                Ok(arg)
+            }
+            DataType::LargeList(..) => {
+                let list_array = as_large_list_array(&arr)?;
                 for i in 0..list_array.len() {
                     compute_array_to_string(
                         arg,
@@ -2055,35 +2068,61 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
         }
     }
 
-    let mut arg = String::from("");
-    let mut res: Vec<Option<String>> = Vec::new();
-
-    match arr.data_type() {
-        DataType::List(_) | DataType::LargeList(_) | DataType::FixedSizeList(_, _) => {
-            let list_array = arr.as_list::<i32>();
-            for (arr, &delimiter) in list_array.iter().zip(delimiters.iter()) {
-                if let (Some(arr), Some(delimiter)) = (arr, delimiter) {
-                    arg = String::from("");
-                    let s = compute_array_to_string(
-                        &mut arg,
-                        arr,
-                        delimiter.to_string(),
-                        null_string.clone(),
-                        with_null_string,
-                    )?
-                    .clone();
-
-                    if let Some(s) = s.strip_suffix(delimiter) {
-                        res.push(Some(s.to_string()));
-                    } else {
-                        res.push(Some(s));
-                    }
+    fn generate_string_array<O: OffsetSizeTrait>(
+        list_arr: &GenericListArray<O>,
+        delimiters: Vec<Option<&str>>,
+        null_string: String,
+        with_null_string: bool,
+    ) -> Result<StringArray> {
+        let mut res: Vec<Option<String>> = Vec::new();
+        for (arr, &delimiter) in list_arr.iter().zip(delimiters.iter()) {
+            if let (Some(arr), Some(delimiter)) = (arr, delimiter) {
+                let mut arg = String::from("");
+                let s = compute_array_to_string(
+                    &mut arg,
+                    arr,
+                    delimiter.to_string(),
+                    null_string.clone(),
+                    with_null_string,
+                )?
+                .clone();
+
+                if let Some(s) = s.strip_suffix(delimiter) {
+                    res.push(Some(s.to_string()));
                 } else {
-                    res.push(None);
+                    res.push(Some(s));
                 }
+            } else {
+                res.push(None);
             }
         }
+
+        Ok(StringArray::from(res))
+    }
+
+    let arr_type = arr.data_type();
+    let string_arr = match arr_type {
+        DataType::List(_) | DataType::FixedSizeList(_, _) => {
+            let list_array = as_list_array(&arr)?;
+            generate_string_array::<i32>(
+                list_array,
+                delimiters,
+                null_string,
+                with_null_string,
+            )?
+        }
+        DataType::LargeList(_) => {
+            let list_array = as_large_list_array(&arr)?;
+            generate_string_array::<i64>(
+                list_array,
+                delimiters,
+                null_string,
+                with_null_string,
+            )?
+        }
         _ => {
+            let mut arg = String::from("");
+            let mut res: Vec<Option<String>> = Vec::new();
             // delimiter length is 1
             assert_eq!(delimiters.len(), 1);
             let delimiter = delimiters[0].unwrap();
@@ -2102,10 +2141,11 @@ pub fn array_to_string(args: &[ArrayRef]) -> Result<ArrayRef> {
             } else {
                 res.push(Some(s));
             }
+            StringArray::from(res)
         }
-    }
+    };
 
-    Ok(Arc::new(StringArray::from(res)))
+    Ok(Arc::new(string_arr))
 }
 
 /// Cardinality SQL function
diff --git a/datafusion/sqllogictest/test_files/array.slt b/datafusion/sqllogictest/test_files/array.slt
index 774d67b4fd..083c4ff31b 100644
--- a/datafusion/sqllogictest/test_files/array.slt
+++ b/datafusion/sqllogictest/test_files/array.slt
@@ -3238,30 +3238,55 @@ select list_to_string(['h', 'e', 'l', 'l', 'o'], ','), list_to_string([1, 2, 3,
 ----
 h,e,l,l,o 1-2-3-4-5 1|2|3
 
+query TTT
+select list_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
+----
+h,e,l,l,o 1-2-3-4-5 1|2|3
+
 # array_join scalar function #5 (function alias `array_to_string`)
 query TTT
 select array_join(['h', 'e', 'l', 'l', 'o'], ','), array_join([1, 2, 3, 4, 5], '-'), array_join([1.0, 2.0, 3.0], '|');
 ----
 h,e,l,l,o 1-2-3-4-5 1|2|3
 
+query TTT
+select array_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
+----
+h,e,l,l,o 1-2-3-4-5 1|2|3
+
 # list_join scalar function #6 (function alias `list_join`)
 query TTT
 select list_join(['h', 'e', 'l', 'l', 'o'], ','), list_join([1, 2, 3, 4, 5], '-'), list_join([1.0, 2.0, 3.0], '|');
 ----
 h,e,l,l,o 1-2-3-4-5 1|2|3
 
+query TTT
+select list_join(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), list_join(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), list_join(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
+----
+h,e,l,l,o 1-2-3-4-5 1|2|3
+
 # array_to_string scalar function with nulls #1
 query TTT
 select array_to_string(make_array('h', NULL, 'l', NULL, 'o'), ','), array_to_string(make_array(1, NULL, 3, NULL, 5), '-'), array_to_string(make_array(NULL, 2.0, 3.0), '|');
 ----
 h,l,o 1-3-5 2|3
 
+query TTT
+select array_to_string(arrow_cast(['h', 'e', 'l', 'l', 'o'], 'LargeList(Utf8)'), ','), array_to_string(arrow_cast([1, 2, 3, 4, 5], 'LargeList(Int64)'), '-'), array_to_string(arrow_cast([1.0, 2.0, 3.0], 'LargeList(Float64)'), '|');
+----
+h,e,l,l,o 1-2-3-4-5 1|2|3
+
 # array_to_string scalar function with nulls #2
 query TTT
 select array_to_string(make_array('h', NULL, NULL, NULL, 'o'), ',', '-'), array_to_string(make_array(NULL, 2, NULL, 4, 5), '-', 'nil'), array_to_string(make_array(1.0, NULL, 3.0), '|', '0');
 ----
 h,-,-,-,o nil-2-nil-4-5 1|0|3
 
+query TTT
+select array_to_string(arrow_cast(make_array('h', NULL, NULL, NULL, 'o'), 'LargeList(Utf8)'), ',', '-'), array_to_string(arrow_cast(make_array(NULL, 2, NULL, 4, 5), 'LargeList(Int64)'), '-', 'nil'), array_to_string(arrow_cast(make_array(1.0, NULL, 3.0), 'LargeList(Float64)'), '|', '0');
+----
+h,-,-,-,o nil-2-nil-4-5 1|0|3
+
 # array_to_string with columns #1
 
 # For reference
@@ -3288,6 +3313,18 @@ NULL
 51^52^54^55^56^57^58^59^60
 NULL
 
+query T
+select array_to_string(column1, column4) from large_arrays_values;
+----
+2,3,4,5,6,7,8,9,10
+11.12.13.14.15.16.17.18.20
+21-22-23-25-26-27-28-29-30
+31ok32ok33ok34ok35ok37ok38ok39ok40
+NULL
+41$42$43$44$45$46$47$48$49$50
+51^52^54^55^56^57^58^59^60
+NULL
+
 query TT
 select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from arrays_values;
 ----
@@ -3300,6 +3337,18 @@ NULL 1/2/3
 51_52_54_55_56_57_58_59_60 1/2/3
 61_62_63_64_65_66_67_68_69_70 1/2/3
 
+query TT
+select array_to_string(column1, '_'), array_to_string(make_array(1,2,3), '/') from large_arrays_values;
+----
+2_3_4_5_6_7_8_9_10 1/2/3
+11_12_13_14_15_16_17_18_20 1/2/3
+21_22_23_25_26_27_28_29_30 1/2/3
+31_32_33_34_35_37_38_39_40 1/2/3
+NULL 1/2/3
+41_42_43_44_45_46_47_48_49_50 1/2/3
+51_52_54_55_56_57_58_59_60 1/2/3
+61_62_63_64_65_66_67_68_69_70 1/2/3
+
 query TT
 select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from arrays_values;
 ----
@@ -3312,6 +3361,18 @@ NULL 1.2.3
 51_52_*_54_55_56_57_58_59_60 1.2.3
 61_62_63_64_65_66_67_68_69_70 1.2.3
 
+query TT
+select array_to_string(column1, '_', '*'), array_to_string(make_array(make_array(1,2,3)), '.') from large_arrays_values;
+----
+*_2_3_4_5_6_7_8_9_10 1.2.3
+11_12_13_14_15_16_17_18_*_20 1.2.3
+21_22_23_*_25_26_27_28_29_30 1.2.3
+31_32_33_34_35_*_37_38_39_40 1.2.3
+NULL 1.2.3
+41_42_43_44_45_46_47_48_49_50 1.2.3
+51_52_*_54_55_56_57_58_59_60 1.2.3
+61_62_63_64_65_66_67_68_69_70 1.2.3
+
 ## cardinality
 
 # cardinality scalar function

Reply via email to