This is an automated email from the ASF dual-hosted git repository.

alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new bf6c82fc18 Optimize `struct` and `named_struct` functions (#11688)
bf6c82fc18 is described below

commit bf6c82fc18a8243380b8302f9b266344b6955656
Author: Alexander Rafferty <[email protected]>
AuthorDate: Wed Sep 4 06:12:04 2024 +1000

    Optimize `struct` and `named_struct` functions (#11688)
    
    * Remove unnecessary heap allocations in implementation of `named_struct_expr` caused by zipping then unzipping fields and values.
    
    * Change implementation of `array_struct` to reduce number of allocations
    
    * Remove tests already covered by `struct.slt`
---
 datafusion/functions/src/core/named_struct.rs | 21 ++++-----
 datafusion/functions/src/core/struct.rs       | 68 +++++----------------------
 2 files changed, 21 insertions(+), 68 deletions(-)

diff --git a/datafusion/functions/src/core/named_struct.rs b/datafusion/functions/src/core/named_struct.rs
index f71b1b00f0..85c3327453 100644
--- a/datafusion/functions/src/core/named_struct.rs
+++ b/datafusion/functions/src/core/named_struct.rs
@@ -70,20 +70,17 @@ fn named_struct_expr(args: &[ColumnarValue]) -> Result<ColumnarValue> {
         }
     }
 
-    let arrays = ColumnarValue::values_to_arrays(&values)?;
-
-    let fields = names
+    let fields: Fields = names
         .into_iter()
-        .zip(arrays)
-        .map(|(name, value)| {
-            (
-                Arc::new(Field::new(name, value.data_type().clone(), true)),
-                value,
-            )
-        })
-        .collect::<Vec<_>>();
+        .zip(&values)
+        .map(|(name, value)| Arc::new(Field::new(name, value.data_type().clone(), true)))
+        .collect::<Vec<_>>()
+        .into();
+
+    let arrays = ColumnarValue::values_to_arrays(&values)?;
 
-    Ok(ColumnarValue::Array(Arc::new(StructArray::from(fields))))
+    let struct_array = StructArray::new(fields, arrays, None);
+    Ok(ColumnarValue::Array(Arc::new(struct_array)))
 }
 
 #[derive(Debug)]
diff --git a/datafusion/functions/src/core/struct.rs b/datafusion/functions/src/core/struct.rs
index c3dee8b1cc..bdddbb81be 100644
--- a/datafusion/functions/src/core/struct.rs
+++ b/datafusion/functions/src/core/struct.rs
@@ -29,23 +29,23 @@ fn array_struct(args: &[ArrayRef]) -> Result<ArrayRef> {
         return exec_err!("struct requires at least one argument");
     }
 
-    let vec: Vec<_> = args
+    let fields = args
         .iter()
         .enumerate()
         .map(|(i, arg)| {
             let field_name = format!("c{i}");
-            Ok((
-                Arc::new(Field::new(
-                    field_name.as_str(),
-                    arg.data_type().clone(),
-                    true,
-                )),
-                Arc::clone(arg),
-            ))
+            Ok(Arc::new(Field::new(
+                field_name.as_str(),
+                arg.data_type().clone(),
+                true,
+            )))
         })
-        .collect::<Result<Vec<_>>>()?;
+        .collect::<Result<Vec<_>>>()?
+        .into();
 
-    Ok(Arc::new(StructArray::from(vec)))
+    let arrays = args.to_vec();
+
+    Ok(Arc::new(StructArray::new(fields, arrays, None)))
 }
 
 /// put values in a struct array.
@@ -53,6 +53,7 @@ fn struct_expr(args: &[ColumnarValue]) -> Result<ColumnarValue> {
     let arrays = ColumnarValue::values_to_arrays(args)?;
     Ok(ColumnarValue::Array(array_struct(arrays.as_slice())?))
 }
+
 #[derive(Debug)]
 pub struct StructFunc {
     signature: Signature,
@@ -97,48 +98,3 @@ impl ScalarUDFImpl for StructFunc {
         struct_expr(args)
     }
 }
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-    use arrow::array::Int64Array;
-    use datafusion_common::cast::as_struct_array;
-    use datafusion_common::ScalarValue;
-
-    #[test]
-    fn test_struct() {
-        // struct(1, 2, 3) = {"c0": 1, "c1": 2, "c2": 3}
-        let args = [
-            ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
-            ColumnarValue::Scalar(ScalarValue::Int64(Some(2))),
-            ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
-        ];
-        let struc = struct_expr(&args)
-            .expect("failed to initialize function struct")
-            .into_array(1)
-            .expect("Failed to convert to array");
-        let result =
-            as_struct_array(&struc).expect("failed to initialize function struct");
-        assert_eq!(
-            &Int64Array::from(vec![1]),
-            Arc::clone(result.column_by_name("c0").unwrap())
-                .as_any()
-                .downcast_ref::<Int64Array>()
-                .unwrap()
-        );
-        assert_eq!(
-            &Int64Array::from(vec![2]),
-            Arc::clone(result.column_by_name("c1").unwrap())
-                .as_any()
-                .downcast_ref::<Int64Array>()
-                .unwrap()
-        );
-        assert_eq!(
-            &Int64Array::from(vec![3]),
-            Arc::clone(result.column_by_name("c2").unwrap())
-                .as_any()
-                .downcast_ref::<Int64Array>()
-                .unwrap()
-        );
-    }
-}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to