This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new bf6c82fc18 Optimize `struct` and `named_struct` functions (#11688)
bf6c82fc18 is described below
commit bf6c82fc18a8243380b8302f9b266344b6955656
Author: Alexander Rafferty <[email protected]>
AuthorDate: Wed Sep 4 06:12:04 2024 +1000
Optimize `struct` and `named_struct` functions (#11688)
* Remove unnecessary heap allocations in the implementation of
`named_struct_expr` caused by zipping then unzipping fields and values.
* Change the implementation of `array_struct` to reduce the number of allocations.
* Remove tests already covered by `struct.slt`.
---
datafusion/functions/src/core/named_struct.rs | 21 ++++-----
datafusion/functions/src/core/struct.rs | 68 +++++----------------------
2 files changed, 21 insertions(+), 68 deletions(-)
diff --git a/datafusion/functions/src/core/named_struct.rs
b/datafusion/functions/src/core/named_struct.rs
index f71b1b00f0..85c3327453 100644
--- a/datafusion/functions/src/core/named_struct.rs
+++ b/datafusion/functions/src/core/named_struct.rs
@@ -70,20 +70,17 @@ fn named_struct_expr(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
}
}
- let arrays = ColumnarValue::values_to_arrays(&values)?;
-
- let fields = names
+ let fields: Fields = names
.into_iter()
- .zip(arrays)
- .map(|(name, value)| {
- (
- Arc::new(Field::new(name, value.data_type().clone(), true)),
- value,
- )
- })
- .collect::<Vec<_>>();
+ .zip(&values)
+ .map(|(name, value)| Arc::new(Field::new(name,
value.data_type().clone(), true)))
+ .collect::<Vec<_>>()
+ .into();
+
+ let arrays = ColumnarValue::values_to_arrays(&values)?;
- Ok(ColumnarValue::Array(Arc::new(StructArray::from(fields))))
+ let struct_array = StructArray::new(fields, arrays, None);
+ Ok(ColumnarValue::Array(Arc::new(struct_array)))
}
#[derive(Debug)]
diff --git a/datafusion/functions/src/core/struct.rs
b/datafusion/functions/src/core/struct.rs
index c3dee8b1cc..bdddbb81be 100644
--- a/datafusion/functions/src/core/struct.rs
+++ b/datafusion/functions/src/core/struct.rs
@@ -29,23 +29,23 @@ fn array_struct(args: &[ArrayRef]) -> Result<ArrayRef> {
return exec_err!("struct requires at least one argument");
}
- let vec: Vec<_> = args
+ let fields = args
.iter()
.enumerate()
.map(|(i, arg)| {
let field_name = format!("c{i}");
- Ok((
- Arc::new(Field::new(
- field_name.as_str(),
- arg.data_type().clone(),
- true,
- )),
- Arc::clone(arg),
- ))
+ Ok(Arc::new(Field::new(
+ field_name.as_str(),
+ arg.data_type().clone(),
+ true,
+ )))
})
- .collect::<Result<Vec<_>>>()?;
+ .collect::<Result<Vec<_>>>()?
+ .into();
- Ok(Arc::new(StructArray::from(vec)))
+ let arrays = args.to_vec();
+
+ Ok(Arc::new(StructArray::new(fields, arrays, None)))
}
/// put values in a struct array.
@@ -53,6 +53,7 @@ fn struct_expr(args: &[ColumnarValue]) ->
Result<ColumnarValue> {
let arrays = ColumnarValue::values_to_arrays(args)?;
Ok(ColumnarValue::Array(array_struct(arrays.as_slice())?))
}
+
#[derive(Debug)]
pub struct StructFunc {
signature: Signature,
@@ -97,48 +98,3 @@ impl ScalarUDFImpl for StructFunc {
struct_expr(args)
}
}
-
-#[cfg(test)]
-mod tests {
- use super::*;
- use arrow::array::Int64Array;
- use datafusion_common::cast::as_struct_array;
- use datafusion_common::ScalarValue;
-
- #[test]
- fn test_struct() {
- // struct(1, 2, 3) = {"c0": 1, "c1": 2, "c2": 3}
- let args = [
- ColumnarValue::Scalar(ScalarValue::Int64(Some(1))),
- ColumnarValue::Scalar(ScalarValue::Int64(Some(2))),
- ColumnarValue::Scalar(ScalarValue::Int64(Some(3))),
- ];
- let struc = struct_expr(&args)
- .expect("failed to initialize function struct")
- .into_array(1)
- .expect("Failed to convert to array");
- let result =
- as_struct_array(&struc).expect("failed to initialize function
struct");
- assert_eq!(
- &Int64Array::from(vec![1]),
- Arc::clone(result.column_by_name("c0").unwrap())
- .as_any()
- .downcast_ref::<Int64Array>()
- .unwrap()
- );
- assert_eq!(
- &Int64Array::from(vec![2]),
- Arc::clone(result.column_by_name("c1").unwrap())
- .as_any()
- .downcast_ref::<Int64Array>()
- .unwrap()
- );
- assert_eq!(
- &Int64Array::from(vec![3]),
- Arc::clone(result.column_by_name("c2").unwrap())
- .as_any()
- .downcast_ref::<Int64Array>()
- .unwrap()
- );
- }
-}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]