This is an automated email from the ASF dual-hosted git repository.
alamb pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/arrow-datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new c5faaf7f22 Minor: Improve documentation about `ColumnarValues::values_to_array` (#9774)
c5faaf7f22 is described below
commit c5faaf7f22a715bf79cb1289f2b5c15131f95ecb
Author: Andrew Lamb <[email protected]>
AuthorDate: Sun Mar 24 18:38:26 2024 -0400
Minor: Improve documentation about `ColumnarValues::values_to_array` (#9774)
* Minor: Improve documentation about `ColumnarValues::values_to_array`
* Apply suggestions from code review
Co-authored-by: Liang-Chi Hsieh <[email protected]>
---------
Co-authored-by: Liang-Chi Hsieh <[email protected]>
---
datafusion/expr/src/columnar_value.rs | 14 +++++++++++---
datafusion/expr/src/udf.rs | 6 ++++--
2 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/datafusion/expr/src/columnar_value.rs b/datafusion/expr/src/columnar_value.rs
index 831edc078d..87c3c063b9 100644
--- a/datafusion/expr/src/columnar_value.rs
+++ b/datafusion/expr/src/columnar_value.rs
@@ -26,11 +26,14 @@ use datafusion_common::{internal_err, Result, ScalarValue};
use std::sync::Arc;
/// Represents the result of evaluating an expression: either a single
-/// `ScalarValue` or an [`ArrayRef`].
+/// [`ScalarValue`] or an [`ArrayRef`].
///
/// While a [`ColumnarValue`] can always be converted into an array
/// for convenience, it is often much more performant to provide an
/// optimized path for scalar values.
+///
+/// See [`ColumnarValue::values_to_arrays`] for a function that converts
+/// multiple columnar values into arrays of the same length.
#[derive(Clone, Debug)]
pub enum ColumnarValue {
/// Array of values
@@ -59,8 +62,13 @@ impl ColumnarValue {
}
}
- /// Convert a columnar value into an ArrayRef. [`Self::Scalar`] is
- /// converted by repeating the same scalar multiple times.
+ /// Convert a columnar value into an Arrow [`ArrayRef`] with the specified
+ /// number of rows. [`Self::Scalar`] is converted by repeating the same
+ /// scalar multiple times which is not as efficient as handling the scalar
+ /// directly.
+ ///
+ /// See [`Self::values_to_arrays`] to convert multiple columnar values into
+ /// arrays of the same length.
///
/// # Errors
///
diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index 3002a74505..56266a0517 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -326,8 +326,10 @@ pub trait ScalarUDFImpl: Debug + Send + Sync {
///
/// For the best performance, the implementations of `invoke` should handle
/// the common case when one or more of their arguments are constant values
- /// (aka [`ColumnarValue::Scalar`]). Calling [`ColumnarValue::into_array`]
- /// and treating all arguments as arrays will work, but will be slower.
+ /// (aka [`ColumnarValue::Scalar`]).
+ ///
+ /// [`ColumnarValue::values_to_arrays`] can be used to convert the arguments
+ /// to arrays, which will likely be simpler code, but be slower.
fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue>;
/// Returns any aliases (alternate names) for this function.