This is an automated email from the ASF dual-hosted git repository.
jonah pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push:
new 399e8403d2 Ensure that math functions fulfil the ColumnarValue
contract (#12922)
399e8403d2 is described below
commit 399e8403d2fb5a058fb263bc2449dbc9d7529841
Author: Georgi Krastev <[email protected]>
AuthorDate: Wed Oct 16 04:58:45 2024 +0300
Ensure that math functions fulfil the ColumnarValue contract (#12922)
If all UDF arguments are scalars, so should be the result.
In most cases, such function calls will be constant-folded,
however if for whatever reason they are not optimized,
we want to avoid an error due to array length mismatch.
---
datafusion/expr-common/src/columnar_value.rs | 14 ++++++++++++--
datafusion/functions/src/macros.rs | 16 ++++++++--------
2 files changed, 20 insertions(+), 10 deletions(-)
diff --git a/datafusion/expr-common/src/columnar_value.rs
b/datafusion/expr-common/src/columnar_value.rs
index bfefb37c98..7b614ba9c4 100644
--- a/datafusion/expr-common/src/columnar_value.rs
+++ b/datafusion/expr-common/src/columnar_value.rs
@@ -17,8 +17,7 @@
//! [`ColumnarValue`] represents the result of evaluating an expression.
-use arrow::array::ArrayRef;
-use arrow::array::NullArray;
+use arrow::array::{Array, ArrayRef, NullArray};
use arrow::compute::{kernels, CastOptions};
use arrow::datatypes::{DataType, TimeUnit};
use datafusion_common::format::DEFAULT_CAST_OPTIONS;
@@ -218,6 +217,17 @@ impl ColumnarValue {
}
}
}
+
+ /// Converts an [`ArrayRef`] to a [`ColumnarValue`] based on the supplied
arguments.
+ /// This is useful for scalar UDF implementations to fulfil their contract:
+ /// if all arguments are scalar values, the result should also be a scalar
value.
+ pub fn from_args_and_result(args: &[Self], result: ArrayRef) ->
Result<Self> {
+ if result.len() == 1 && args.iter().all(|arg| matches!(arg,
Self::Scalar(_))) {
+ Ok(Self::Scalar(ScalarValue::try_from_array(&result, 0)?))
+ } else {
+ Ok(Self::Array(result))
+ }
+ }
}
#[cfg(test)]
diff --git a/datafusion/functions/src/macros.rs
b/datafusion/functions/src/macros.rs
index cf25ff8328..85ffaa868f 100644
--- a/datafusion/functions/src/macros.rs
+++ b/datafusion/functions/src/macros.rs
@@ -228,9 +228,8 @@ macro_rules! make_math_unary_udf {
$EVALUATE_BOUNDS(inputs)
}
- fn invoke(&self, args: &[ColumnarValue]) ->
Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
-
+ fn invoke(&self, col_args: &[ColumnarValue]) ->
Result<ColumnarValue> {
+ let args = ColumnarValue::values_to_arrays(col_args)?;
let arr: ArrayRef = match args[0].data_type() {
DataType::Float64 => {
Arc::new(make_function_scalar_inputs_return_type!(
@@ -257,7 +256,8 @@ macro_rules! make_math_unary_udf {
)
}
};
- Ok(ColumnarValue::Array(arr))
+
+ ColumnarValue::from_args_and_result(col_args, arr)
}
fn documentation(&self) -> Option<&Documentation> {
@@ -344,9 +344,8 @@ macro_rules! make_math_binary_udf {
$OUTPUT_ORDERING(input)
}
- fn invoke(&self, args: &[ColumnarValue]) ->
Result<ColumnarValue> {
- let args = ColumnarValue::values_to_arrays(args)?;
-
+ fn invoke(&self, col_args: &[ColumnarValue]) ->
Result<ColumnarValue> {
+ let args = ColumnarValue::values_to_arrays(col_args)?;
let arr: ArrayRef = match args[0].data_type() {
DataType::Float64 => Arc::new(make_function_inputs2!(
&args[0],
@@ -372,7 +371,8 @@ macro_rules! make_math_binary_udf {
)
}
};
- Ok(ColumnarValue::Array(arr))
+
+ ColumnarValue::from_args_and_result(col_args, arr)
}
fn documentation(&self) -> Option<&Documentation> {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]