This is an automated email from the ASF dual-hosted git repository. findepi pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/datafusion.git
The following commit(s) were added to refs/heads/main by this push: new a431bf7b32 Add assertion that ScalarUDFImpl implementation is consistent with declared return type (#17515) a431bf7b32 is described below commit a431bf7b325f20c131df2b77b98069176d1776a9 Author: Piotr Findeisen <piotr.findei...@gmail.com> AuthorDate: Fri Sep 12 07:31:07 2025 -0700 Add assertion that ScalarUDFImpl implementation is consistent with declared return type (#17515) * Drop redundant special casing of no-args case in spark_array impl The default branch picks `Int32` when all arguments are Null-typed, so it's applicable to no-args just as good. * fix typo * Check function return value's type The planner takes into account the return type a function promises to return. It even passes it back on invoke as a reminder/convenience. Verify that each function delivers on the promise. * enable assertion for sqllogictest run locally * condition on debug_assertions instead of FF it's a bit of FF misuse * fix code * fixup! condition on debug_assertions instead of FF * Improve style * switch to internal_err * fixup! Improve style --- datafusion/expr/src/udf.rs | 18 ++++++++- datafusion/ffi/tests/ffi_integration.rs | 2 +- datafusion/ffi/tests/ffi_udaf.rs | 2 +- datafusion/ffi/tests/ffi_udf.rs | 2 +- datafusion/ffi/tests/ffi_udtf.rs | 2 +- datafusion/ffi/tests/ffi_udwf.rs | 2 +- datafusion/spark/src/function/array/spark_array.rs | 45 ++++++++-------------- datafusion/spark/src/function/url/parse_url.rs | 7 ++-- 8 files changed, 43 insertions(+), 37 deletions(-) diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs index db6d9aa420..758c6b5f2f 100644 --- a/datafusion/expr/src/udf.rs +++ b/datafusion/expr/src/udf.rs @@ -233,7 +233,23 @@ impl ScalarUDF { /// /// See [`ScalarUDFImpl::invoke_with_args`] for details. pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> { - self.inner.invoke_with_args(args) + #[cfg(debug_assertions)] + let return_field = Arc::clone(&args.return_field); + let result = self.inner.invoke_with_args(args)?; + // Maybe this could be enabled always? + // This doesn't use debug_assert!, but it's meant to run anywhere except on production. It's same in spirit, thus conditioning on debug_assertions. + #[cfg(debug_assertions)] + { + if &result.data_type() != return_field.data_type() { + return datafusion_common::internal_err!("Function '{}' returned value of type '{:?}' while the following type was promised at planning time and expected: '{:?}'", + self.name(), + result.data_type(), + return_field.data_type() + ); + } + // TODO verify return data is non-null when it was promised to be? + } + Ok(result) } /// Get the circuits of inner implementation diff --git a/datafusion/ffi/tests/ffi_integration.rs b/datafusion/ffi/tests/ffi_integration.rs index 1ef16fbaa4..eb53e76bfb 100644 --- a/datafusion/ffi/tests/ffi_integration.rs +++ b/datafusion/ffi/tests/ffi_integration.rs @@ -16,7 +16,7 @@ // under the License. /// Add an additional module here for convenience to scope this to only -/// when the feature integtation-tests is built +/// when the feature integration-tests is built #[cfg(feature = "integration-tests")] mod tests { use datafusion::error::{DataFusionError, Result}; diff --git a/datafusion/ffi/tests/ffi_udaf.rs b/datafusion/ffi/tests/ffi_udaf.rs index 31b1f47391..ffd99bac62 100644 --- a/datafusion/ffi/tests/ffi_udaf.rs +++ b/datafusion/ffi/tests/ffi_udaf.rs @@ -16,7 +16,7 @@ // under the License. /// Add an additional module here for convenience to scope this to only -/// when the feature integtation-tests is built +/// when the feature integration-tests is built #[cfg(feature = "integration-tests")] mod tests { use arrow::array::Float64Array; diff --git a/datafusion/ffi/tests/ffi_udf.rs b/datafusion/ffi/tests/ffi_udf.rs index bbc23552de..fd6a84bcf5 100644 --- a/datafusion/ffi/tests/ffi_udf.rs +++ b/datafusion/ffi/tests/ffi_udf.rs @@ -16,7 +16,7 @@ // under the License. /// Add an additional module here for convenience to scope this to only -/// when the feature integtation-tests is built +/// when the feature integration-tests is built #[cfg(feature = "integration-tests")] mod tests { diff --git a/datafusion/ffi/tests/ffi_udtf.rs b/datafusion/ffi/tests/ffi_udtf.rs index 5a46211d3b..8c1c64a092 100644 --- a/datafusion/ffi/tests/ffi_udtf.rs +++ b/datafusion/ffi/tests/ffi_udtf.rs @@ -16,7 +16,7 @@ // under the License. /// Add an additional module here for convenience to scope this to only -/// when the feature integtation-tests is built +/// when the feature integration-tests is built #[cfg(feature = "integration-tests")] mod tests { diff --git a/datafusion/ffi/tests/ffi_udwf.rs b/datafusion/ffi/tests/ffi_udwf.rs index db9ebba0fd..18ffd0c5bc 100644 --- a/datafusion/ffi/tests/ffi_udwf.rs +++ b/datafusion/ffi/tests/ffi_udwf.rs @@ -16,7 +16,7 @@ // under the License. /// Add an additional module here for convenience to scope this to only -/// when the feature integtation-tests is built +/// when the feature integration-tests is built #[cfg(feature = "integration-tests")] mod tests { use arrow::array::{create_array, ArrayRef}; diff --git a/datafusion/spark/src/function/array/spark_array.rs b/datafusion/spark/src/function/array/spark_array.rs index 9ca6a8a3d1..1644cde7ab 100644 --- a/datafusion/spark/src/function/array/spark_array.rs +++ b/datafusion/spark/src/function/array/spark_array.rs @@ -73,26 +73,23 @@ impl ScalarUDFImpl for SparkArray { } fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> { - match arg_types.len() { - 0 => Ok(empty_array_type()), - _ => { - let mut expr_type = DataType::Null; - for arg_type in arg_types { - if !arg_type.equals_datatype(&DataType::Null) { - expr_type = arg_type.clone(); - break; - } - } - - if expr_type.is_null() { - expr_type = DataType::Int32; - } - - Ok(DataType::List(Arc::new(Field::new_list_field( - expr_type, true, - )))) + let mut expr_type = DataType::Null; + for arg_type in arg_types { + if !arg_type.equals_datatype(&DataType::Null) { + expr_type = arg_type.clone(); + break; } } + + if expr_type.is_null() { + expr_type = DataType::Int32; + } + + Ok(DataType::List(Arc::new(Field::new( + ARRAY_FIELD_DEFAULT_NAME, + expr_type, + true, + )))) } fn return_field_from_args(&self, args: ReturnFieldArgs) -> Result<FieldRef> { @@ -104,7 +101,7 @@ impl ScalarUDFImpl for SparkArray { .collect::<Vec<_>>(); let return_type = self.return_type(&data_types)?; Ok(Arc::new(Field::new( - ARRAY_FIELD_DEFAULT_NAME, + "this_field_name_is_irrelevant", return_type, false, ))) @@ -143,15 +140,6 @@ impl ScalarUDFImpl for SparkArray { } } -// Empty array is a special case that is useful for many other array functions -pub(super) fn empty_array_type() -> DataType { - DataType::List(Arc::new(Field::new( - ARRAY_FIELD_DEFAULT_NAME, - DataType::Int32, - true, - ))) -} - /// `make_array_inner` is the implementation of the `make_array` function. /// Constructs an array using the input `data` as `ArrayRef`. /// Returns a reference-counted `Array` instance result. @@ -174,6 +162,7 @@ pub fn make_array_inner(arrays: &[ArrayRef]) -> Result<ArrayRef> { Ok(Arc::new( SingleRowListArrayBuilder::new(array) .with_nullable(true) + .with_field_name(Some(ARRAY_FIELD_DEFAULT_NAME.to_string())) .build_list_array(), )) } diff --git a/datafusion/spark/src/function/url/parse_url.rs b/datafusion/spark/src/function/url/parse_url.rs index 5c3b1d7d3e..f9c33060cc 100644 --- a/datafusion/spark/src/function/url/parse_url.rs +++ b/datafusion/spark/src/function/url/parse_url.rs @@ -19,7 +19,8 @@ use std::any::Any; use std::sync::Arc; use arrow::array::{ - Array, ArrayRef, GenericStringBuilder, LargeStringArray, StringArray, StringArrayType, + Array, ArrayRef, GenericStringBuilder, LargeStringArray, StringArray, + StringArrayType, StringViewArray, }; use arrow::datatypes::DataType; use datafusion_common::cast::{ @@ -222,7 +223,7 @@ fn spark_parse_url(args: &[ArrayRef]) -> Result<ArrayRef> { ) } (DataType::Utf8View, DataType::Utf8View, DataType::Utf8View) => { - process_parse_url::<_, _, _, StringArray>( + process_parse_url::<_, _, _, StringViewArray>( as_string_view_array(url)?, as_string_view_array(part)?, as_string_view_array(key)?, @@ -255,7 +256,7 @@ fn spark_parse_url(args: &[ArrayRef]) -> Result<ArrayRef> { ) } (DataType::Utf8View, DataType::Utf8View) => { - process_parse_url::<_, _, _, StringArray>( + process_parse_url::<_, _, _, StringViewArray>( as_string_view_array(url)?, as_string_view_array(part)?, &key, --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org For additional commands, e-mail: commits-h...@datafusion.apache.org