This is an automated email from the ASF dual-hosted git repository.

findepi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion.git


The following commit(s) were added to refs/heads/main by this push:
     new a431bf7b32 Add assertion that ScalarUDFImpl implementation is 
consistent with declared return type (#17515)
a431bf7b32 is described below

commit a431bf7b325f20c131df2b77b98069176d1776a9
Author: Piotr Findeisen <piotr.findei...@gmail.com>
AuthorDate: Fri Sep 12 07:31:07 2025 -0700

    Add assertion that ScalarUDFImpl implementation is consistent with declared 
return type (#17515)
    
    * Drop redundant special casing of no-args case in spark_array impl
    
    The default branch picks `Int32` when all arguments are Null-typed, so
    it's applicable to no-args just as good.
    
    * fix typo
    
    * Check function return value's type
    
    The planner takes into account the return type a function promises to
    return. It even passes it back on invoke as a reminder/convenience.
    Verify that each function delivers on the promise.
    
    * enable assertion for sqllogictest run locally
    
    * condition on debug_assertions instead of FF
    
    it's a bit of FF misuse
    
    * fix code
    
    * fixup! condition on debug_assertions instead of FF
    
    * Improve style
    
    * switch to internal_err
    
    * fixup! Improve style
---
 datafusion/expr/src/udf.rs                         | 18 ++++++++-
 datafusion/ffi/tests/ffi_integration.rs            |  2 +-
 datafusion/ffi/tests/ffi_udaf.rs                   |  2 +-
 datafusion/ffi/tests/ffi_udf.rs                    |  2 +-
 datafusion/ffi/tests/ffi_udtf.rs                   |  2 +-
 datafusion/ffi/tests/ffi_udwf.rs                   |  2 +-
 datafusion/spark/src/function/array/spark_array.rs | 45 ++++++++--------------
 datafusion/spark/src/function/url/parse_url.rs     |  7 ++--
 8 files changed, 43 insertions(+), 37 deletions(-)

diff --git a/datafusion/expr/src/udf.rs b/datafusion/expr/src/udf.rs
index db6d9aa420..758c6b5f2f 100644
--- a/datafusion/expr/src/udf.rs
+++ b/datafusion/expr/src/udf.rs
@@ -233,7 +233,23 @@ impl ScalarUDF {
     ///
     /// See [`ScalarUDFImpl::invoke_with_args`] for details.
     pub fn invoke_with_args(&self, args: ScalarFunctionArgs) -> 
Result<ColumnarValue> {
-        self.inner.invoke_with_args(args)
+        #[cfg(debug_assertions)]
+        let return_field = Arc::clone(&args.return_field);
+        let result = self.inner.invoke_with_args(args)?;
+        // Maybe this could be enabled always?
+        // This doesn't use debug_assert!, but it's meant to run anywhere 
except on production. It's same in spirit, thus conditioning on 
debug_assertions.
+        #[cfg(debug_assertions)]
+        {
+            if &result.data_type() != return_field.data_type() {
+                return datafusion_common::internal_err!("Function '{}' 
returned value of type '{:?}' while the following type was promised at planning 
time and expected: '{:?}'",
+                        self.name(),
+                        result.data_type(),
+                        return_field.data_type()
+                    );
+            }
+            // TODO verify return data is non-null when it was promised to be?
+        }
+        Ok(result)
     }
 
     /// Get the circuits of inner implementation
diff --git a/datafusion/ffi/tests/ffi_integration.rs 
b/datafusion/ffi/tests/ffi_integration.rs
index 1ef16fbaa4..eb53e76bfb 100644
--- a/datafusion/ffi/tests/ffi_integration.rs
+++ b/datafusion/ffi/tests/ffi_integration.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 /// Add an additional module here for convenience to scope this to only
-/// when the feature integtation-tests is built
+/// when the feature integration-tests is built
 #[cfg(feature = "integration-tests")]
 mod tests {
     use datafusion::error::{DataFusionError, Result};
diff --git a/datafusion/ffi/tests/ffi_udaf.rs b/datafusion/ffi/tests/ffi_udaf.rs
index 31b1f47391..ffd99bac62 100644
--- a/datafusion/ffi/tests/ffi_udaf.rs
+++ b/datafusion/ffi/tests/ffi_udaf.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 /// Add an additional module here for convenience to scope this to only
-/// when the feature integtation-tests is built
+/// when the feature integration-tests is built
 #[cfg(feature = "integration-tests")]
 mod tests {
     use arrow::array::Float64Array;
diff --git a/datafusion/ffi/tests/ffi_udf.rs b/datafusion/ffi/tests/ffi_udf.rs
index bbc23552de..fd6a84bcf5 100644
--- a/datafusion/ffi/tests/ffi_udf.rs
+++ b/datafusion/ffi/tests/ffi_udf.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 /// Add an additional module here for convenience to scope this to only
-/// when the feature integtation-tests is built
+/// when the feature integration-tests is built
 #[cfg(feature = "integration-tests")]
 mod tests {
 
diff --git a/datafusion/ffi/tests/ffi_udtf.rs b/datafusion/ffi/tests/ffi_udtf.rs
index 5a46211d3b..8c1c64a092 100644
--- a/datafusion/ffi/tests/ffi_udtf.rs
+++ b/datafusion/ffi/tests/ffi_udtf.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 /// Add an additional module here for convenience to scope this to only
-/// when the feature integtation-tests is built
+/// when the feature integration-tests is built
 #[cfg(feature = "integration-tests")]
 mod tests {
 
diff --git a/datafusion/ffi/tests/ffi_udwf.rs b/datafusion/ffi/tests/ffi_udwf.rs
index db9ebba0fd..18ffd0c5bc 100644
--- a/datafusion/ffi/tests/ffi_udwf.rs
+++ b/datafusion/ffi/tests/ffi_udwf.rs
@@ -16,7 +16,7 @@
 // under the License.
 
 /// Add an additional module here for convenience to scope this to only
-/// when the feature integtation-tests is built
+/// when the feature integration-tests is built
 #[cfg(feature = "integration-tests")]
 mod tests {
     use arrow::array::{create_array, ArrayRef};
diff --git a/datafusion/spark/src/function/array/spark_array.rs 
b/datafusion/spark/src/function/array/spark_array.rs
index 9ca6a8a3d1..1644cde7ab 100644
--- a/datafusion/spark/src/function/array/spark_array.rs
+++ b/datafusion/spark/src/function/array/spark_array.rs
@@ -73,26 +73,23 @@ impl ScalarUDFImpl for SparkArray {
     }
 
     fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
-        match arg_types.len() {
-            0 => Ok(empty_array_type()),
-            _ => {
-                let mut expr_type = DataType::Null;
-                for arg_type in arg_types {
-                    if !arg_type.equals_datatype(&DataType::Null) {
-                        expr_type = arg_type.clone();
-                        break;
-                    }
-                }
-
-                if expr_type.is_null() {
-                    expr_type = DataType::Int32;
-                }
-
-                Ok(DataType::List(Arc::new(Field::new_list_field(
-                    expr_type, true,
-                ))))
+        let mut expr_type = DataType::Null;
+        for arg_type in arg_types {
+            if !arg_type.equals_datatype(&DataType::Null) {
+                expr_type = arg_type.clone();
+                break;
             }
         }
+
+        if expr_type.is_null() {
+            expr_type = DataType::Int32;
+        }
+
+        Ok(DataType::List(Arc::new(Field::new(
+            ARRAY_FIELD_DEFAULT_NAME,
+            expr_type,
+            true,
+        ))))
     }
 
     fn return_field_from_args(&self, args: ReturnFieldArgs) -> 
Result<FieldRef> {
@@ -104,7 +101,7 @@ impl ScalarUDFImpl for SparkArray {
             .collect::<Vec<_>>();
         let return_type = self.return_type(&data_types)?;
         Ok(Arc::new(Field::new(
-            ARRAY_FIELD_DEFAULT_NAME,
+            "this_field_name_is_irrelevant",
             return_type,
             false,
         )))
@@ -143,15 +140,6 @@ impl ScalarUDFImpl for SparkArray {
     }
 }
 
-// Empty array is a special case that is useful for many other array functions
-pub(super) fn empty_array_type() -> DataType {
-    DataType::List(Arc::new(Field::new(
-        ARRAY_FIELD_DEFAULT_NAME,
-        DataType::Int32,
-        true,
-    )))
-}
-
 /// `make_array_inner` is the implementation of the `make_array` function.
 /// Constructs an array using the input `data` as `ArrayRef`.
 /// Returns a reference-counted `Array` instance result.
@@ -174,6 +162,7 @@ pub fn make_array_inner(arrays: &[ArrayRef]) -> 
Result<ArrayRef> {
             Ok(Arc::new(
                 SingleRowListArrayBuilder::new(array)
                     .with_nullable(true)
+                    
.with_field_name(Some(ARRAY_FIELD_DEFAULT_NAME.to_string()))
                     .build_list_array(),
             ))
         }
diff --git a/datafusion/spark/src/function/url/parse_url.rs 
b/datafusion/spark/src/function/url/parse_url.rs
index 5c3b1d7d3e..f9c33060cc 100644
--- a/datafusion/spark/src/function/url/parse_url.rs
+++ b/datafusion/spark/src/function/url/parse_url.rs
@@ -19,7 +19,8 @@ use std::any::Any;
 use std::sync::Arc;
 
 use arrow::array::{
-    Array, ArrayRef, GenericStringBuilder, LargeStringArray, StringArray, 
StringArrayType,
+    Array, ArrayRef, GenericStringBuilder, LargeStringArray, StringArray,
+    StringArrayType, StringViewArray,
 };
 use arrow::datatypes::DataType;
 use datafusion_common::cast::{
@@ -222,7 +223,7 @@ fn spark_parse_url(args: &[ArrayRef]) -> Result<ArrayRef> {
                 )
             }
             (DataType::Utf8View, DataType::Utf8View, DataType::Utf8View) => {
-                process_parse_url::<_, _, _, StringArray>(
+                process_parse_url::<_, _, _, StringViewArray>(
                     as_string_view_array(url)?,
                     as_string_view_array(part)?,
                     as_string_view_array(key)?,
@@ -255,7 +256,7 @@ fn spark_parse_url(args: &[ArrayRef]) -> Result<ArrayRef> {
                 )
             }
             (DataType::Utf8View, DataType::Utf8View) => {
-                process_parse_url::<_, _, _, StringArray>(
+                process_parse_url::<_, _, _, StringViewArray>(
                     as_string_view_array(url)?,
                     as_string_view_array(part)?,
                     &key,


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@datafusion.apache.org
For additional commands, e-mail: commits-h...@datafusion.apache.org

Reply via email to