alamb commented on code in PR #8713:
URL: https://github.com/apache/arrow-datafusion/pull/8713#discussion_r1439631495


##########
datafusion/expr/src/udf.rs:
##########
@@ -101,51 +79,37 @@ impl ScalarUDF {
         return_type: &ReturnTypeFunction,
         fun: &ScalarFunctionImplementation,
     ) -> Self {
-        Self {
+        Self::new_from_impl(ScalarUdfLegacyWrapper {
             name: name.to_owned(),
             signature: signature.clone(),
             return_type: return_type.clone(),
             fun: fun.clone(),
-            aliases: vec![],
-        }
+        })
     }
 
     /// Create a new `ScalarUDF` from a `[ScalarUDFImpl]` trait object
     ///
     /// Note this is the same as using the `From` impl (`ScalarUDF::from`)
     pub fn new_from_impl<F>(fun: F) -> ScalarUDF
     where
-        F: ScalarUDFImpl + Send + Sync + 'static,
+        F: ScalarUDFImpl + 'static,
     {
-        // TODO change the internal implementation to use the trait object

Review Comment:
   ✅ 



##########
datafusion/physical-expr/src/udf.rs:
##########
@@ -36,7 +36,7 @@ pub fn create_physical_expr(
 
     Ok(Arc::new(ScalarFunctionExpr::new(
         fun.name(),
-        fun.fun().clone(),
+        fun.fun(),

Review Comment:
   this is a drive by cleanup I noticed while working on the code



##########
datafusion/expr/src/udf.rs:
##########
@@ -292,3 +271,105 @@ pub trait ScalarUDFImpl {
         &[]
     }
 }
+
+/// ScalarUDF that adds an alias to the underlying function. It is better to
+/// implement [`ScalarUDFImpl`], which supports aliases, directly if possible.
+#[derive(Debug)]
+struct AliasedScalarUDFImpl {
+    inner: ScalarUDF,
+    aliases: Vec<String>,
+}
+
+impl AliasedScalarUDFImpl {
+    pub fn new(
+        inner: ScalarUDF,
+        new_aliases: impl IntoIterator<Item = &'static str>,
+    ) -> Self {
+        let mut aliases = inner.aliases().to_vec();
+        aliases.extend(new_aliases.into_iter().map(|s| s.to_string()));
+
+        Self { inner, aliases }
+    }
+}
+
+impl ScalarUDFImpl for AliasedScalarUDFImpl {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+    fn name(&self) -> &str {
+        self.inner.name()
+    }
+
+    fn signature(&self) -> &Signature {
+        self.inner.signature()
+    }
+
+    fn return_type(&self, arg_types: &[DataType]) -> Result<DataType> {
+        self.inner.return_type(arg_types)
+    }
+
+    fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
+        self.inner.invoke(args)
+    }
+
+    fn aliases(&self) -> &[String] {
+        &self.aliases
+    }
+}
+
+/// Implementation of [`ScalarUDFImpl`] that wraps the function style pointers 
of the older API

Review Comment:
   This is basically the old implementaiton of `ScalarUDF` moved into its own 
struct



##########
datafusion/expr/src/udf.rs:
##########
@@ -35,57 +35,35 @@ use std::sync::Arc;
 /// functions you supply such name, type signature, return type, and actual
 /// implementation.
 ///
-///
 /// 1. For simple (less performant) use cases, use [`create_udf`] and 
[`simple_udf.rs`].
 ///
 /// 2. For advanced use cases, use  [`ScalarUDFImpl`] and [`advanced_udf.rs`].
 ///
+/// # API Note
+///
+/// This is a separate struct from `ScalarUDFImpl` to maintain backwards
+/// compatibility with the older API.
+///
 /// [`create_udf`]: crate::expr_fn::create_udf
 /// [`simple_udf.rs`]: 
https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/simple_udf.rs
 /// [`advanced_udf.rs`]: 
https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
-#[derive(Clone)]
+#[derive(Debug, Clone)]
 pub struct ScalarUDF {

Review Comment:
   The API for ScalarUDF is not changed at all -- only its internal 
implementation



##########
datafusion/expr/src/udf.rs:
##########
@@ -101,51 +79,37 @@ impl ScalarUDF {
         return_type: &ReturnTypeFunction,
         fun: &ScalarFunctionImplementation,
     ) -> Self {
-        Self {
+        Self::new_from_impl(ScalarUdfLegacyWrapper {
             name: name.to_owned(),
             signature: signature.clone(),
             return_type: return_type.clone(),
             fun: fun.clone(),
-            aliases: vec![],
-        }
+        })
     }
 
     /// Create a new `ScalarUDF` from a `[ScalarUDFImpl]` trait object
     ///
     /// Note this is the same as using the `From` impl (`ScalarUDF::from`)
     pub fn new_from_impl<F>(fun: F) -> ScalarUDF
     where
-        F: ScalarUDFImpl + Send + Sync + 'static,
+        F: ScalarUDFImpl + 'static,
     {
-        // TODO change the internal implementation to use the trait object
-        let arc_fun = Arc::new(fun);
-        let captured_self = arc_fun.clone();
-        let return_type: ReturnTypeFunction = Arc::new(move |arg_types| {
-            let return_type = captured_self.return_type(arg_types)?;
-            Ok(Arc::new(return_type))
-        });
-
-        let captured_self = arc_fun.clone();
-        let func: ScalarFunctionImplementation =
-            Arc::new(move |args| captured_self.invoke(args));
-
         Self {
-            name: arc_fun.name().to_string(),
-            signature: arc_fun.signature().clone(),
-            return_type: return_type.clone(),
-            fun: func,
-            aliases: arc_fun.aliases().to_vec(),
+            inner: Arc::new(fun),
         }
     }
 
-    /// Adds additional names that can be used to invoke this function, in 
addition to `name`
-    pub fn with_aliases(
-        mut self,
-        aliases: impl IntoIterator<Item = &'static str>,
-    ) -> Self {
-        self.aliases
-            .extend(aliases.into_iter().map(|s| s.to_string()));
-        self
+    /// Return the underlying [`ScalarUDFImpl`] trait object for this function
+    pub fn inner(&self) -> Arc<dyn ScalarUDFImpl> {
+        self.inner.clone()
+    }
+
+    /// Adds additional names that can be used to invoke this function, in
+    /// addition to `name`
+    ///
+    /// If you implement [`ScalarUDFImpl`] directly you should return aliases 
directly.
+    pub fn with_aliases(self, aliases: impl IntoIterator<Item = &'static str>) 
-> Self {
+        Self::new_from_impl(AliasedScalarUDFImpl::new(self, aliases))

Review Comment:
   This is somewhat of a hack (to have a wrapper around the scalar UDF). It may 
make more sense to simply remove the call to `with_aliases` -- however, since 
it was released in datafusion 34.0.0 -- 
https://docs.rs/datafusion/34.0.0/datafusion/physical_plan/udf/struct.ScalarUDF.html
 -- that would be a breaking API change.
   
   We could deprecate the API 🤔 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to