viirya commented on code in PR #8578:
URL: https://github.com/apache/arrow-datafusion/pull/8578#discussion_r1430596344
##########
datafusion/expr/src/udf.rs:
##########
@@ -124,22 +169,116 @@ impl ScalarUDF {
&self.aliases
}
- /// Returns this function's signature (what input types are accepted)
+ /// Returns this function's [`Signature`] (what input types are accepted)
pub fn signature(&self) -> &Signature {
&self.signature
}
- /// Return the type of the function given its input types
+ /// The datatype this function returns given the input argument types
pub fn return_type(&self, args: &[DataType]) -> Result<DataType> {
// Old API returns an Arc of the datatype for some reason
let res = (self.return_type)(args)?;
Ok(res.as_ref().clone())
}
- /// Return the actual implementation
+ /// Return an [`Arc`] to the function implementation
pub fn fun(&self) -> ScalarFunctionImplementation {
self.fun.clone()
}
+}
+
+impl<F> From<F> for ScalarUDF
+where
+ F: ScalarUDFImpl + Send + Sync + 'static,
+{
+ fn from(fun: F) -> Self {
+ Self::new_from_trait(fun)
+ }
+}
+
+/// Trait for implementing [`ScalarUDF`].
+///
+/// This trait exposes the full API for implementing user defined functions and
+/// can be used to implement any function.
+///
+/// See [`advanced_udf.rs`] for a full example with implementation. See
+/// [`ScalarUDF`] for details on a simpler API.
+///
+///
+/// [`advanced_udf.rs`]: https://github.com/apache/arrow-datafusion/blob/main/datafusion-examples/examples/advanced_udf.rs
+/// # Basic Example
+/// ```
+/// # use std::any::Any;
+/// # use arrow::datatypes::DataType;
+/// # use datafusion_common::{DataFusionError, plan_err, Result};
+/// # use datafusion_expr::{col, ColumnarValue, Signature, Volatility};
+/// # use datafusion_expr::{ScalarUDFImpl, ScalarUDF};
+/// struct AddOne {
+/// signature: Signature
+/// };
+///
+/// impl AddOne {
+/// fn new() -> Self {
+/// Self {
+/// signature: Signature::uniform(1, vec![DataType::Int32],
Volatility::Immutable)
+/// }
+/// }
+/// }
+///
+/// /// Implement the ScalarUDFImpl trait for AddOne
+/// impl ScalarUDFImpl for AddOne {
+/// fn as_any(&self) -> &dyn Any { self }
+/// fn name(&self) -> &str { "add_one" }
+/// fn signature(&self) -> &Signature { &self.signature }
+/// fn return_type(&self, args: &[DataType]) -> Result<DataType> {
+/// if !matches!(args.get(0), Some(&DataType::Int32)) {
+/// return plan_err!("add_one only accepts Int32 arguments");
+/// }
+/// Ok(DataType::Int32)
+/// }
+/// // The actual implementation would add one to the argument
+/// fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue> {
unimplemented!() }
+/// }
+///
+/// // Create a new ScalarUDF from the implementation
+/// let add_one = ScalarUDF::from(AddOne::new());
+///
+/// // Call the function `add_one(col)`
+/// let expr = add_one.call(vec![col("a")]);
+/// ```
+pub trait ScalarUDFImpl {
+ /// Returns this object as an [`Any`] trait object
+ fn as_any(&self) -> &dyn Any;
- // TODO maybe add an invoke() method that runs the actual function?
+ /// Returns this function's name
+ fn name(&self) -> &str;
+
+ /// Returns the function's [`Signature`] for information about what input
+ /// types are accepted and the function's Volatility.
+ fn signature(&self) -> &Signature;
+
+ /// What [`DataType`] will be returned by this function, given the types of
+ /// the arguments
+ fn return_type(&self, arg_types: &[DataType]) -> Result<DataType>;
+
+ /// Invoke the function on `args`, returning the appropriate result
+ ///
+ /// The function will be invoked with a slice of [`ColumnarValue`]
+ /// (either scalar or array).
+ ///
+ /// # Zero Argument Functions
+ /// If the function has zero parameters (e.g. `now()`) it will be passed a
+ /// single element slice which is a null array to indicate the batch's row
+ /// count (so the function can know the resulting array size).
+ ///
+ /// # Performance
+ /// Many functions can be optimized for the case when one or more of their
+ /// arguments are constant values [`ColumnarValue::Scalar`].
+ fn invoke(&self, args: &[ColumnarValue]) -> Result<ColumnarValue>;
+
+ /// Returns any aliases (alternate names) for this function. This should
not
+ /// include the value of [`Self::name`]. Defaults to `[]` (no aliases)
Review Comment:
Hmm, what are the aliases used for? Can we simply add a few words about that
to the API doc?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]