icexelloss commented on code in PR #35514:
URL: https://github.com/apache/arrow/pull/35514#discussion_r1218525678
##########
python/pyarrow/_compute.pyx:
##########
@@ -2738,9 +2744,83 @@ def register_scalar_function(func, function_name,
function_doc, in_types, out_ty
21
]
"""
- return _register_scalar_like_function(get_register_scalar_function(),
- func, function_name, function_doc,
in_types,
- out_type, func_registry)
+ return _register_user_defined_function(get_register_scalar_function(),
+ func, function_name, function_doc,
in_types,
+ out_type, func_registry)
+
+
+def register_aggregate_function(func, function_name, function_doc, in_types,
out_type,
+ func_registry=None):
+ """
+ Register a user-defined non-decomposable aggregate function.
+
+ A non-decomposable aggregation function is a function that executes
+ aggregate operations on the whole data that it is aggregating.
+ In other words, a non-decomposable aggregate function cannot be
+ split into consume/merge/finalize steps.
+
+ This is mostly useful with segmented aggregation, where the data
+ to be aggregated is continuous.
+
+ Parameters
+ ----------
+ func : callable
+ A callable implementing the user-defined function.
+ The first argument is the context argument of type
+ UdfContext.
+ Then, it must take arguments equal to the number of
+ in_types defined. It must return a Scalar matching the
+ out_type.
+ To define a varargs function, pass a callable that takes
+ varargs. The in_types must match the types of the inputs when
+ the function gets called.
+
+ function_name : str
+ Name of the function. This name must be globally unique.
+ function_doc : dict
+ A dictionary object with keys "summary" (str),
+ and "description" (str).
+ in_types : Dict[str, DataType]
+ A dictionary mapping function argument names to
+ their respective DataType.
+ The argument names will be used to generate
+ documentation for the function. The number of
+ arguments specified here determines the function
+ arity.
+ out_type : DataType
+ Output type of the function.
+ func_registry : FunctionRegistry
+ Optional function registry to use instead of the default global one.
+
+ Examples
+ --------
+ >>> import numpy as np
+ >>> import pyarrow as pa
+ >>> import pyarrow.compute as pc
+ >>>
+ >>> func_doc = {}
+ >>> func_doc["summary"] = "simple mean udf"
+ >>> func_doc["description"] = "compute mean"
+ >>>
+ >>> def compute_mean(ctx, array):
+ ... return pa.scalar(np.nanmean(array))
+ >>>
+ >>> func_name = "py_compute_mean"
+ >>> in_types = {"array": pa.int64()}
+ >>> out_type = pa.float64()
+ >>> pc.register_aggregate_function(compute_mean, func_name, func_doc,
Review Comment:
Updated
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]