rtpsw commented on code in PR #14682:
URL: https://github.com/apache/arrow/pull/14682#discussion_r1056155412
##########
python/pyarrow/_compute.pyx:
##########
@@ -2568,31 +2580,53 @@ cdef inline CFunctionDoc _make_function_doc(dict
func_doc) except *:
return f_doc
-cdef object box_scalar_udf_context(const CScalarUdfContext& c_context):
- cdef ScalarUdfContext context = ScalarUdfContext.__new__(ScalarUdfContext)
+cdef object box_udf_context(const CUdfContext& c_context):
+ cdef UdfContext context = UdfContext.__new__(UdfContext)
context.init(c_context)
return context
-cdef _scalar_udf_callback(user_function, const CScalarUdfContext& c_context,
inputs):
+cdef _udf_callback(user_function, const CUdfContext& c_context, inputs):
"""
- Helper callback function used to wrap the ScalarUdfContext from Python to
C++
+ Helper callback function used to wrap the UdfContext from Python to C++
execution.
"""
- context = box_scalar_udf_context(c_context)
+ context = box_udf_context(c_context)
return user_function(context, *inputs)
-def _get_scalar_udf_context(memory_pool, batch_length):
- cdef CScalarUdfContext c_context
+def _get_udf_context(memory_pool, batch_length):
+ cdef CUdfContext c_context
c_context.pool = maybe_unbox_memory_pool(memory_pool)
c_context.batch_length = batch_length
- context = box_scalar_udf_context(c_context)
+ context = box_udf_context(c_context)
return context
-def register_scalar_function(func, function_name, function_doc, in_types,
- out_type):
+ctypedef CStatus (*CRegisterUdf)(PyObject* function, function[CallbackUdf]
wrapper,
+ const CUdfOptions& options,
CFunctionRegistry* registry)
+
+cdef class RegisterUdf(_Weakrefable):
+ cdef CRegisterUdf register_func
+
+ cdef void init(self, const CRegisterUdf register_func):
+ self.register_func = register_func
+
+
+cdef get_register_scalar_function():
+ cdef RegisterUdf reg = RegisterUdf.__new__(RegisterUdf)
+ reg.register_func = RegisterScalarFunction
+ return reg
+
+
+cdef get_register_tabular_function():
+ cdef RegisterUdf reg = RegisterUdf.__new__(RegisterUdf)
+ reg.register_func = RegisterTabularFunction
+ return reg
+
Review Comment:
The proposed common design for scalar and tabular function registration
requires a parameter to distinguish between the two. I considered 3 options to
do so:
1. Using a Boolean parameter, such as `is_tabular`. This seemed the least
elegant to me, and is definitely the least extensible.
2. Using and enum parameter, which would currently have two values for
scalar and tabular. This is better but seemed undesirable because it is not
clear what this enum is. Is it the function's kind? the function output's kind?
something else? See also [this
post](https://github.com/apache/arrow/pull/14682/#discussion_r1050597587).
3. Using an interface parameter. This seemed elegant and extensible to me.
The interface just describes how the function is registered, and is easy enough
to implement. At least for the time being, the intention is for this interface
to be internal.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]