Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/19505#discussion_r144859680
--- Diff: python/pyspark/sql/functions.py ---
@@ -2121,33 +2127,35 @@ def wrapper(*args):
wrapper.func = self.func
wrapper.returnType = self.returnType
- wrapper.vectorized = self.vectorized
+ wrapper.pythonUdfType = self.pythonUdfType
return wrapper
-def _create_udf(f, returnType, vectorized):
+def _create_udf(f, returnType, pythonUdfType):
- def _udf(f, returnType=StringType(), vectorized=vectorized):
- if vectorized:
+ def _udf(f, returnType=StringType(), pythonUdfType=pythonUdfType):
+ if pythonUdfType == PythonUdfType.PANDAS_UDF \
+ or pythonUdfType == PythonUdfType.PANDAS_GROUPED_UDF:
import inspect
argspec = inspect.getargspec(f)
if len(argspec.args) == 0 and argspec.varargs is None:
raise ValueError(
"0-arg pandas_udfs are not supported. "
"Instead, create a 1-arg pandas_udf and ignore the arg in your function."
--- End diff --
Thanks, let me try.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]