Github user cloud-fan commented on a diff in the pull request:
https://github.com/apache/spark/pull/19505#discussion_r144859561
--- Diff: python/pyspark/sql/functions.py ---
@@ -2121,33 +2127,35 @@ def wrapper(*args):
wrapper.func = self.func
wrapper.returnType = self.returnType
- wrapper.vectorized = self.vectorized
+ wrapper.pythonUdfType = self.pythonUdfType
return wrapper
-def _create_udf(f, returnType, vectorized):
+def _create_udf(f, returnType, pythonUdfType):
- def _udf(f, returnType=StringType(), vectorized=vectorized):
- if vectorized:
+ def _udf(f, returnType=StringType(), pythonUdfType=pythonUdfType):
+ if pythonUdfType == PythonUdfType.PANDAS_UDF \
+ or pythonUdfType == PythonUdfType.PANDAS_GROUPED_UDF:
--- End diff --
Shall we add a check that `PANDAS_GROUPED_UDF` can only take one
parameter?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]