Github user icexelloss commented on a diff in the pull request:
https://github.com/apache/spark/pull/19630#discussion_r148807433
--- Diff: python/pyspark/sql/functions.py ---
@@ -2208,16 +2093,26 @@ def udf(f=None, returnType=StringType()):
| 8| JOHN DOE| 22|
+----------+--------------+------------+
"""
- return _create_udf(f, returnType=returnType,
pythonUdfType=PythonUdfType.NORMAL_UDF)
+ # decorator @udf, @udf(), @udf(dataType())
+ if f is None or isinstance(f, (str, DataType)):
+ # If DataType has been passed as a positional argument
+ # for decorator use it as a returnType
+ return_type = f or returnType
+ return functools.partial(_create_udf, returnType=return_type,
+ udfType=PythonEvalType.SQL_BATCHED_UDF)
+ else:
+ return _create_udf(f=f, returnType=returnType,
+ udfType=PythonEvalType.SQL_BATCHED_UDF)
@since(2.3)
-def pandas_udf(f=None, returnType=StringType()):
+def pandas_udf(f=None, returnType=None, functionType=None):
--- End diff --
The default value is effectively `PandasUdfType.SCALAR`.
It is a bit tricky to put the default value in the function args because
supporting decorator usage makes the branching logic too complicated.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]