Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19630#discussion_r148803174
  
    --- Diff: python/pyspark/sql/functions.py ---
    @@ -2208,16 +2093,26 @@ def udf(f=None, returnType=StringType()):
         |         8|      JOHN DOE|          22|
         +----------+--------------+------------+
         """
    -    return _create_udf(f, returnType=returnType, pythonUdfType=PythonUdfType.NORMAL_UDF)
    +    # decorator @udf, @udf(), @udf(dataType())
    +    if f is None or isinstance(f, (str, DataType)):
    +        # If DataType has been passed as a positional argument
    +        # for decorator use it as a returnType
    +        return_type = f or returnType
    +        return functools.partial(_create_udf, returnType=return_type,
    +                                 udfType=PythonEvalType.SQL_BATCHED_UDF)
    +    else:
    +        return _create_udf(f=f, returnType=returnType,
    +                           udfType=PythonEvalType.SQL_BATCHED_UDF)
     
     
     @since(2.3)
    -def pandas_udf(f=None, returnType=StringType()):
    +def pandas_udf(f=None, returnType=None, functionType=None):
    --- End diff --
    
    Shall we specify `PythonEvalType.PANDAS_SCALAR_UDF` as the default value of `functionType` here?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to