HyukjinKwon commented on a change in pull request #27165: 
[SPARK-28264][PYTHON][SQL] Support type hints in pandas UDF and rename/move 
inconsistent pandas UDF types
URL: https://github.com/apache/spark/pull/27165#discussion_r368806345
 
 

 ##########
 File path: python/pyspark/sql/pandas/functions.py
 ##########
 @@ -490,31 +405,78 @@ def pandas_udf(f=None, returnType=None, 
functionType=None):
             eval_type = returnType
         else:
             # @pandas_udf(dataType) or @pandas_udf(returnType=dataType)
-            eval_type = PythonEvalType.SQL_SCALAR_PANDAS_UDF
+            eval_type = None
     else:
         return_type = returnType
 
         if functionType is not None:
             eval_type = functionType
         else:
-            eval_type = PythonEvalType.SQL_SCALAR_PANDAS_UDF
+            eval_type = None
 
     if return_type is None:
-        raise ValueError("Invalid returnType: returnType can not be None")
+        raise ValueError("Invalid return type: returnType can not be None")
 
     if eval_type not in [PythonEvalType.SQL_SCALAR_PANDAS_UDF,
                          PythonEvalType.SQL_SCALAR_PANDAS_ITER_UDF,
                          PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF,
                          PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF,
                          PythonEvalType.SQL_MAP_PANDAS_ITER_UDF,
-                         PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF]:
-        raise ValueError("Invalid functionType: "
+                         PythonEvalType.SQL_COGROUPED_MAP_PANDAS_UDF,
+                         None]:  # None means it should infer the type from 
type hints.
+
+        raise ValueError("Invalid function type: "
                          "functionType must be one the values from 
PandasUDFType")
 
     if is_decorator:
-        return functools.partial(_create_udf, returnType=return_type, 
evalType=eval_type)
+        return functools.partial(_create_pandas_udf, returnType=return_type, 
evalType=eval_type)
     else:
-        return _create_udf(f=f, returnType=return_type, evalType=eval_type)
+        return _create_pandas_udf(f=f, returnType=return_type, 
evalType=eval_type)
+
+
+def _create_pandas_udf(f, returnType, evalType):
+    argspec = _get_argspec(f)
+
+    # pandas UDF by type hints.
+    if sys.version_info >= (3, 6):
+        from inspect import signature
+
+        if evalType is not None:
+            warnings.warn(
 
 Review comment:
   I would actually like to choose one side and encourage users to give 
feedback. Given my interactions here, using type hints seems to be preferred. 
Let me try to fix the condition.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to