[ 
https://issues.apache.org/jira/browse/SPARK-23569?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Stu (Michael Stewart) updated SPARK-23569:
------------------------------------------
    Description: 
When invoked against a type-annotated function, pandas_udf raises:

`ValueError: Function has keyword-only parameters or annotations, use 
getfullargspec() API which can support them`

 

To reproduce:

 
{code:python}
from pyspark.sql import SparkSession

from pyspark.sql.functions import pandas_udf, PandasUDFType, col, lit

spark = SparkSession.builder.getOrCreate()

df = spark.range(12).withColumn('b', col('id') * 2)

def ok(a,b): return a*b

df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b')).show()  # no problems

import pandas as pd

def ok(a: pd.Series,b: pd.Series) -> pd.Series: return a*b

df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b'))

 

---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-17-2e6ae67b15ee> in <module>()
----> 1 df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b'))

/opt/miniconda/lib/python3.6/site-packages/pyspark/sql/functions.py in 
pandas_udf(f, returnType, functionType)
2277 return functools.partial(_create_udf, returnType=return_type, 
evalType=eval_type)
2278 else:
-> 2279 return _create_udf(f=f, returnType=return_type, evalType=eval_type)
2280
2281

/opt/miniconda/lib/python3.6/site-packages/pyspark/sql/udf.py in _create_udf(f, 
returnType, evalType)
44
45 require_minimum_pyarrow_version()
---> 46 argspec = inspect.getargspec(f)
47
48 if evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF and len(argspec.args) == 
0 and \

/opt/miniconda/lib/python3.6/inspect.py in getargspec(func)
1043 getfullargspec(func)
1044 if kwonlyargs or ann:
-> 1045 raise ValueError("Function has keyword-only parameters or annotations"
1046 ", use getfullargspec() API which can support them")
1047 return ArgSpec(args, varargs, varkw, defaults)

ValueError: Function has keyword-only parameters or annotations, use 
getfullargspec() API which can support them

{code}

  was:
When invoked against a type annotated function pandas_udf raises:

`ValueError: Function has keyword-only parameters or annotations, use 
getfullargspec() API which can support them`

 

To reproduce:

```

from pyspark.sql import SparkSession

from pyspark.sql.functions import pandas_udf, PandasUDFType, col, lit

spark = SparkSession.builder.getOrCreate()

df = spark.range(12).withColumn('b', col('id') * 2)

def ok(a,b): return a*b

df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b')).show()  # 
no problems



import pandas as pd

def ok(a: pd.Series,b: pd.Series) -> pd.Series: return a*b

df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b'))

 

---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
<ipython-input-17-2e6ae67b15ee> in <module>()
----> 1 df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b'))

/opt/miniconda/lib/python3.6/site-packages/pyspark/sql/functions.py in 
pandas_udf(f, returnType, functionType)
 2277 return functools.partial(_create_udf, returnType=return_type, 
evalType=eval_type)
 2278 else:
-> 2279 return _create_udf(f=f, returnType=return_type, evalType=eval_type)
 2280
 2281

/opt/miniconda/lib/python3.6/site-packages/pyspark/sql/udf.py in _create_udf(f, 
returnType, evalType)
 44
 45 require_minimum_pyarrow_version()
---> 46 argspec = inspect.getargspec(f)
 47
 48 if evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF and len(argspec.args) 
== 0 and \

/opt/miniconda/lib/python3.6/inspect.py in getargspec(func)
 1043 getfullargspec(func)
 1044 if kwonlyargs or ann:
-> 1045 raise ValueError("Function has keyword-only parameters or annotations"
 1046 ", use getfullargspec() API which can support them")
 1047 return ArgSpec(args, varargs, varkw, defaults)

ValueError: Function has keyword-only parameters or annotations, use 
getfullargspec() API which can support them

```

 


> pandas_udf does not work with type-annotated python functions
> -------------------------------------------------------------
>
>                 Key: SPARK-23569
>                 URL: https://issues.apache.org/jira/browse/SPARK-23569
>             Project: Spark
>          Issue Type: Bug
>          Components: PySpark
>    Affects Versions: 2.3.0
>         Environment: python 3.6 | pyspark 2.3.0 | Using Scala version 2.11.8, 
> OpenJDK 64-Bit Server VM, 1.8.0_141 | Revision 
> a0d7949896e70f427e7f3942ff340c9484ff0aab
>            Reporter: Stu (Michael Stewart)
>            Priority: Major
>
> When invoked against a type-annotated function, pandas_udf raises:
> `ValueError: Function has keyword-only parameters or annotations, use 
> getfullargspec() API which can support them`
>  
> To reproduce:
>  
> {code:python}
> from pyspark.sql import SparkSession
> from pyspark.sql.functions import pandas_udf, PandasUDFType, col, lit
> spark = SparkSession.builder.getOrCreate()
> df = spark.range(12).withColumn('b', col('id') * 2)
> def ok(a,b): return a*b
> df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b')).show()  
> # no problems
> import pandas as pd
> def ok(a: pd.Series,b: pd.Series) -> pd.Series: return a*b
> df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b'))
>  
> ---------------------------------------------------------------------------
> ValueError Traceback (most recent call last)
> <ipython-input-17-2e6ae67b15ee> in <module>()
> ----> 1 df.withColumn('ok', pandas_udf(f=ok, returnType='bigint')('id','b'))
> /opt/miniconda/lib/python3.6/site-packages/pyspark/sql/functions.py in 
> pandas_udf(f, returnType, functionType)
> 2277 return functools.partial(_create_udf, returnType=return_type, 
> evalType=eval_type)
> 2278 else:
> -> 2279 return _create_udf(f=f, returnType=return_type, evalType=eval_type)
> 2280
> 2281
> /opt/miniconda/lib/python3.6/site-packages/pyspark/sql/udf.py in 
> _create_udf(f, returnType, evalType)
> 44
> 45 require_minimum_pyarrow_version()
> ---> 46 argspec = inspect.getargspec(f)
> 47
> 48 if evalType == PythonEvalType.SQL_SCALAR_PANDAS_UDF and len(argspec.args) 
> == 0 and \
> /opt/miniconda/lib/python3.6/inspect.py in getargspec(func)
> 1043 getfullargspec(func)
> 1044 if kwonlyargs or ann:
> -> 1045 raise ValueError("Function has keyword-only parameters or annotations"
> 1046 ", use getfullargspec() API which can support them")
> 1047 return ArgSpec(args, varargs, varkw, defaults)
> ValueError: Function has keyword-only parameters or annotations, use 
> getfullargspec() API which can support them
> {code}



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to