Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/19325#discussion_r140941134
--- Diff: python/pyspark/sql/functions.py ---
@@ -2183,14 +2187,28 @@ def pandas_udf(f=None, returnType=StringType()):
:param f: python function if used as a standalone function
:param returnType: a :class:`pyspark.sql.types.DataType` object
- # TODO: doctest
+ >>> from pyspark.sql.types import IntegerType, StringType
+ >>> slen = pandas_udf(lambda s: s.str.len(), IntegerType())
+ >>> @pandas_udf(returnType=StringType())
+ ... def to_upper(s):
+ ... return s.str.upper()
+ ...
+ >>> @pandas_udf(returnType="integer")
+ ... def add_one(x):
+ ... return x + 1
+ ...
+ >>> df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age"))
+ >>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")) \\
+ ... .show() # doctest: +SKIP
--- End diff --
I just double-checked that it passes:
```
./run-tests --python-executables=pypy --modules pyspark-sql
...
Will test against the following Python executables: ['pypy']
Will test the following Python modules: ['pyspark-sql']
Starting test(pypy): pyspark.sql.functions
...
Finished test(pypy): pyspark.sql.functions (74s)
...
```
Also, I checked with ` # doctest: +SKIP` removed; the doctest then fails under PyPy with `ImportError: No module named pyarrow`:
```diff
diff --git a/python/pyspark/sql/functions.py b/python/pyspark/sql/functions.py
index 63e9a830bbc..3265ecc974b 100644
--- a/python/pyspark/sql/functions.py
+++ b/python/pyspark/sql/functions.py
@@ -2199,7 +2199,7 @@ def pandas_udf(f=None, returnType=StringType()):
...
>>> df = spark.createDataFrame([(1, "John Doe", 21)], ("id", "name", "age"))
>>> df.select(slen("name").alias("slen(name)"), to_upper("name"), add_one("age")) \\
- ... .show() # doctest: +SKIP
+ ... .show()
+----------+--------------+------------+
|slen(name)|to_upper(name)|add_one(age)|
+----------+--------------+------------+
```
```
./run-tests --python-executables=pypy --modules pyspark-sql
...
Will test against the following Python executables: ['pypy']
Will test the following Python modules: ['pyspark-sql']
...
Starting test(pypy): pyspark.sql.functions
...
Failed example:
df.select(slen("name").alias("slen(name)"), to_upper("name"),
add_one("age")) \
.show()
Exception raised:
Traceback (most recent call last):
File
"/usr/local/Cellar/pypy/5.8.0/libexec/lib-python/2.7/doctest.py", line 1315, in
__run
compileflags, 1) in test.globs
File "<doctest pyspark.sql.functions.pandas_udf[5]>", line 1, in
<module>
df.select(slen("name").alias("slen(name)"), to_upper("name"),
add_one("age")) \
File "/.../spark/python/pyspark/sql/dataframe.py", line 347, in show
print(self._jdf.showString(n, 20, vertical))
File
"/.../spark/python/lib/py4j-0.10.6-src.zip/py4j/java_gateway.py", line 1160, in
__call__
answer, self.gateway_client, self.target_id, self.name)
File "/.../spark/python/pyspark/sql/utils.py", line 63, in deco
return f(*a, **kw)
File "/.../spark/python/lib/py4j-0.10.6-src.zip/py4j/protocol.py",
line 320, in get_return_value
format(target_id, ".", name), value)
Py4JJavaError: An error occurred while calling o1373.showString.
: org.apache.spark.SparkException: Job aborted due to stage failure:
Task 0 in stage 93.0 failed 1 times, most recent failure: Lost task 0.0 in
stage 93.0 (TID 1093, localhost, executor driver):
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
File "/.../spark/python/lib/pyspark.zip/pyspark/worker.py", line 190,
in main
func, profiler, deserializer, serializer = read_udfs(pickleSer,
infile, eval_type)
File "/.../spark/python/lib/pyspark.zip/pyspark/worker.py", line 112,
in read_udfs
arg_offsets, udf = read_single_udf(pickleSer, infile, eval_type)
File "/.../spark/python/lib/pyspark.zip/pyspark/worker.py", line 102,
in read_single_udf
return arg_offsets, wrap_pandas_udf(row_func, return_type)
File "/.../spark/python/lib/pyspark.zip/pyspark/worker.py", line 77,
in wrap_pandas_udf
arrow_return_type = toArrowType(return_type)
File "/.../spark/python/lib/pyspark.zip/pyspark/sql/types.py", line
1603, in toArrowType
import pyarrow as pa
ImportError: No module named pyarrow
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]