Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/19325#discussion_r140626184
--- Diff: python/pyspark/sql/tests.py ---
@@ -3344,6 +3342,22 @@ def test_vectorized_udf_wrong_return_type(self):
'Invalid.*type.*string'):
df.select(f(col('x'))).collect()
+ def test_vectorized_udf_decorator(self):
+ from pyspark.sql.functions import pandas_udf, col
+ df = self.spark.range(10)
+
+ @pandas_udf(returnType=LongType())
+ def identity(x):
+ return x
+ res = df.select(identity(col('id')))
+ self.assertEquals(df.collect(), res.collect())
+
+ def test_vectorized_udf_empty_partition(self):
+ from pyspark.sql.functions import pandas_udf, col
+ df = self.spark.createDataFrame(self.sc.parallelize([Row(id=1)],
2))
--- End diff --
Oh, I see. One partition is empty, and it is related to the changes added in
`ArrowEvalPythonExec`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]