GitHub user icexelloss commented on a diff in the pull request:
https://github.com/apache/spark/pull/19630#discussion_r151605217
--- Diff: python/pyspark/sql/tests.py ---
@@ -3166,6 +3166,92 @@ def test_filtered_frame(self):
self.assertTrue(pdf.empty)
+class PandasUDFTests(ReusedSQLTestCase):
+ def test_pandas_udf_basic(self):
+ from pyspark.rdd import PythonEvalType
+ from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+ udf = pandas_udf(lambda x: x, DoubleType())
+ self.assertEqual(udf.returnType, DoubleType())
+ self.assertEqual(udf.evalType,
PythonEvalType.SQL_PANDAS_SCALAR_UDF)
+
+ udf = pandas_udf(lambda x: x, DoubleType(), PandasUDFType.SCALAR)
+ self.assertEqual(udf.returnType, DoubleType())
+ self.assertEqual(udf.evalType,
PythonEvalType.SQL_PANDAS_SCALAR_UDF)
+
+ udf = pandas_udf(lambda x: x, 'v double', PandasUDFType.GROUP_MAP)
+ self.assertEqual(udf.returnType, StructType([StructField("v",
DoubleType())]))
+ self.assertEqual(udf.evalType,
PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF)
+
+ udf = pandas_udf(lambda x: x, 'v double',
+ functionType=PandasUDFType.GROUP_MAP)
+ self.assertEqual(udf.returnType, StructType([StructField("v",
DoubleType())]))
+ self.assertEqual(udf.evalType,
PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF)
+
+ udf = pandas_udf(lambda x: x, returnType='v double',
+ functionType=PandasUDFType.GROUP_MAP)
+ self.assertEqual(udf.returnType, StructType([StructField("v",
DoubleType())]))
+ self.assertEqual(udf.evalType,
PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF)
+
+ def test_pandas_udf_decorator(self):
+ from pyspark.rdd import PythonEvalType
+ from pyspark.sql.functions import pandas_udf, PandasUDFType
+ from pyspark.sql.types import StructType, StructField, DoubleType
+
+ @pandas_udf(DoubleType())
+ def foo(x):
+ return x
+ self.assertEqual(foo.returnType, DoubleType())
+ self.assertEqual(foo.evalType,
PythonEvalType.SQL_PANDAS_SCALAR_UDF)
+
+ @pandas_udf(returnType=DoubleType())
+ def foo(x):
+ return x
+ self.assertEqual(foo.returnType, DoubleType())
+ self.assertEqual(foo.evalType,
PythonEvalType.SQL_PANDAS_SCALAR_UDF)
+
+ schema = StructType([StructField("v", DoubleType())])
+
+ @pandas_udf(schema, PandasUDFType.GROUP_MAP)
+ def foo(x):
+ return x
+ self.assertEqual(foo.returnType, schema)
+ self.assertEqual(foo.evalType,
PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF)
+
+ @pandas_udf(schema, functionType=PandasUDFType.GROUP_MAP)
+ def foo(x):
+ return x
+ self.assertEqual(foo.returnType, schema)
+ self.assertEqual(foo.evalType,
PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF)
+
+ @pandas_udf(returnType=schema,
functionType=PandasUDFType.GROUP_MAP)
+ def foo(x):
+ return x
+ self.assertEqual(foo.returnType, schema)
+ self.assertEqual(foo.evalType,
PythonEvalType.SQL_PANDAS_GROUP_MAP_UDF)
+
+ def test_udf_wrong_arg(self):
--- End diff --
I added a few more tests. Please let me know if you have specific tests in
mind.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org