Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/21467#discussion_r192437788
--- Diff: python/pyspark/sql/tests.py ---
@@ -901,20 +901,43 @@ def __call__(self, x):
self.assertEqual(return_type, f_.returnType)
def test_stopiteration_in_udf(self):
- # test for SPARK-23754
- from pyspark.sql.functions import udf
+ from pyspark.sql.functions import udf, pandas_udf, PandasUDFType
from py4j.protocol import Py4JJavaError
+ def do_test(action, *args, **kwargs):
+ exc_message = "Caught StopIteration thrown from user's code; failing the task"
+ with self.assertRaisesRegexp(Py4JJavaError, exc_message) as cm:
+ action(*args, **kwargs)
+
def foo(x):
raise StopIteration()
- with self.assertRaises(Py4JJavaError) as cm:
- self.spark.range(0, 1000).withColumn('v', udf(foo)('id')).show()
+ def foofoo(x, y):
+ raise StopIteration()
- self.assertIn(
- "Caught StopIteration thrown from user's code; failing the task",
- cm.exception.java_exception.toString()
- )
+ df = self.spark.range(0, 100)
+
+ # plain udf (test for SPARK-23754)
+ do_test(df.withColumn('v', udf(foo)('id')).show)
+
+ # pandas scalar udf
+ do_test(df.withColumn(
+ 'v', pandas_udf(foo, 'double', PandasUDFType.SCALAR)('id')
--- End diff --
Oh, actually let's put them in `PandasUDFTests`.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]