Github user HyukjinKwon commented on a diff in the pull request: https://github.com/apache/spark/pull/21467#discussion_r192436233 --- Diff: python/pyspark/sql/tests.py --- @@ -901,20 +901,43 @@ def __call__(self, x): self.assertEqual(return_type, f_.returnType) def test_stopiteration_in_udf(self): - # test for SPARK-23754 - from pyspark.sql.functions import udf + from pyspark.sql.functions import udf, pandas_udf, PandasUDFType from py4j.protocol import Py4JJavaError + def do_test(action, *args, **kwargs): + exc_message = "Caught StopIteration thrown from user's code; failing the task" + with self.assertRaisesRegexp(Py4JJavaError, exc_message) as cm: + action(*args, **kwargs) + def foo(x): raise StopIteration() - with self.assertRaises(Py4JJavaError) as cm: - self.spark.range(0, 1000).withColumn('v', udf(foo)('id')).show() + def foofoo(x, y): + raise StopIteration() - self.assertIn( - "Caught StopIteration thrown from user's code; failing the task", - cm.exception.java_exception.toString() - ) + df = self.spark.range(0, 100) + + # plain udf (test for SPARK-23754) + do_test(df.withColumn('v', udf(foo)('id')).show) + + # pandas scalar udf + do_test(df.withColumn( + 'v', pandas_udf(foo, 'double', PandasUDFType.SCALAR)('id') --- End diff -- Ah, on Jenkins, we run these tests via Python 2, Python 3, and PyPy. Python 2 and PyPy don't have PyArrow installed, so it throws an exception. It should be placed in `*PandasUDFTests` for each. For example, this is a Scalar Pandas UDF, which should be placed in `ScalarPandasUDFTests`.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org