Github user BryanCutler commented on a diff in the pull request:
https://github.com/apache/spark/pull/21650#discussion_r202865674
--- Diff: python/pyspark/sql/tests.py ---
@@ -5471,6 +5598,22 @@ def foo(_):
self.assertEqual(r.a, 'hi')
self.assertEqual(r.b, 1)
+ def test_mixed_udf(self):
+ # Test Pandas UDF and scalar Python UDF followed by groupby apply
+ from pyspark.sql.functions import udf, pandas_udf, PandasUDFType
+ import pandas as pd
+
+ df = self.spark.range(0, 10).toDF('v1')
+ df = df.withColumn('v2', udf(lambda x: x + 1, 'int')(df['v1']))
+ df = df.withColumn('v3', pandas_udf(lambda x: x + 2,
'int')(df['v1']))
--- End diff --
Could you just chain the `withColumn` calls here? I think it's clearer
than reassigning the df each time.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]