Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/20171#discussion_r161385602
--- Diff: python/pyspark/sql/tests.py ---
@@ -4037,6 +4075,21 @@ def test_simple(self):
expected = df.toPandas().groupby('id').apply(foo_udf.func).reset_index(drop=True)
self.assertFramesEqual(expected, result)
+ def test_register_group_map_udf(self):
+ from pyspark.sql.functions import pandas_udf, PandasUDFType
+
+ foo_udf = pandas_udf(
+ lambda pdf: pdf.assign(v1=pdf.id * 1.0),
+ StructType(
+ [StructField('id', LongType()),
+ StructField('v1', DoubleType())]),
+ PandasUDFType.GROUP_MAP
+ )
--- End diff --
We could simplify this to:
```python
foo_udf = pandas_udf(lambda x: x, "id long", PandasUDFType.GROUP_MAP)
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]