GitHub user icexelloss commented on a diff in the pull request:
https://github.com/apache/spark/pull/20531#discussion_r166650642
--- Diff: python/pyspark/sql/udf.py ---
@@ -112,15 +112,31 @@ def returnType(self):
else:
self._returnType_placeholder =
_parse_datatype_string(self._returnType)
- if self.evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF \
- and not isinstance(self._returnType_placeholder,
StructType):
- raise ValueError("Invalid returnType: returnType must be a
StructType for "
- "pandas_udf with function type GROUPED_MAP")
- elif self.evalType == PythonEvalType.SQL_GROUPED_AGG_PANDAS_UDF \
- and isinstance(self._returnType_placeholder, (StructType,
ArrayType, MapType)):
- raise NotImplementedError(
- "ArrayType, StructType and MapType are not supported with "
- "PandasUDFType.GROUPED_AGG")
+ if self.evalType == PythonEvalType.SQL_GROUPED_MAP_PANDAS_UDF:
+ if isinstance(self._returnType_placeholder, StructType):
+ try:
+ to_arrow_schema(self._returnType_placeholder)
+ except TypeError:
+ raise NotImplementedError(
+ "Invalid returnType with a grouped map Pandas UDF:
"
--- End diff --
nit: in the error message, `a grouped map Pandas UDF` -> `grouped map Pandas UDFs`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]