Github user viirya commented on a diff in the pull request: https://github.com/apache/spark/pull/19872#discussion_r154569884 --- Diff: python/pyspark/sql/group.py --- @@ -89,8 +89,15 @@ def agg(self, *exprs): else: # Columns assert all(isinstance(c, Column) for c in exprs), "all exprs should be Column" - jdf = self._jgd.agg(exprs[0]._jc, - _to_seq(self.sql_ctx._sc, [c._jc for c in exprs[1:]])) + if isinstance(exprs[0], UDFColumn): + assert all(isinstance(c, UDFColumn) for c in exprs) + jdf = self._jgd.aggInPandas( + _to_seq(self.sql_ctx._sc, [c._jc for c in exprs])) + else: + jdf = self._jgd.agg(exprs[0]._jc, --- End diff -- What if `exprs[n]` (n > 0) is a `UDFColumn` but `exprs[0]` is not? The check only inspects `exprs[0]`, so that case falls through to the regular `agg` branch. I think we should make sure that if any column is a `UDFColumn`, all columns are `UDFColumn`s.
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org