GitHub user icexelloss commented on a diff in the pull request:
https://github.com/apache/spark/pull/20211#discussion_r161256435
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala ---
@@ -457,13 +458,26 @@ class RelationalGroupedDataset protected[sql](
val groupingNamedExpressions = groupingExprs.map {
case ne: NamedExpression => ne
- case other => Alias(other, other.toString)()
+ case other => Alias(other, toPrettySQL(other))()
}
val groupingAttributes = groupingNamedExpressions.map(_.toAttribute)
val child = df.logicalPlan
val project = Project(groupingNamedExpressions ++ child.output, child)
- val output = expr.dataType.asInstanceOf[StructType].toAttributes
- val plan = FlatMapGroupsInPandas(groupingAttributes, expr, output,
project)
+ val udfOutput: Seq[Attribute] =
expr.dataType.asInstanceOf[StructType].toAttributes
+ val additionalGroupingAttributes = mutable.ArrayBuffer[Attribute]()
+
+ for (attribute <- groupingAttributes) {
+ if (!udfOutput.map(_.name).contains(attribute.name)) {
--- End diff --
Yeah, let's do that.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscribe@spark.apache.org
For additional commands, e-mail: reviews-help@spark.apache.org