Github user HyukjinKwon commented on a diff in the pull request:
https://github.com/apache/spark/pull/22030#discussion_r208462120
--- Diff:
sql/core/src/main/scala/org/apache/spark/sql/RelationalGroupedDataset.scala ---
@@ -403,20 +415,29 @@ class RelationalGroupedDataset protected[sql](
*
* {{{
* // Compute the sum of earnings for each year by course with each
course as a separate column
- * df.groupBy($"year").pivot($"course", Seq("dotNET",
"Java")).sum($"earnings")
+ * df.groupBy($"year").pivot($"course", Seq(lit("dotNET"),
lit("Java"))).sum($"earnings")
+ * }}}
+ *
+ * For pivoting by multiple columns, use the `struct` function to
combine the columns and values:
+ *
+ * {{{
+ * df
+ * .groupBy($"year")
+ * .pivot(struct($"course", $"training"), Seq(struct(lit("java"),
lit("Experts"))))
+ * .agg(sum($"earnings"))
* }}}
*
* @param pivotColumn the column to pivot.
* @param values List of values that will be translated to columns in
the output DataFrame.
* @since 2.4.0
*/
- def pivot(pivotColumn: Column, values: Seq[Any]):
RelationalGroupedDataset = {
+ def pivot(pivotColumn: Column, values: Seq[Column]):
RelationalGroupedDataset = {
--- End diff --
> No. `Seq[Any]` takes literal values (plain objects), whereas `Seq[Column]` takes
Column expressions.
Here is a concrete before/after illustrating the difference:
Before:
```
scala> val df = spark.range(10).selectExpr("struct(id) as a")
df: org.apache.spark.sql.DataFrame = [a: struct<id: bigint>]
scala> df.groupBy().pivot("a", Seq(struct(lit(1)))).count().show()
java.lang.RuntimeException: Unsupported literal type class
org.apache.spark.sql.Column named_struct(col1, 1)
at
org.apache.spark.sql.catalyst.expressions.Literal$.apply(literals.scala:78)
at
org.apache.spark.sql.RelationalGroupedDataset$$anonfun$pivot$1.apply(RelationalGroupedDataset.scala:419)
at
org.apache.spark.sql.RelationalGroupedDataset$$anonfun$pivot$1.apply(RelationalGroupedDataset.scala:419)
at
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at
scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
at scala.collection.immutable.List.foreach(List.scala:392)
at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
at scala.collection.immutable.List.map(List.scala:296)
at
org.apache.spark.sql.RelationalGroupedDataset.pivot(RelationalGroupedDataset.scala:419)
at
org.apache.spark.sql.RelationalGroupedDataset.pivot(RelationalGroupedDataset.scala:338)
... 51 elided
```
After:
```
scala> val df = spark.range(10).selectExpr("struct(id) as a")
df: org.apache.spark.sql.DataFrame = [a: struct<id: bigint>]
scala> df.groupBy().pivot("a", Seq(struct(lit(1)))).count().show()
+---+
|[1]|
+---+
| 1|
+---+
```
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]