Repository: spark Updated Branches: refs/heads/branch-2.0 e05ad8830 -> 7fbb07372
[SPARK-17760][SQL][BACKPORT] AnalysisException with dataframe pivot when groupBy column is not attribute ## What changes were proposed in this pull request? Backport of #16177 to branch-2.0 ## How was this patch tested? existing and additional unit tests Author: Andrew Ray <ray.and...@gmail.com> Closes #16197 from aray/SPARK-17760-2.0. Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/7fbb0737 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/7fbb0737 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/7fbb0737 Branch: refs/heads/branch-2.0 Commit: 7fbb073728e984ce11e8f64f324878a399078e14 Parents: e05ad88 Author: Andrew Ray <ray.and...@gmail.com> Authored: Wed Dec 7 18:30:34 2016 +0100 Committer: Herman van Hovell <hvanhov...@databricks.com> Committed: Wed Dec 7 18:30:34 2016 +0100 ---------------------------------------------------------------------- .../org/apache/spark/sql/catalyst/analysis/Analyzer.scala | 5 +++-- .../scala/org/apache/spark/sql/DataFramePivotSuite.scala | 9 +++++++++ 2 files changed, 12 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/7fbb0737/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index 05a2d18..32dc70a 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -402,14 +402,15 @@ class Analyzer( .toAggregateExpression() , "__pivot_" + a.sql)() } - val secondAgg = Aggregate(groupByExprs, groupByExprs ++ pivotAggs, firstAgg) + val groupByExprsAttr = groupByExprs.map(_.toAttribute) + val secondAgg = Aggregate(groupByExprsAttr, groupByExprsAttr ++ pivotAggs, firstAgg) val pivotAggAttribute = pivotAggs.map(_.toAttribute) val pivotOutputs = pivotValues.zipWithIndex.flatMap { case (value, i) => aggregates.zip(pivotAggAttribute).map { case (aggregate, pivotAtt) => Alias(ExtractValue(pivotAtt, Literal(i), resolver), outputName(value, aggregate))() } } - Project(groupByExprs ++ pivotOutputs, secondAgg) + Project(groupByExprsAttr ++ pivotOutputs, secondAgg) } else { val pivotAggregates: Seq[NamedExpression] = pivotValues.flatMap { value => def ifExpr(expr: Expression) = { http://git-wip-us.apache.org/repos/asf/spark/blob/7fbb0737/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala index d5cb5e1..41d3525 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/DataFramePivotSuite.scala @@ -197,4 +197,13 @@ class DataFramePivotSuite extends QueryTest with SharedSQLContext{ Row(2013, Seq(48000.0, 7.0), Seq(30000.0, 7.0)) :: Nil ) } + + test("pivot with column definition in groupby") { + checkAnswer( + courseSales.groupBy(substring(col("course"), 0, 1).as("foo")) + .pivot("year", Seq(2012, 2013)) + .sum("earnings"), + Row("d", 15000.0, 48000.0) :: Row("J", 20000.0, 30000.0) :: Nil + ) + } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org