[ 
https://issues.apache.org/jira/browse/SPARK-9950?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Michael Armbrust updated SPARK-9950:
------------------------------------
    Description: 
Spark 1.4:
{code}
import org.apache.spark.sql.functions._
val df = Seq(("x", (1,1)), ("y", (2, 2))).toDF("a", "b")
df.groupBy("b._1").agg(sum("b._2"))
df.collect()

df: org.apache.spark.sql.DataFrame = [a: string, b: struct<_1:int,_2:int>]
res0: Array[org.apache.spark.sql.Row] = Array([x,[1,1]], [y,[2,2]])
{code}

Spark 1.5:
{code}
org.apache.spark.sql.AnalysisException: expression 'b' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
        at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:37)
        at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44)
        at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:110)
{code}

  was:
Spark 1.4:
{code}
import org.apache.spark.sql.functions._
val df = Seq(("x", (1,1)), ("y", (2, 2))).toDF("a", "b")
df.groupBy("b._1").agg(sum("b._2"))
df.collect()
import org.apache.spark.sql.functions._
df: org.apache.spark.sql.DataFrame = [a: string, b: struct<_1:int,_2:int>]
res0: Array[org.apache.spark.sql.Row] = Array([x,[1,1]], [y,[2,2]])
{code}

Spark 1.5
{code}
org.apache.spark.sql.AnalysisException: expression 'b' is neither present in 
the group by, nor is it an aggregate function. Add to group by or wrap in 
first() if you don't care which value you get.;
        at 
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:37)
        at 
org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44)
        at 
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:110)
{code}


> Wrong Analysis Error for grouping/aggregating on struct fields
> --------------------------------------------------------------
>
>                 Key: SPARK-9950
>                 URL: https://issues.apache.org/jira/browse/SPARK-9950
>             Project: Spark
>          Issue Type: Sub-task
>          Components: SQL
>    Affects Versions: 1.5.0
>            Reporter: Michael Armbrust
>            Priority: Blocker
>
> Spark 1.4:
> {code}
> import org.apache.spark.sql.functions._
> val df = Seq(("x", (1,1)), ("y", (2, 2))).toDF("a", "b")
> df.groupBy("b._1").agg(sum("b._2"))
> df.collect()
> df: org.apache.spark.sql.DataFrame = [a: string, b: struct<_1:int,_2:int>]
> res0: Array[org.apache.spark.sql.Row] = Array([x,[1,1]], [y,[2,2]])
> {code}
> Spark 1.5
> {code}
> org.apache.spark.sql.AnalysisException: expression 'b' is neither present in the group by, nor is it an aggregate function. Add to group by or wrap in first() if you don't care which value you get.;
>       at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:37)
>       at org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:44)
>       at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:110)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to