[
https://issues.apache.org/jira/browse/SPARK-7269?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Cheng Lian updated SPARK-7269:
------------------------------
Description:
In a case-insensitive analyzer (HiveContext), differences in attribute name
capitalization will cause the analysis check for aggregation to fail.
{code}
test("check analysis failed in case in-sensitive") {
Seq(1,2,3).map(i => (i, i.toString)).toDF("key",
"value").registerTempTable("df_analysis")
sql("SELECT kEy from df_analysis group by key")
}
{code}
{noformat}
expression 'kEy' is neither present in the group by, nor is it an aggregate
function. Add to group by or wrap in first() if you don't care which value you
get.;
org.apache.spark.sql.AnalysisException: expression 'kEy' is neither present in
the group by, nor is it an aggregate function. Add to group by or wrap in
first() if you don't care which value you get.;
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:39)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:85)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
at
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:101)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
at
org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:89)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:39)
at
org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1121)
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:97)
at
org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply$mcV$sp(SQLQuerySuite.scala:408)
at
org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
at
org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
at
org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
at
org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
{noformat}
was:
In a case insensitive analyzer (HiveContext), the attribute name captial
differences will fail the analysis check for aggregation.
{code}
test("check analysis failed in case in-sensitive") {
Seq(1,2,3).map(i => (i, i.toString)).toDF("key",
"value").registerTempTable("df_analysis")
sql("SELECT kEy from df_analysis group by key")
}
{code}
{panel}
expression 'kEy' is neither present in the group by, nor is it an aggregate
function. Add to group by or wrap in first() if you don't care which value you
get.;
org.apache.spark.sql.AnalysisException: expression 'kEy' is neither present in
the group by, nor is it an aggregate function. Add to group by or wrap in
first() if you don't care which value you get.;
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:39)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:85)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
at
scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:101)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
at
org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:89)
at
org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
at
org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:39)
at
org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1121)
at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:97)
at
org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply$mcV$sp(SQLQuerySuite.scala:408)
at
org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
at
org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
at
org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
at org.scalatest.Transformer.apply(Transformer.scala:22)
at org.scalatest.Transformer.apply(Transformer.scala:20)
at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
at
org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
{panel}
> Incorrect aggregation analysis
> ------------------------------
>
> Key: SPARK-7269
> URL: https://issues.apache.org/jira/browse/SPARK-7269
> Project: Spark
> Issue Type: Bug
> Components: SQL
> Reporter: Cheng Hao
> Priority: Blocker
>
> In a case-insensitive analyzer (HiveContext), differences in attribute name
> capitalization will cause the analysis check for aggregation to fail.
> {code}
> test("check analysis failed in case in-sensitive") {
> Seq(1,2,3).map(i => (i, i.toString)).toDF("key",
> "value").registerTempTable("df_analysis")
> sql("SELECT kEy from df_analysis group by key")
> }
> {code}
> {noformat}
> expression 'kEy' is neither present in the group by, nor is it an aggregate
> function. Add to group by or wrap in first() if you don't care which value
> you get.;
> org.apache.spark.sql.AnalysisException: expression 'kEy' is neither present
> in the group by, nor is it an aggregate function. Add to group by or wrap in
> first() if you don't care which value you get.;
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.failAnalysis(CheckAnalysis.scala:38)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:39)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.org$apache$spark$sql$catalyst$analysis$CheckAnalysis$class$$anonfun$$checkValidAggregateExpression$1(CheckAnalysis.scala:85)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$4.apply(CheckAnalysis.scala:101)
> at
> scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
> at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:101)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:50)
> at
> org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:89)
> at
> org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:50)
> at
> org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:39)
> at
> org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1121)
> at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
> at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
> at org.apache.spark.sql.hive.HiveContext.sql(HiveContext.scala:97)
> at
> org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply$mcV$sp(SQLQuerySuite.scala:408)
> at
> org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
> at
> org.apache.spark.sql.hive.execution.SQLQuerySuite$$anonfun$15.apply(SQLQuerySuite.scala:406)
> at
> org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
> at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
> at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
> at org.scalatest.Transformer.apply(Transformer.scala:22)
> at org.scalatest.Transformer.apply(Transformer.scala:20)
> at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
> at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
> at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
> at
> org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
> {noformat}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]