[ https://issues.apache.org/jira/browse/SPARK-32307?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dongjoon Hyun resolved SPARK-32307. ----------------------------------- Fix Version/s: 3.1.0 3.0.1 Resolution: Fixed Issue resolved by pull request 29106 [https://github.com/apache/spark/pull/29106] > Aggregation that uses map type input UDF as group expression can fail > ------------------------------------------------------------------- > > Key: SPARK-32307 > URL: https://issues.apache.org/jira/browse/SPARK-32307 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 3.0.1 > Reporter: wuyi > Assignee: wuyi > Priority: Blocker > Fix For: 3.0.1, 3.1.0 > > > {code:java} > spark.udf.register("key", udf((m: Map[String, String]) => m.keys.head.toInt)) > Seq(Map("1" -> "one", "2" -> "two")).toDF("a").createOrReplaceTempView("t") > checkAnswer(sql("SELECT key(a) AS k FROM t GROUP BY key(a)"), Row(1) :: Nil) > [info] org.apache.spark.sql.AnalysisException: expression 't.`a`' is > neither present in the group by, nor is it an aggregate function. Add to > group by or wrap in first() (or first_value) if you don't care which value > you get.;; > [info] Aggregate [UDF(a#6)], [UDF(a#6) AS k#8] > [info] +- SubqueryAlias t > [info] +- Project [value#3 AS a#6] > [info] +- LocalRelation [value#3] > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis(CheckAnalysis.scala:49) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.failAnalysis$(CheckAnalysis.scala:48) > [info] at > org.apache.spark.sql.catalyst.analysis.Analyzer.failAnalysis(Analyzer.scala:130) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkValidAggregateExpression$1(CheckAnalysis.scala:257) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$10(CheckAnalysis.scala:259) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$10$adapted(CheckAnalysis.scala:259) > [info] at > scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) > [info] at > 
scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) > [info] at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkValidAggregateExpression$1(CheckAnalysis.scala:259) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$10(CheckAnalysis.scala:259) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$10$adapted(CheckAnalysis.scala:259) > [info] at scala.collection.immutable.List.foreach(List.scala:392) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkValidAggregateExpression$1(CheckAnalysis.scala:259) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$13(CheckAnalysis.scala:286) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$13$adapted(CheckAnalysis.scala:286) > [info] at scala.collection.immutable.List.foreach(List.scala:392) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1(CheckAnalysis.scala:286) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.$anonfun$checkAnalysis$1$adapted(CheckAnalysis.scala:92) > [info] at > org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:177) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis(CheckAnalysis.scala:92) > [info] at > org.apache.spark.sql.catalyst.analysis.CheckAnalysis.checkAnalysis$(CheckAnalysis.scala:89) > [info] at > org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:130) > [info] at > org.apache.spark.sql.catalyst.analysis.Analyzer.$anonfun$executeAndCheck$1(Analyzer.scala:156) > [info] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper$.markInAnalyzer(AnalysisHelper.scala:201) > [info] at > org.apache.spark.sql.catalyst.analysis.Analyzer.executeAndCheck(Analyzer.scala:153) > [info] at > 
org.apache.spark.sql.execution.QueryExecution.$anonfun$analyzed$1(QueryExecution.scala:70) > [info] at > org.apache.spark.sql.catalyst.QueryPlanningTracker.measurePhase(QueryPlanningTracker.scala:111) > [info] at > org.apache.spark.sql.execution.QueryExecution.$anonfun$executePhase$1(QueryExecution.scala:135) > [info] at > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) > [info] at > org.apache.spark.sql.execution.QueryExecution.executePhase(QueryExecution.scala:135) > [info] at > org.apache.spark.sql.execution.QueryExecution.analyzed$lzycompute(QueryExecution.scala:70) > [info] at > org.apache.spark.sql.execution.QueryExecution.analyzed(QueryExecution.scala:68) > [info] at > org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:60) > [info] at org.apache.spark.sql.Dataset$.$anonfun$ofRows$2(Dataset.scala:99) > [info] at > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) > [info] at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:97) > [info] at > org.apache.spark.sql.SparkSession.$anonfun$sql$1(SparkSession.scala:606) > [info] at > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:763) > [info] at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:601) > ... > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org