[jira] [Commented] (SPARK-16955) Using ordinals in ORDER BY causes an analysis error when the query has a GROUP BY clause using ordinals
[ https://issues.apache.org/jira/browse/SPARK-16955?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15418817#comment-15418817 ] Wenchen Fan commented on SPARK-16955: - this bug is already fixed by https://github.com/apache/spark/pull/14595 by accident. > Using ordinals in ORDER BY causes an analysis error when the query has a > GROUP BY clause using ordinals > --- > > Key: SPARK-16955 > URL: https://issues.apache.org/jira/browse/SPARK-16955 > Project: Spark > Issue Type: Bug >Affects Versions: 2.0.0 >Reporter: Yin Huai > > The following queries work > {code} > select a from (select 1 as a) tmp order by 1 > select a, count(*) from (select 1 as a) tmp group by 1 > select a, count(*) from (select 1 as a) tmp group by 1 order by a > {code} > However, the following query does not > {code} > select a, count(*) from (select 1 as a) tmp group by 1 order by 1 > {code} > {code} > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > Group by position: '1' exceeds the size of the select list '0'. on unresolved > object, tree: > Aggregate [1] > +- SubqueryAlias tmp >+- Project [1 AS a#82] > +- OneRowRelation$ > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:749) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:739) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.AbstractTraversable.map(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:739) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:714) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1237) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1181) > at >
[jira] [Commented] (SPARK-16955) Using ordinals in ORDER BY causes an analysis error when the query has a GROUP BY clause using ordinals
[ https://issues.apache.org/jira/browse/SPARK-16955?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15418288#comment-15418288 ] Apache Spark commented on SPARK-16955: -- User 'clockfly' has created a pull request for this issue: https://github.com/apache/spark/pull/14616 > Using ordinals in ORDER BY causes an analysis error when the query has a > GROUP BY clause using ordinals > --- > > Key: SPARK-16955 > URL: https://issues.apache.org/jira/browse/SPARK-16955 > Project: Spark > Issue Type: Bug >Affects Versions: 2.0.0 >Reporter: Yin Huai > > The following queries work > {code} > select a from (select 1 as a) tmp order by 1 > select a, count(*) from (select 1 as a) tmp group by 1 > select a, count(*) from (select 1 as a) tmp group by 1 order by a > {code} > However, the following query does not > {code} > select a, count(*) from (select 1 as a) tmp group by 1 order by 1 > {code} > {code} > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > Group by position: '1' exceeds the size of the select list '0'. on unresolved > object, tree: > Aggregate [1] > +- SubqueryAlias tmp >+- Project [1 AS a#82] > +- OneRowRelation$ > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:749) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:739) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.AbstractTraversable.map(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:739) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:714) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1237) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1181) >
[jira] [Commented] (SPARK-16955) Using ordinals in ORDER BY causes an analysis error when the query has a GROUP BY clause using ordinals
[ https://issues.apache.org/jira/browse/SPARK-16955?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15412893#comment-15412893 ] Dongjoon Hyun commented on SPARK-16955: --- Hi, [~yhuai]. Could you review the PR? The root cause was `ResolveAggregateFunctions` removed the ordinal sort orders too early. After improving the `if` condition to check the resolution is completed, the case works well. > Using ordinals in ORDER BY causes an analysis error when the query has a > GROUP BY clause using ordinals > --- > > Key: SPARK-16955 > URL: https://issues.apache.org/jira/browse/SPARK-16955 > Project: Spark > Issue Type: Bug >Affects Versions: 2.0.0 >Reporter: Yin Huai > > The following queries work > {code} > select a from (select 1 as a) tmp order by 1 > select a, count(*) from (select 1 as a) tmp group by 1 > select a, count(*) from (select 1 as a) tmp group by 1 order by a > {code} > However, the following query does not > {code} > select a, count(*) from (select 1 as a) tmp group by 1 order by 1 > {code} > {code} > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > Group by position: '1' exceeds the size of the select list '0'. on unresolved > object, tree: > Aggregate [1] > +- SubqueryAlias tmp >+- Project [1 AS a#82] > +- OneRowRelation$ > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:749) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:739) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.AbstractTraversable.map(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:739) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:714) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1237) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at >
[jira] [Commented] (SPARK-16955) Using ordinals in ORDER BY causes an analysis error when the query has a GROUP BY clause using ordinals
[ https://issues.apache.org/jira/browse/SPARK-16955?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15412551#comment-15412551 ] Apache Spark commented on SPARK-16955: -- User 'dongjoon-hyun' has created a pull request for this issue: https://github.com/apache/spark/pull/14546 > Using ordinals in ORDER BY causes an analysis error when the query has a > GROUP BY clause using ordinals > --- > > Key: SPARK-16955 > URL: https://issues.apache.org/jira/browse/SPARK-16955 > Project: Spark > Issue Type: Bug >Affects Versions: 2.0.0 >Reporter: Yin Huai > > The following queries work > {code} > select a from (select 1 as a) tmp order by 1 > select a, count(*) from (select 1 as a) tmp group by 1 > select a, count(*) from (select 1 as a) tmp group by 1 order by a > {code} > However, the following query does not > {code} > select a, count(*) from (select 1 as a) tmp group by 1 order by 1 > {code} > {code} > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > Group by position: '1' exceeds the size of the select list '0'. on unresolved > object, tree: > Aggregate [1] > +- SubqueryAlias tmp >+- Project [1 AS a#82] > +- OneRowRelation$ > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:749) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:739) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.AbstractTraversable.map(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:739) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:714) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1237) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1181) >
[jira] [Commented] (SPARK-16955) Using ordinals in ORDER BY causes an analysis error when the query has a GROUP BY clause using ordinals
[ https://issues.apache.org/jira/browse/SPARK-16955?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15412475#comment-15412475 ] Dongjoon Hyun commented on SPARK-16955: --- `ResolveAggregateFunctions` seems to have a bug to drop the ordinals. I'll make a PR after some testing. > Using ordinals in ORDER BY causes an analysis error when the query has a > GROUP BY clause using ordinals > --- > > Key: SPARK-16955 > URL: https://issues.apache.org/jira/browse/SPARK-16955 > Project: Spark > Issue Type: Bug >Affects Versions: 2.0.0 >Reporter: Yin Huai > > The following queries work > {code} > select a from (select 1 as a) tmp order by 1 > select a, count(*) from (select 1 as a) tmp group by 1 > select a, count(*) from (select 1 as a) tmp group by 1 order by a > {code} > However, the following query does not > {code} > select a, count(*) from (select 1 as a) tmp group by 1 order by 1 > {code} > {code} > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > Group by position: '1' exceeds the size of the select list '0'. on unresolved > object, tree: > Aggregate [1] > +- SubqueryAlias tmp >+- Project [1 AS a#82] > +- OneRowRelation$ > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:749) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:739) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.AbstractTraversable.map(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:739) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:714) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1237) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1181) >
[jira] [Commented] (SPARK-16955) Using ordinals in ORDER BY causes an analysis error when the query has a GROUP BY clause using ordinals
[ https://issues.apache.org/jira/browse/SPARK-16955?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15412265#comment-15412265 ] Dongjoon Hyun commented on SPARK-16955: --- Sure! Thank you, [~yhuai]. I'll take a look this. > Using ordinals in ORDER BY causes an analysis error when the query has a > GROUP BY clause using ordinals > --- > > Key: SPARK-16955 > URL: https://issues.apache.org/jira/browse/SPARK-16955 > Project: Spark > Issue Type: Bug >Affects Versions: 2.0.0 >Reporter: Yin Huai > > The following queries work > {code} > select a from (select 1 as a) tmp order by 1 > select a, count(*) from (select 1 as a) tmp group by 1 > select a, count(*) from (select 1 as a) tmp group by 1 order by a > {code} > However, the following query does not > {code} > select a, count(*) from (select 1 as a) tmp group by 1 order by 1 > {code} > {code} > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > Group by position: '1' exceeds the size of the select list '0'. on unresolved > object, tree: > Aggregate [1] > +- SubqueryAlias tmp >+- Project [1 AS a#82] > +- OneRowRelation$ > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:749) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:739) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.AbstractTraversable.map(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:739) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:714) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1237) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1181) > at >
[jira] [Commented] (SPARK-16955) Using ordinals in ORDER BY causes an analysis error when the query has a GROUP BY clause using ordinals
[ https://issues.apache.org/jira/browse/SPARK-16955?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=15412263#comment-15412263 ] Yin Huai commented on SPARK-16955: -- [~dongjoon] Will have time to take a look? > Using ordinals in ORDER BY causes an analysis error when the query has a > GROUP BY clause using ordinals > --- > > Key: SPARK-16955 > URL: https://issues.apache.org/jira/browse/SPARK-16955 > Project: Spark > Issue Type: Bug >Affects Versions: 2.0.0 >Reporter: Yin Huai > > The following queries work > {code} > select a from (select 1 as a) tmp order by 1 > select a, count(*) from (select 1 as a) tmp group by 1 > select a, count(*) from (select 1 as a) tmp group by 1 order by a > {code} > However, the following query does not > {code} > select a, count(*) from (select 1 as a) tmp group by 1 order by 1 > {code} > {code} > org.apache.spark.sql.catalyst.analysis.UnresolvedException: Invalid call to > Group by position: '1' exceeds the size of the select list '0'. on unresolved > object, tree: > Aggregate [1] > +- SubqueryAlias tmp >+- Project [1 AS a#82] > +- OneRowRelation$ > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:749) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11$$anonfun$34.apply(Analyzer.scala:739) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:244) > at scala.collection.AbstractTraversable.map(Traversable.scala:105) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:739) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$$anonfun$apply$11.applyOrElse(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:715) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveOrdinalInOrderByAndGroupBy$.apply(Analyzer.scala:714) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:85) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1$$anonfun$apply$1.apply(RuleExecutor.scala:82) > at > scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) > at scala.collection.immutable.List.foldLeft(List.scala:84) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:82) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$execute$1.apply(RuleExecutor.scala:74) > at scala.collection.immutable.List.foreach(List.scala:318) > at > org.apache.spark.sql.catalyst.rules.RuleExecutor.execute(RuleExecutor.scala:74) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1237) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$$anonfun$apply$20.applyOrElse(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan$$anonfun$resolveOperators$1.apply(LogicalPlan.scala:61) > at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:69) > at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.resolveOperators(LogicalPlan.scala:60) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1182) > at > org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveAggregateFunctions$.apply(Analyzer.scala:1181) > at >