[jira] [Updated] (SPARK-6247) Certain self joins cannot be analyzed
[ https://issues.apache.org/jira/browse/SPARK-6247?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Michael Armbrust updated SPARK-6247: Priority: Critical (was: Major) Certain self joins cannot be analyzed - Key: SPARK-6247 URL: https://issues.apache.org/jira/browse/SPARK-6247 Project: Spark Issue Type: Bug Components: SQL Reporter: Yin Huai Priority: Critical When you try the following code {code} val df = (1 to 10) .map(i => (i, i.toDouble, i.toLong, i.toString, i.toString)) .toDF("intCol", "doubleCol", "longCol", "stringCol1", "stringCol2") df.registerTempTable("test") sql( """ |SELECT x.stringCol2, avg(y.intCol), sum(x.doubleCol) |FROM test x JOIN test y ON (x.stringCol1 = y.stringCol1) |GROUP BY x.stringCol2 """.stripMargin).explain() {code} The following exception will be thrown. {code} [info] java.util.NoSuchElementException: next on empty iterator [info] at scala.collection.Iterator$$anon$2.next(Iterator.scala:39) [info] at scala.collection.Iterator$$anon$2.next(Iterator.scala:37) [info] at scala.collection.IndexedSeqLike$Elements.next(IndexedSeqLike.scala:64) [info] at scala.collection.IterableLike$class.head(IterableLike.scala:91) [info] at scala.collection.mutable.ArrayBuffer.scala$collection$IndexedSeqOptimized$$super$head(ArrayBuffer.scala:47) [info] at scala.collection.IndexedSeqOptimized$class.head(IndexedSeqOptimized.scala:120) [info] at scala.collection.mutable.ArrayBuffer.head(ArrayBuffer.scala:47) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:247) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:197) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250) [info] at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:50) [info] at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:249) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:263) [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) [info] at scala.collection.Iterator$class.foreach(Iterator.scala:727) [info] at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) [info] at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) [info] at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) [info] at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) [info] at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) [info] at scala.collection.AbstractIterator.to(Iterator.scala:1157) [info] at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) [info] at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) [info] at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) [info] at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) [info] at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenUp(TreeNode.scala:292) [info] at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:247) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:197) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:196) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59) [info] at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) [info] at scala.collection.immutable.List.foldLeft(List.scala:84) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59) [info] at 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51) [info] at scala.collection.immutable.List.foreach(List.scala:318) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51) [info] at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:1071) [info] at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:1071) [info] at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1069) [info] at
[jira] [Updated] (SPARK-6247) Certain self joins cannot be analyzed
[ https://issues.apache.org/jira/browse/SPARK-6247?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Michael Armbrust updated SPARK-6247: Target Version/s: 1.3.1 (was: 1.3.0) Certain self joins cannot be analyzed - Key: SPARK-6247 URL: https://issues.apache.org/jira/browse/SPARK-6247 Project: Spark Issue Type: Bug Components: SQL Reporter: Yin Huai When you try the following code {code} val df = (1 to 10) .map(i => (i, i.toDouble, i.toLong, i.toString, i.toString)) .toDF("intCol", "doubleCol", "longCol", "stringCol1", "stringCol2") df.registerTempTable("test") sql( """ |SELECT x.stringCol2, avg(y.intCol), sum(x.doubleCol) |FROM test x JOIN test y ON (x.stringCol1 = y.stringCol1) |GROUP BY x.stringCol2 """.stripMargin).explain() {code} The following exception will be thrown. {code} [info] java.util.NoSuchElementException: next on empty iterator [info] at scala.collection.Iterator$$anon$2.next(Iterator.scala:39) [info] at scala.collection.Iterator$$anon$2.next(Iterator.scala:37) [info] at scala.collection.IndexedSeqLike$Elements.next(IndexedSeqLike.scala:64) [info] at scala.collection.IterableLike$class.head(IterableLike.scala:91) [info] at scala.collection.mutable.ArrayBuffer.scala$collection$IndexedSeqOptimized$$super$head(ArrayBuffer.scala:47) [info] at scala.collection.IndexedSeqOptimized$class.head(IndexedSeqOptimized.scala:120) [info] at scala.collection.mutable.ArrayBuffer.head(ArrayBuffer.scala:47) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:247) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:197) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250) [info] at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:50) [info] at 
org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:249) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:263) [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) [info] at scala.collection.Iterator$class.foreach(Iterator.scala:727) [info] at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) [info] at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) [info] at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) [info] at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) [info] at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) [info] at scala.collection.AbstractIterator.to(Iterator.scala:1157) [info] at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) [info] at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) [info] at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) [info] at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) [info] at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenUp(TreeNode.scala:292) [info] at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:247) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:197) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:196) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59) [info] at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) [info] at scala.collection.immutable.List.foldLeft(List.scala:84) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59) [info] at 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51) [info] at scala.collection.immutable.List.foreach(List.scala:318) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51) [info] at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:1071) [info] at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:1071) [info] at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1069) [info] at org.apache.spark.sql.DataFrame.init(DataFrame.scala:133) [info] at
[jira] [Updated] (SPARK-6247) Certain self joins cannot be analyzed
[ https://issues.apache.org/jira/browse/SPARK-6247?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Michael Armbrust updated SPARK-6247: Priority: Blocker (was: Critical) Certain self joins cannot be analyzed - Key: SPARK-6247 URL: https://issues.apache.org/jira/browse/SPARK-6247 Project: Spark Issue Type: Bug Components: SQL Reporter: Yin Huai Assignee: Michael Armbrust Priority: Blocker When you try the following code {code} val df = (1 to 10) .map(i => (i, i.toDouble, i.toLong, i.toString, i.toString)) .toDF("intCol", "doubleCol", "longCol", "stringCol1", "stringCol2") df.registerTempTable("test") sql( """ |SELECT x.stringCol2, avg(y.intCol), sum(x.doubleCol) |FROM test x JOIN test y ON (x.stringCol1 = y.stringCol1) |GROUP BY x.stringCol2 """.stripMargin).explain() {code} The following exception will be thrown. {code} [info] java.util.NoSuchElementException: next on empty iterator [info] at scala.collection.Iterator$$anon$2.next(Iterator.scala:39) [info] at scala.collection.Iterator$$anon$2.next(Iterator.scala:37) [info] at scala.collection.IndexedSeqLike$Elements.next(IndexedSeqLike.scala:64) [info] at scala.collection.IterableLike$class.head(IterableLike.scala:91) [info] at scala.collection.mutable.ArrayBuffer.scala$collection$IndexedSeqOptimized$$super$head(ArrayBuffer.scala:47) [info] at scala.collection.IndexedSeqOptimized$class.head(IndexedSeqOptimized.scala:120) [info] at scala.collection.mutable.ArrayBuffer.head(ArrayBuffer.scala:47) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:247) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:197) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250) [info] at 
org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:50) [info] at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:249) [info] at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:263) [info] at scala.collection.Iterator$$anon$11.next(Iterator.scala:328) [info] at scala.collection.Iterator$class.foreach(Iterator.scala:727) [info] at scala.collection.AbstractIterator.foreach(Iterator.scala:1157) [info] at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48) [info] at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103) [info] at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47) [info] at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273) [info] at scala.collection.AbstractIterator.to(Iterator.scala:1157) [info] at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265) [info] at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157) [info] at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252) [info] at scala.collection.AbstractIterator.toArray(Iterator.scala:1157) [info] at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenUp(TreeNode.scala:292) [info] at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:247) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:197) [info] at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:196) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59) [info] at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111) [info] at scala.collection.immutable.List.foldLeft(List.scala:84) [info] at 
org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51) [info] at scala.collection.immutable.List.foreach(List.scala:318) [info] at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51) [info] at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:1071) [info] at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:1071) [info] at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1069) [info] at