[ https://issues.apache.org/jira/browse/SPARK-6247?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Michael Armbrust updated SPARK-6247:
------------------------------------
    Priority: Blocker  (was: Critical)

> Certain self joins cannot be analyzed
> -------------------------------------
>
>                 Key: SPARK-6247
>                 URL: https://issues.apache.org/jira/browse/SPARK-6247
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>            Reporter: Yin Huai
>            Assignee: Michael Armbrust
>            Priority: Blocker
>
> When you run the following code:
> {code}
> val df =
>    (1 to 10)
>       .map(i => (i, i.toDouble, i.toLong, i.toString, i.toString))
>       .toDF("intCol", "doubleCol", "longCol", "stringCol1", "stringCol2")
> df.registerTempTable("test")
> sql(
>   """
>   |SELECT x.stringCol2, avg(y.intCol), sum(x.doubleCol)
>   |FROM test x JOIN test y ON (x.stringCol1 = y.stringCol1)
>   |GROUP BY x.stringCol2
>   """.stripMargin).explain()
> {code}
> The following exception is thrown during analysis:
> {code}
> [info]   java.util.NoSuchElementException: next on empty iterator
> [info]   at scala.collection.Iterator$$anon$2.next(Iterator.scala:39)
> [info]   at scala.collection.Iterator$$anon$2.next(Iterator.scala:37)
> [info]   at scala.collection.IndexedSeqLike$Elements.next(IndexedSeqLike.scala:64)
> [info]   at scala.collection.IterableLike$class.head(IterableLike.scala:91)
> [info]   at scala.collection.mutable.ArrayBuffer.scala$collection$IndexedSeqOptimized$$super$head(ArrayBuffer.scala:47)
> [info]   at scala.collection.IndexedSeqOptimized$class.head(IndexedSeqOptimized.scala:120)
> [info]   at scala.collection.mutable.ArrayBuffer.head(ArrayBuffer.scala:47)
> [info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:247)
> [info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$$anonfun$apply$7.applyOrElse(Analyzer.scala:197)
> [info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250)
> [info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:250)
> [info]   at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:50)
> [info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:249)
> [info]   at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$5.apply(TreeNode.scala:263)
> [info]   at scala.collection.Iterator$$anon$11.next(Iterator.scala:328)
> [info]   at scala.collection.Iterator$class.foreach(Iterator.scala:727)
> [info]   at scala.collection.AbstractIterator.foreach(Iterator.scala:1157)
> [info]   at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:48)
> [info]   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:103)
> [info]   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:47)
> [info]   at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:273)
> [info]   at scala.collection.AbstractIterator.to(Iterator.scala:1157)
> [info]   at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:265)
> [info]   at scala.collection.AbstractIterator.toBuffer(Iterator.scala:1157)
> [info]   at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:252)
> [info]   at scala.collection.AbstractIterator.toArray(Iterator.scala:1157)
> [info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformChildrenUp(TreeNode.scala:292)
> [info]   at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:247)
> [info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:197)
> [info]   at org.apache.spark.sql.catalyst.analysis.Analyzer$ResolveReferences$.apply(Analyzer.scala:196)
> [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:61)
> [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1$$anonfun$apply$2.apply(RuleExecutor.scala:59)
> [info]   at scala.collection.LinearSeqOptimized$class.foldLeft(LinearSeqOptimized.scala:111)
> [info]   at scala.collection.immutable.List.foldLeft(List.scala:84)
> [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:59)
> [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor$$anonfun$apply$1.apply(RuleExecutor.scala:51)
> [info]   at scala.collection.immutable.List.foreach(List.scala:318)
> [info]   at org.apache.spark.sql.catalyst.rules.RuleExecutor.apply(RuleExecutor.scala:51)
> [info]   at org.apache.spark.sql.SQLContext$QueryExecution.analyzed$lzycompute(SQLContext.scala:1071)
> [info]   at org.apache.spark.sql.SQLContext$QueryExecution.analyzed(SQLContext.scala:1071)
> [info]   at org.apache.spark.sql.SQLContext$QueryExecution.assertAnalyzed(SQLContext.scala:1069)
> [info]   at org.apache.spark.sql.DataFrame.<init>(DataFrame.scala:133)
> [info]   at org.apache.spark.sql.DataFrame$.apply(DataFrame.scala:51)
> [info]   at org.apache.spark.sql.SQLContext.sql(SQLContext.scala:915)
> [info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply$mcV$sp(SparkSqlSerializer2Suite.scala:66)
> [info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply(SparkSqlSerializer2Suite.scala:48)
> [info]   at org.apache.spark.serializer.SparkSqlSerializer2Suite$$anonfun$2.apply(SparkSqlSerializer2Suite.scala:48)
> [info]   at org.scalatest.Transformer$$anonfun$apply$1.apply$mcV$sp(Transformer.scala:22)
> [info]   at org.scalatest.OutcomeOf$class.outcomeOf(OutcomeOf.scala:85)
> [info]   at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104)
> [info]   at org.scalatest.Transformer.apply(Transformer.scala:22)
> [info]   at org.scalatest.Transformer.apply(Transformer.scala:20)
> [info]   at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:166)
> [info]   at org.scalatest.Suite$class.withFixture(Suite.scala:1122)
> [info]   at org.scalatest.FunSuite.withFixture(FunSuite.scala:1555)
> [info]   at org.scalatest.FunSuiteLike$class.invokeWithFixture$1(FunSuiteLike.scala:163)
> [info]   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
> [info]   at org.scalatest.FunSuiteLike$$anonfun$runTest$1.apply(FunSuiteLike.scala:175)
> [info]   at org.scalatest.SuperEngine.runTestImpl(Engine.scala:306)
> [info]   at org.scalatest.FunSuiteLike$class.runTest(FunSuiteLike.scala:175)
> [info]   at org.scalatest.FunSuite.runTest(FunSuite.scala:1555)
> [info]   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
> [info]   at org.scalatest.FunSuiteLike$$anonfun$runTests$1.apply(FunSuiteLike.scala:208)
> [info]   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:413)
> [info]   at org.scalatest.SuperEngine$$anonfun$traverseSubNodes$1$1.apply(Engine.scala:401)
> [info]   at scala.collection.immutable.List.foreach(List.scala:318)
> [info]   at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:401)
> [info]   at org.scalatest.SuperEngine.org$scalatest$SuperEngine$$runTestsInBranch(Engine.scala:396)
> [info]   at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:483)
> [info]   at org.scalatest.FunSuiteLike$class.runTests(FunSuiteLike.scala:208)
> [info]   at org.scalatest.FunSuite.runTests(FunSuite.scala:1555)
> [info]   at org.scalatest.Suite$class.run(Suite.scala:1424)
> [info]   at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1555)
> [info]   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
> [info]   at org.scalatest.FunSuiteLike$$anonfun$run$1.apply(FunSuiteLike.scala:212)
> [info]   at org.scalatest.SuperEngine.runImpl(Engine.scala:545)
> [info]   at org.scalatest.FunSuiteLike$class.run(FunSuiteLike.scala:212)
> [info]   at org.scalatest.FunSuite.run(FunSuite.scala:1555)
> [info]   at org.scalatest.tools.Framework.org$scalatest$tools$Framework$$runSuite(Framework.scala:462)
> [info]   at org.scalatest.tools.Framework$ScalaTestTask.execute(Framework.scala:671)
> [info]   at sbt.ForkMain$Run$2.call(ForkMain.java:294)
> [info]   at sbt.ForkMain$Run$2.call(ForkMain.java:284)
> [info]   at java.util.concurrent.FutureTask.run(FutureTask.java:262)
> [info]   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
> [info]   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
> [info]   at java.lang.Thread.run(Thread.java:745)
> {code}



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org
For additional commands, e-mail: issues-h...@spark.apache.org

Reply via email to