Repository: spark Updated Branches: refs/heads/master 70a6f0bb5 -> e9b6e7d85
[SPARK-13456][SQL][FOLLOW-UP] lazily generate the outer pointer for case class defined in REPL ## What changes were proposed in this pull request? In https://github.com/apache/spark/pull/11410, we missed a corner case: defining the inner class and using it in `Dataset` at the same time by using paste mode. In this case, the inner class and the `Dataset` are inside the same line object; when we build the `Dataset`, we try to get the outer pointer from the line object, and it will fail because the line object is not initialized yet. https://issues.apache.org/jira/browse/SPARK-13456?focusedCommentId=15209174&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15209174 is an example of this corner case. This PR makes the process of getting the outer pointer from the line object lazy, so that we can successfully build the `Dataset` and finish initializing the line object. ## How was this patch tested? New test in REPL suite. Author: Wenchen Fan <wenc...@databricks.com> Closes #11931 from cloud-fan/repl. 
Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9b6e7d8 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9b6e7d8 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9b6e7d8 Branch: refs/heads/master Commit: e9b6e7d8577cd721a433130f29e8b112d98768b9 Parents: 70a6f0b Author: Wenchen Fan <wenc...@databricks.com> Authored: Fri Mar 25 20:19:04 2016 +0800 Committer: Cheng Lian <l...@databricks.com> Committed: Fri Mar 25 20:19:04 2016 +0800 ---------------------------------------------------------------------- .../scala/org/apache/spark/repl/ReplSuite.scala | 15 ++++++++ .../spark/sql/catalyst/analysis/Analyzer.scala | 2 +- .../sql/catalyst/encoders/OuterScopes.scala | 39 +++++++++++++------- .../sql/catalyst/expressions/objects.scala | 12 +++--- 4 files changed, 48 insertions(+), 20 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/e9b6e7d8/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala ---------------------------------------------------------------------- diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala index f148a6d..dbfacba 100644 --- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala +++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala @@ -59,6 +59,10 @@ class ReplSuite extends SparkFunSuite { return out.toString } + // Simulate the paste mode in Scala REPL. 
+ def runInterpreterInPasteMode(master: String, input: String): String = + runInterpreter(master, ":paste\n" + input + 4.toChar) // 4 is the ascii code of CTRL + D + def assertContains(message: String, output: String) { val isContain = output.contains(message) assert(isContain, @@ -381,4 +385,15 @@ class ReplSuite extends SparkFunSuite { assertDoesNotContain("error:", output) assertDoesNotContain("Exception", output) } + + test("define case class and create Dataset together with paste mode") { + val output = runInterpreterInPasteMode("local-cluster[1,1,1024]", + """ + |import sqlContext.implicits._ + |case class TestClass(value: Int) + |Seq(TestClass(1)).toDS() + """.stripMargin) + assertDoesNotContain("error:", output) + assertDoesNotContain("Exception", output) + } } http://git-wip-us.apache.org/repos/asf/spark/blob/e9b6e7d8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala index b344e04..89b18af 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala @@ -605,7 +605,7 @@ class Analyzer( "access to the scope that this class was defined in.\n" + "Try moving this class out of its parent class.") } - n.copy(outerPointer = Some(Literal.fromObject(outer))) + n.copy(outerPointer = Some(outer)) } } http://git-wip-us.apache.org/repos/asf/spark/blob/e9b6e7d8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala index c047e96..a1f0312 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala @@ -42,7 +42,12 @@ object OuterScopes { outerScopes.putIfAbsent(outer.getClass.getName, outer) } - def getOuterScope(innerCls: Class[_]): AnyRef = { + /** + * Returns a function which can get the outer scope for the given inner class. By using function + * as return type, we can delay the process of getting outer pointer to execution time, which is + * useful for inner class defined in REPL. + */ + def getOuterScope(innerCls: Class[_]): () => AnyRef = { assert(innerCls.isMemberClass) val outerClassName = innerCls.getDeclaringClass.getName val outer = outerScopes.get(outerClassName) @@ -53,24 +58,30 @@ object OuterScopes { // `INSTANCE()` method to get the single instance of class `$read`. Then call `$iw()` // method multiply times to get the single instance of the inner most `$iw` class. 
case REPLClass(baseClassName) => - val objClass = Utils.classForName(baseClassName + "$") - val objInstance = objClass.getField("MODULE$").get(null) - val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance) - val baseClass = Utils.classForName(baseClassName) + () => { + val objClass = Utils.classForName(baseClassName + "$") + val objInstance = objClass.getField("MODULE$").get(null) + val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance) + val baseClass = Utils.classForName(baseClassName) - var getter = iwGetter(baseClass) - var obj = baseInstance - while (getter != null) { - obj = getter.invoke(obj) - getter = iwGetter(getter.getReturnType) - } + var getter = iwGetter(baseClass) + var obj = baseInstance + while (getter != null) { + obj = getter.invoke(obj) + getter = iwGetter(getter.getReturnType) + } - outerScopes.putIfAbsent(outerClassName, obj) - obj + if (obj == null) { + throw new RuntimeException(s"Failed to get outer pointer for ${innerCls.getName}") + } + + outerScopes.putIfAbsent(outerClassName, obj) + obj + } case _ => null } } else { - outer + () => outer } } http://git-wip-us.apache.org/repos/asf/spark/blob/e9b6e7d8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala ---------------------------------------------------------------------- diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala index 7eba617..07b67a0 100644 --- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala +++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala @@ -197,15 +197,17 @@ object NewInstance { * @param dataType The type of object being constructed, as a Spark SQL datatype. This allows you * to manually specify the type when the object in question is a valid internal * representation (i.e. ArrayData) instead of an object. 
- * @param outerPointer If the object being constructed is an inner class the outerPointer must - * for the containing class must be specified. + * @param outerPointer If the object being constructed is an inner class, the outerPointer for the + * containing class must be specified. This parameter is defined as an optional + * function, which allows us to get the outer pointer lazily,and it's useful if + * the inner class is defined in REPL. */ case class NewInstance( cls: Class[_], arguments: Seq[Expression], propagateNull: Boolean, dataType: DataType, - outerPointer: Option[Literal]) extends Expression with NonSQLExpression { + outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression { private val className = cls.getName override def nullable: Boolean = propagateNull @@ -220,12 +222,12 @@ case class NewInstance( val argGen = arguments.map(_.gen(ctx)) val argString = argGen.map(_.value).mkString(", ") - val outer = outerPointer.map(_.gen(ctx)) + val outer = outerPointer.map(func => Literal.fromObject(func()).gen(ctx)) val setup = s""" ${argGen.map(_.code).mkString("\n")} - ${outer.map(_.code.mkString("")).getOrElse("")} + ${outer.map(_.code).getOrElse("")} """.stripMargin val constructorCall = outer.map { gen => --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org