Repository: spark
Updated Branches:
  refs/heads/master 70a6f0bb5 -> e9b6e7d85


[SPARK-13456][SQL][FOLLOW-UP] lazily generate the outer pointer for case class defined in REPL

## What changes were proposed in this pull request?

In https://github.com/apache/spark/pull/11410, we missed a corner case:
defining an inner class and using it in a `Dataset` at the same time in paste
mode. In this case, the inner class and the `Dataset` live inside the same
line object; when we build the `Dataset`, we try to get the outer pointer
from the line object, which fails because the line object has not been
initialized yet.

https://issues.apache.org/jira/browse/SPARK-13456?focusedCommentId=15209174&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15209174
gives an example of this corner case.
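
As a minimal reproduction (essentially the snippet used by the new test added
below), pasting these three lines together in `:paste` mode fails before this
fix:

```scala
// All three statements end up in a single line object in :paste mode, so
// building the Dataset asks for TestClass's outer pointer while that line
// object is still being initialized.
import sqlContext.implicits._
case class TestClass(value: Int)
Seq(TestClass(1)).toDS()
```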

This PR makes the process of getting the outer pointer from the line object
lazy, so that we can successfully build the `Dataset` and then finish
initializing the line object.
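
Conceptually, the fix turns the eagerly resolved outer instance into a thunk.
A simplified sketch of the idea, not the actual Spark code
(`resolveOuterInstance` is a hypothetical stand-in for the reflection walk in
`OuterScopes`):

```scala
object LazyOuterSketch {
  // Hypothetical stand-in for the reflection logic in OuterScopes
  // ($read.INSTANCE, then the chain of $iw() calls).
  def resolveOuterInstance(innerCls: Class[_]): AnyRef = new Object // stub

  // Before: resolve at analysis time; this fails if the enclosing line
  // object is still mid-initialization.
  def getOuterScopeEager(innerCls: Class[_]): AnyRef =
    resolveOuterInstance(innerCls)

  // After: hand back a thunk; the lookup only runs when the thunk is
  // invoked, by which point the line object is fully initialized.
  def getOuterScopeLazy(innerCls: Class[_]): () => AnyRef =
    () => resolveOuterInstance(innerCls)
}
```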

## How was this patch tested?

A new test in the REPL suite.

Author: Wenchen Fan <wenc...@databricks.com>

Closes #11931 from cloud-fan/repl.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/e9b6e7d8
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/e9b6e7d8
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/e9b6e7d8

Branch: refs/heads/master
Commit: e9b6e7d8577cd721a433130f29e8b112d98768b9
Parents: 70a6f0b
Author: Wenchen Fan <wenc...@databricks.com>
Authored: Fri Mar 25 20:19:04 2016 +0800
Committer: Cheng Lian <l...@databricks.com>
Committed: Fri Mar 25 20:19:04 2016 +0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/repl/ReplSuite.scala | 15 ++++++++
 .../spark/sql/catalyst/analysis/Analyzer.scala  |  2 +-
 .../sql/catalyst/encoders/OuterScopes.scala     | 39 +++++++++++++-------
 .../sql/catalyst/expressions/objects.scala      | 12 +++---
 4 files changed, 48 insertions(+), 20 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/e9b6e7d8/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
----------------------------------------------------------------------
diff --git a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index f148a6d..dbfacba 100644
--- a/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/scala-2.11/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -59,6 +59,10 @@ class ReplSuite extends SparkFunSuite {
     return out.toString
   }
 
+  // Simulate the paste mode in Scala REPL.
+  def runInterpreterInPasteMode(master: String, input: String): String =
+    runInterpreter(master, ":paste\n" + input + 4.toChar) // 4 is the ascii code of CTRL + D
+
   def assertContains(message: String, output: String) {
     val isContain = output.contains(message)
     assert(isContain,
@@ -381,4 +385,15 @@ class ReplSuite extends SparkFunSuite {
     assertDoesNotContain("error:", output)
     assertDoesNotContain("Exception", output)
   }
+
+  test("define case class and create Dataset together with paste mode") {
+    val output = runInterpreterInPasteMode("local-cluster[1,1,1024]",
+      """
+        |import sqlContext.implicits._
+        |case class TestClass(value: Int)
+        |Seq(TestClass(1)).toDS()
+      """.stripMargin)
+    assertDoesNotContain("error:", output)
+    assertDoesNotContain("Exception", output)
+  }
 }

http://git-wip-us.apache.org/repos/asf/spark/blob/e9b6e7d8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
index b344e04..89b18af 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/analysis/Analyzer.scala
@@ -605,7 +605,7 @@ class Analyzer(
                 "access to the scope that this class was defined in.\n" +
                 "Try moving this class out of its parent class.")
           }
-          n.copy(outerPointer = Some(Literal.fromObject(outer)))
+          n.copy(outerPointer = Some(outer))
       }
     }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e9b6e7d8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
index c047e96..a1f0312 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/encoders/OuterScopes.scala
@@ -42,7 +42,12 @@ object OuterScopes {
     outerScopes.putIfAbsent(outer.getClass.getName, outer)
   }
 
-  def getOuterScope(innerCls: Class[_]): AnyRef = {
+  /**
+   * Returns a function which can get the outer scope for the given inner class. By using a
+   * function as the return type, we can delay getting the outer pointer until execution time,
+   * which is useful for inner classes defined in the REPL.
+   */
+  def getOuterScope(innerCls: Class[_]): () => AnyRef = {
     assert(innerCls.isMemberClass)
     val outerClassName = innerCls.getDeclaringClass.getName
     val outer = outerScopes.get(outerClassName)
@@ -53,24 +58,30 @@ object OuterScopes {
        // `INSTANCE()` method to get the single instance of class `$read`. Then call `$iw()`
        // method multiple times to get the single instance of the innermost `$iw` class.
         case REPLClass(baseClassName) =>
-          val objClass = Utils.classForName(baseClassName + "$")
-          val objInstance = objClass.getField("MODULE$").get(null)
-          val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance)
-          val baseClass = Utils.classForName(baseClassName)
+          () => {
+            val objClass = Utils.classForName(baseClassName + "$")
+            val objInstance = objClass.getField("MODULE$").get(null)
+            val baseInstance = objClass.getMethod("INSTANCE").invoke(objInstance)
+            val baseClass = Utils.classForName(baseClassName)
 
-          var getter = iwGetter(baseClass)
-          var obj = baseInstance
-          while (getter != null) {
-            obj = getter.invoke(obj)
-            getter = iwGetter(getter.getReturnType)
-          }
+            var getter = iwGetter(baseClass)
+            var obj = baseInstance
+            while (getter != null) {
+              obj = getter.invoke(obj)
+              getter = iwGetter(getter.getReturnType)
+            }
 
-          outerScopes.putIfAbsent(outerClassName, obj)
-          obj
+            if (obj == null) {
+              throw new RuntimeException(s"Failed to get outer pointer for ${innerCls.getName}")
+            }
+
+            outerScopes.putIfAbsent(outerClassName, obj)
+            obj
+          }
         case _ => null
       }
     } else {
-      outer
+      () => outer
     }
   }
 

http://git-wip-us.apache.org/repos/asf/spark/blob/e9b6e7d8/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
----------------------------------------------------------------------
diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
index 7eba617..07b67a0 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/objects.scala
@@ -197,15 +197,17 @@ object NewInstance {
 * @param dataType The type of object being constructed, as a Spark SQL datatype.  This allows you
 *                 to manually specify the type when the object in question is a valid internal
 *                 representation (i.e. ArrayData) instead of an object.
- * @param outerPointer If the object being constructed is an inner class the outerPointer must
- *                     for the containing class must be specified.
+ * @param outerPointer If the object being constructed is an inner class, the outerPointer for the
+ *                     containing class must be specified. This parameter is defined as an optional
+ *                     function, which allows us to get the outer pointer lazily, and it's useful if
+ *                     the inner class is defined in the REPL.
  */
 case class NewInstance(
     cls: Class[_],
     arguments: Seq[Expression],
     propagateNull: Boolean,
     dataType: DataType,
-    outerPointer: Option[Literal]) extends Expression with NonSQLExpression {
+    outerPointer: Option[() => AnyRef]) extends Expression with NonSQLExpression {
   private val className = cls.getName
 
   override def nullable: Boolean = propagateNull
@@ -220,12 +222,12 @@ case class NewInstance(
     val argGen = arguments.map(_.gen(ctx))
     val argString = argGen.map(_.value).mkString(", ")
 
-    val outer = outerPointer.map(_.gen(ctx))
+    val outer = outerPointer.map(func => Literal.fromObject(func()).gen(ctx))
 
     val setup =
       s"""
          ${argGen.map(_.code).mkString("\n")}
-         ${outer.map(_.code.mkString("")).getOrElse("")}
+         ${outer.map(_.code).getOrElse("")}
        """.stripMargin
 
     val constructorCall = outer.map { gen =>
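
To see why deferring the lookup helps, here is a runnable toy model of the
initialization-order problem (plain Scala, not Spark code; `Line` is a
hypothetical stand-in for the REPL's generated line object). As the
`objects.scala` hunk above shows, the real thunk is only forced during code
generation, via `Literal.fromObject(func())`, after the line object exists.

```scala
object Line {
  // A forward reference inside a template body reads the field's default
  // value, so this eager read during initialization sees null.
  val eager: AnyRef = instance
  // A thunk defers the read until after initialization has finished.
  val deferred: () => AnyRef = () => instance
  val instance: AnyRef = new Object
}

object Demo {
  def main(args: Array[String]): Unit = {
    assert(Line.eager == null)      // eager capture saw an uninitialized field
    assert(Line.deferred() != null) // deferred read sees the real instance
  }
}
```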

