Repository: spark
Updated Branches:
refs/heads/master 2cdc3e5c6 -> 149910111
SPARK-2632, SPARK-2576. Fixed by only importing what is necessary during class
definition.
Without this patch, it imports everything available in the scope.
```scala
scala> val a = 10l
val a = 10l
a: Long = 10
scala> import a._
import a._
import a._
scala> case class A(a: Int) // show
case class A(a: Int) // show
class $read extends Serializable {
def <init>() = {
super.<init>;
()
};
class $iwC extends Serializable {
def <init>() = {
super.<init>;
()
};
class $iwC extends Serializable {
def <init>() = {
super.<init>;
()
};
import org.apache.spark.SparkContext._;
class $iwC extends Serializable {
def <init>() = {
super.<init>;
()
};
val $VAL5 = $line5.$read.INSTANCE;
import $VAL5.$iw.$iw.$iw.$iw.a;
class $iwC extends Serializable {
def <init>() = {
super.<init>;
()
};
import a._;
class $iwC extends Serializable {
def <init>() = {
super.<init>;
()
};
class $iwC extends Serializable {
def <init>() = {
super.<init>;
()
};
case class A extends scala.Product with scala.Serializable {
<caseaccessor> <paramaccessor> val a: Int = _;
def <init>(a: Int) = {
super.<init>;
()
}
}
};
val $iw = new $iwC.<init>
};
val $iw = new $iwC.<init>
};
val $iw = new $iwC.<init>
};
val $iw = new $iwC.<init>
};
val $iw = new $iwC.<init>
};
val $iw = new $iwC.<init>
}
object $read extends scala.AnyRef {
def <init>() = {
super.<init>;
()
};
val INSTANCE = new $read.<init>
}
defined class A
```
With this patch, it imports only what is necessary.
```scala
scala> val a = 10l
val a = 10l
a: Long = 10
scala> import a._
import a._
import a._
scala> case class A(a: Int) // show
case class A(a: Int) // show
class $read extends Serializable {
def <init>() = {
super.<init>;
()
};
class $iwC extends Serializable {
def <init>() = {
super.<init>;
()
};
class $iwC extends Serializable {
def <init>() = {
super.<init>;
()
};
case class A extends scala.Product with scala.Serializable {
<caseaccessor> <paramaccessor> val a: Int = _;
def <init>(a: Int) = {
super.<init>;
()
}
}
};
val $iw = new $iwC.<init>
};
val $iw = new $iwC.<init>
}
object $read extends scala.AnyRef {
def <init>() = {
super.<init>;
()
};
val INSTANCE = new $read.<init>
}
defined class A
scala>
```
This patch also adds a `:fallback` mode which, when enabled, restores the
spark-shell's 1.0.0 behaviour.
Author: Prashant Sharma <[email protected]>
Author: Yin Huai <[email protected]>
Author: Prashant Sharma <[email protected]>
Closes #1635 from ScrapCodes/repl-fix-necessary-imports and squashes the
following commits:
b1968d2 [Prashant Sharma] Added toschemaRDD to test case.
0b712bb [Yin Huai] Add a REPL test to test importing a method.
02ad8ff [Yin Huai] Add a REPL test for importing SQLContext.createSchemaRDD.
ed6d0c7 [Prashant Sharma] Added a fallback mode, in case users run into issues
while using repl.
b63d3b2 [Prashant Sharma] SPARK-2632, SPARK-2576. Fixed by only importing what
is necessary during class definition.
Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/14991011
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/14991011
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/14991011
Branch: refs/heads/master
Commit: 149910111331133d52e0cb01b256f7f731b436ad
Parents: 2cdc3e5
Author: Prashant Sharma <[email protected]>
Authored: Thu Jul 31 22:57:13 2014 -0700
Committer: Michael Armbrust <[email protected]>
Committed: Thu Jul 31 22:57:13 2014 -0700
----------------------------------------------------------------------
repl/pom.xml | 6 +++++
.../org/apache/spark/repl/SparkILoop.scala | 17 ++++++++++++
.../org/apache/spark/repl/SparkIMain.scala | 7 ++++-
.../org/apache/spark/repl/SparkImports.scala | 15 ++++++++---
.../scala/org/apache/spark/repl/ReplSuite.scala | 27 ++++++++++++++++++++
5 files changed, 67 insertions(+), 5 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/spark/blob/14991011/repl/pom.xml
----------------------------------------------------------------------
diff --git a/repl/pom.xml b/repl/pom.xml
index 4ebb1b8..68f4504 100644
--- a/repl/pom.xml
+++ b/repl/pom.xml
@@ -56,6 +56,12 @@
<scope>runtime</scope>
</dependency>
<dependency>
+ <groupId>org.apache.spark</groupId>
+ <artifactId>spark-sql_${scala.binary.version}</artifactId>
+ <version>${project.version}</version>
+ <scope>test</scope>
+ </dependency>
+ <dependency>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</dependency>
http://git-wip-us.apache.org/repos/asf/spark/blob/14991011/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
----------------------------------------------------------------------
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
index 6f9fa0d..42c7e51 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkILoop.scala
@@ -230,6 +230,20 @@ class SparkILoop(in0: Option[BufferedReader], protected
val out: JPrintWriter,
case xs => xs find (_.name == cmd)
}
}
+ private var fallbackMode = false
+
+ private def toggleFallbackMode() {
+ val old = fallbackMode
+ fallbackMode = !old
+ System.setProperty("spark.repl.fallback", fallbackMode.toString)
+ echo(s"""
+ |Switched ${if (old) "off" else "on"} fallback mode without restarting.
+ | If you have defined classes in the repl, it would
+ |be good to redefine them incase you plan to use them. If you still run
+ |into issues it would be good to restart the repl and turn on
`:fallback`
+ |mode as first command.
+ """.stripMargin)
+ }
/** Show the history */
lazy val historyCommand = new LoopCommand("history", "show the history
(optional num is commands to show)") {
@@ -299,6 +313,9 @@ class SparkILoop(in0: Option[BufferedReader], protected val
out: JPrintWriter,
nullary("reset", "reset the repl to its initial state, forgetting all
session entries", resetCommand),
shCommand,
nullary("silent", "disable/enable automatic printing of results",
verbosity),
+ nullary("fallback", """
+ |disable/enable advanced repl changes, these fix
some issues but may introduce others.
+ |This mode will be removed once these fixes
stablize""".stripMargin, toggleFallbackMode),
cmd("type", "[-v] <expr>", "display the type of an expression without
evaluating it", typeCommand),
nullary("warnings", "show the suppressed warnings from the most recent
line which had any", warningsCommand)
)
http://git-wip-us.apache.org/repos/asf/spark/blob/14991011/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala
----------------------------------------------------------------------
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala
b/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala
index 3842c29..f60bbb4 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkIMain.scala
@@ -892,11 +892,16 @@ import org.apache.spark.util.Utils
def definedTypeSymbol(name: String) = definedSymbols(newTypeName(name))
def definedTermSymbol(name: String) = definedSymbols(newTermName(name))
+ val definedClasses = handlers.exists {
+ case _: ClassHandler => true
+ case _ => false
+ }
+
/** Code to import bound names from previous lines - accessPath is code to
* append to objectName to access anything bound by request.
*/
val SparkComputedImports(importsPreamble, importsTrailer, accessPath) =
- importsCode(referencedNames.toSet)
+ importsCode(referencedNames.toSet, definedClasses)
/** Code to access a variable with the specified name */
def fullPath(vname: String) = {
http://git-wip-us.apache.org/repos/asf/spark/blob/14991011/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala
----------------------------------------------------------------------
diff --git a/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala
b/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala
index 9099e05..193a42d 100644
--- a/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala
+++ b/repl/src/main/scala/org/apache/spark/repl/SparkImports.scala
@@ -108,8 +108,9 @@ trait SparkImports {
* last one imported is actually usable.
*/
case class SparkComputedImports(prepend: String, append: String, access:
String)
+ def fallback = System.getProperty("spark.repl.fallback", "false").toBoolean
- protected def importsCode(wanted: Set[Name]): SparkComputedImports = {
+ protected def importsCode(wanted: Set[Name], definedClass: Boolean):
SparkComputedImports = {
/** Narrow down the list of requests from which imports
* should be taken. Removes requests which cannot contribute
* useful imports for the specified set of wanted names.
@@ -124,8 +125,14 @@ trait SparkImports {
// Single symbol imports might be implicits! See bug #1752. Rather
than
// try to finesse this, we will mimic all imports for now.
def keepHandler(handler: MemberHandler) = handler match {
- case _: ImportHandler => true
- case x => x.definesImplicit || (x.definedNames exists
wanted)
+ /* This case clause tries to "precisely" import only what is required.
And in this
+ * it may miss out on some implicits, because implicits are not known
in `wanted`. Thus
+ * it is suitable for defining classes. AFAIK while defining classes
implicits are not
+ * needed.*/
+ case h: ImportHandler if definedClass && !fallback =>
+ h.importedNames.exists(x => wanted.contains(x))
+ case _: ImportHandler => true
+ case x => x.definesImplicit || (x.definedNames
exists wanted)
}
reqs match {
@@ -182,7 +189,7 @@ trait SparkImports {
// ambiguity errors will not be generated. Also, quote
// the name of the variable, so that we don't need to
// handle quoting keywords separately.
- case x: ClassHandler =>
+ case x: ClassHandler if !fallback =>
// I am trying to guess if the import is a defined class
// This is an ugly hack, I am not 100% sure of the consequences.
// Here we, let everything but "defined classes" use the import with
val.
http://git-wip-us.apache.org/repos/asf/spark/blob/14991011/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
----------------------------------------------------------------------
diff --git a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
index e2d8d5f..c8763eb 100644
--- a/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
+++ b/repl/src/test/scala/org/apache/spark/repl/ReplSuite.scala
@@ -256,6 +256,33 @@ class ReplSuite extends FunSuite {
assertDoesNotContain("error:", output)
assertDoesNotContain("Exception", output)
}
+
+ test("SPARK-2576 importing SQLContext.createSchemaRDD.") {
+ // We need to use local-cluster to test this case.
+ val output = runInterpreter("local-cluster[1,1,512]",
+ """
+ |val sqlContext = new org.apache.spark.sql.SQLContext(sc)
+ |import sqlContext.createSchemaRDD
+ |case class TestCaseClass(value: Int)
+ |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).toSchemaRDD.collect
+ """.stripMargin)
+ assertDoesNotContain("error:", output)
+ assertDoesNotContain("Exception", output)
+ }
+
+ test("SPARK-2632 importing a method from non serializable class and not
using it.") {
+ val output = runInterpreter("local",
+ """
+ |class TestClass() { def testMethod = 3 }
+ |val t = new TestClass
+ |import t.testMethod
+ |case class TestCaseClass(value: Int)
+ |sc.parallelize(1 to 10).map(x => TestCaseClass(x)).collect
+ """.stripMargin)
+ assertDoesNotContain("error:", output)
+ assertDoesNotContain("Exception", output)
+ }
+
if (System.getenv("MESOS_NATIVE_LIBRARY") != null) {
test("running on Mesos") {
val output = runInterpreter("localquiet",