Repository: spark
Updated Branches:
  refs/heads/branch-1.0 f6323eb3b -> 5c8e8de99


SPARK-1775: Unneeded lock in ShuffleMapTask.deserializeInfo

This was used in the past to have a cache of deserialized ShuffleMapTasks, but 
that's been removed, so there's no need for a lock. It slows down Spark when 
task descriptions are large, e.g. due to large lineage graphs or local 
variables.

Author: Sandeep <sand...@techaddict.me>

Closes #707 from techaddict/SPARK-1775 and squashes the following commits:

18d8ebf [Sandeep] SPARK-1775: Unneeded lock in ShuffleMapTask.deserializeInfo 
This was used in the past to have a cache of deserialized ShuffleMapTasks, but 
that's been removed, so there's no need for a lock. It slows down Spark when 
task descriptions are large, e.g. due to large lineage graphs or local 
variables.
(cherry picked from commit 7db47c463fefc244e9c100d4aab90451c3828261)

Signed-off-by: Patrick Wendell <pwend...@gmail.com>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/5c8e8de9
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/5c8e8de9
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/5c8e8de9

Branch: refs/heads/branch-1.0
Commit: 5c8e8de99ffa5aadc1a130c9a3cbeb3c4936eb71
Parents: f6323eb
Author: Sandeep <sand...@techaddict.me>
Authored: Thu May 8 22:30:17 2014 -0700
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Thu May 8 22:30:58 2014 -0700

----------------------------------------------------------------------
 .../org/apache/spark/scheduler/ShuffleMapTask.scala | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/5c8e8de9/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
index 4b0324f..9ba586f 100644
--- a/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
+++ b/core/src/main/scala/org/apache/spark/scheduler/ShuffleMapTask.scala
@@ -57,15 +57,13 @@ private[spark] object ShuffleMapTask {
   }
 
  def deserializeInfo(stageId: Int, bytes: Array[Byte]): (RDD[_], ShuffleDependency[_,_]) = {
-    synchronized {
-      val loader = Thread.currentThread.getContextClassLoader
-      val in = new GZIPInputStream(new ByteArrayInputStream(bytes))
-      val ser = SparkEnv.get.closureSerializer.newInstance()
-      val objIn = ser.deserializeStream(in)
-      val rdd = objIn.readObject().asInstanceOf[RDD[_]]
-      val dep = objIn.readObject().asInstanceOf[ShuffleDependency[_,_]]
-      (rdd, dep)
-    }
+    val loader = Thread.currentThread.getContextClassLoader
+    val in = new GZIPInputStream(new ByteArrayInputStream(bytes))
+    val ser = SparkEnv.get.closureSerializer.newInstance()
+    val objIn = ser.deserializeStream(in)
+    val rdd = objIn.readObject().asInstanceOf[RDD[_]]
+    val dep = objIn.readObject().asInstanceOf[ShuffleDependency[_,_]]
+    (rdd, dep)
   }
 
  // Since both the JarSet and FileSet have the same format this is used for both.

Reply via email to