Repository: spark
Updated Branches:
  refs/heads/master 8ad486add -> 0b7b7fd45


[SPARK-1194] Fix the same-RDD rule for cache replacement

SPARK-1194: https://spark-project.atlassian.net/browse/SPARK-1194

In the current implementation, when selecting candidate blocks to be swapped 
out, once we find a block from the same RDD that the block to be stored belongs 
to, cache eviction fails and aborts.

In this PR, we keep selecting blocks *not* from the RDD that the block to be 
stored belongs to until either enough free space can be ensured (cache eviction 
succeeds) or all such blocks are checked (cache eviction fails).

Author: Cheng Lian <lian.cs....@gmail.com>

Closes #96 from liancheng/fix-spark-1194 and squashes the following commits:

2524ab9 [Cheng Lian] Added regression test case for SPARK-1194
6e40c22 [Cheng Lian] Remove redundant comments
40cdcb2 [Cheng Lian] Bug fix, and addressed PR comments from @mridulm
62c92ac [Cheng Lian] Fixed SPARK-1194 
https://spark-project.atlassian.net/browse/SPARK-1194


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/0b7b7fd4
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/0b7b7fd4
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/0b7b7fd4

Branch: refs/heads/master
Commit: 0b7b7fd45cd9037d23cb090e62be3ff075214fe7
Parents: 8ad486a
Author: Cheng Lian <lian.cs....@gmail.com>
Authored: Fri Mar 7 23:26:46 2014 -0800
Committer: Patrick Wendell <pwend...@gmail.com>
Committed: Fri Mar 7 23:26:46 2014 -0800

----------------------------------------------------------------------
 .../scala/org/apache/spark/storage/MemoryStore.scala  | 11 +++++------
 .../org/apache/spark/storage/BlockManagerSuite.scala  | 14 ++++++++++++++
 2 files changed, 19 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/0b7b7fd4/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala
----------------------------------------------------------------------
diff --git a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala
index b89212e..38836d4 100644
--- a/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala
+++ b/core/src/main/scala/org/apache/spark/storage/MemoryStore.scala
@@ -236,13 +236,10 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long)
         while (maxMemory - (currentMemory - selectedMemory) < space && iterator.hasNext) {
           val pair = iterator.next()
           val blockId = pair.getKey
-          if (rddToAdd.isDefined && rddToAdd == getRddId(blockId)) {
-            logInfo("Will not store " + blockIdToAdd + " as it would require dropping another " +
-              "block from the same RDD")
-            return false
+          if (rddToAdd.isEmpty || rddToAdd != getRddId(blockId)) {
+            selectedBlocks += blockId
+            selectedMemory += pair.getValue.size
           }
-          selectedBlocks += blockId
-          selectedMemory += pair.getValue.size
         }
       }
 
@@ -264,6 +261,8 @@ private class MemoryStore(blockManager: BlockManager, maxMemory: Long)
         }
         return true
       } else {
+        logInfo(s"Will not store $blockIdToAdd as it would require dropping another block " +
+          "from the same RDD")
         return false
       }
     }

http://git-wip-us.apache.org/repos/asf/spark/blob/0b7b7fd4/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
index 121e47c..1036b9f 100644
--- a/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
+++ b/core/src/test/scala/org/apache/spark/storage/BlockManagerSuite.scala
@@ -662,4 +662,18 @@ class BlockManagerSuite extends FunSuite with BeforeAndAfter with PrivateMethodT
       assert(store.getSingle("a1") == None, "a1 should not be in store")
     }
   }
+
+  test("SPARK-1194 regression: fix the same-RDD rule for cache replacement") {
+    store = new BlockManager("<driver>", actorSystem, master, serializer, 1200, conf, securityMgr)
+    store.putSingle(rdd(0, 0), new Array[Byte](400), StorageLevel.MEMORY_ONLY)
+    store.putSingle(rdd(1, 0), new Array[Byte](400), StorageLevel.MEMORY_ONLY)
+    // Access rdd_1_0 to ensure it's not least recently used.
+    assert(store.getSingle(rdd(1, 0)).isDefined, "rdd_1_0 was not in store")
+    // According to the same-RDD rule, rdd_1_0 should be replaced here.
+    store.putSingle(rdd(0, 1), new Array[Byte](400), StorageLevel.MEMORY_ONLY)
+    // rdd_1_0 should have been replaced, even it's not least recently used.
+    assert(store.memoryStore.contains(rdd(0, 0)), "rdd_0_0 was not in store")
+    assert(store.memoryStore.contains(rdd(0, 1)), "rdd_0_1 was not in store")
+    assert(!store.memoryStore.contains(rdd(1, 0)), "rdd_1_0 was in store")
+  }
 }

Reply via email to