Updated Branches:
  refs/heads/master c46067f09 -> 0e2109ddb

Added unit tests for size estimation for specialized hash sets and maps.


Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/95c55df1
Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/95c55df1
Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/95c55df1

Branch: refs/heads/master
Commit: 95c55df1c21c1b8a90962415861b27ef91d3b20e
Parents: 62889c4
Author: Reynold Xin <[email protected]>
Authored: Mon Nov 25 18:27:06 2013 +0800
Committer: Reynold Xin <[email protected]>
Committed: Mon Nov 25 18:27:06 2013 +0800

----------------------------------------------------------------------
 .../util/collection/OpenHashMapSuite.scala      |  16 ++-
 .../util/collection/OpenHashSetSuite.scala      |  20 +++-
 .../PrimitiveKeyOpenHashMapSuite.scala          | 102 +++++++++++++++++++
 .../PrimitiveKeyOpenHashSetSuite.scala          |  90 ----------------
 4 files changed, 135 insertions(+), 93 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/95c55df1/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala
index ca3f684..63e874f 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala
@@ -2,8 +2,20 @@ package org.apache.spark.util.collection
 
 import scala.collection.mutable.HashSet
 import org.scalatest.FunSuite
-
-class OpenHashMapSuite extends FunSuite {
+import org.scalatest.matchers.ShouldMatchers
+import org.apache.spark.util.SizeEstimator
+
+class OpenHashMapSuite extends FunSuite with ShouldMatchers {
+
+  test("size for specialized, primitive value (int)") {
+    val capacity = 1024
+    val map = new OpenHashMap[String, Int](capacity)
+    val actualSize = SizeEstimator.estimate(map)
+    // 64 bit for pointers, 32 bit for ints, and 1 bit for the bitset.
+    val expectedSize = capacity * (64 + 32 + 1) / 8
+    // Make sure we are not allocating a significant amount of memory beyond our expected.
+    actualSize should be <= (expectedSize * 1.1).toLong
+  }
 
   test("initialization") {
     val goodMap1 = new OpenHashMap[String, Int](1)

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/95c55df1/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala
index 4e11e8a..4768a1e 100644
--- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala
+++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala
@@ -1,9 +1,27 @@
 package org.apache.spark.util.collection
 
 import org.scalatest.FunSuite
+import org.scalatest.matchers.ShouldMatchers
 
+import org.apache.spark.util.SizeEstimator
 
-class OpenHashSetSuite extends FunSuite {
+
+class OpenHashSetSuite extends FunSuite with ShouldMatchers {
+
+  test("size for specialized, primitive int") {
+    val loadFactor = 0.7
+    val set = new OpenHashSet[Int](64, loadFactor)
+    for (i <- 0 until 1024) {
+      set.add(i)
+    }
+    assert(set.size === 1024)
+    assert(set.capacity > 1024)
+    val actualSize = SizeEstimator.estimate(set)
+    // 32 bits for the ints + 1 bit for the bitset
+    val expectedSize = set.capacity * (32 + 1) / 8
+    // Make sure we are not allocating a significant amount of memory beyond our expected.
+    actualSize should be <= (expectedSize * 1.1).toLong
+  }
 
   test("primitive int") {
     val set = new OpenHashSet[Int]

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/95c55df1/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
new file mode 100644
index 0000000..2220b4f
--- /dev/null
+++ b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala
@@ -0,0 +1,102 @@
+package org.apache.spark.util.collection
+
+import scala.collection.mutable.HashSet
+import org.scalatest.FunSuite
+import org.scalatest.matchers.ShouldMatchers
+import org.apache.spark.util.SizeEstimator
+
+class PrimitiveKeyOpenHashMapSuite extends FunSuite with ShouldMatchers {
+
+  test("size for specialized, primitive key, value (int, int)") {
+    val capacity = 1024
+    val map = new PrimitiveKeyOpenHashMap[Int, Int](capacity)
+    val actualSize = SizeEstimator.estimate(map)
+    // 32 bit for keys, 32 bit for values, and 1 bit for the bitset.
+    val expectedSize = capacity * (32 + 32 + 1) / 8
+    // Make sure we are not allocating a significant amount of memory beyond our expected.
+    actualSize should be <= (expectedSize * 1.1).toLong
+  }
+
+  test("initialization") {
+    val goodMap1 = new PrimitiveKeyOpenHashMap[Int, Int](1)
+    assert(goodMap1.size === 0)
+    val goodMap2 = new PrimitiveKeyOpenHashMap[Int, Int](255)
+    assert(goodMap2.size === 0)
+    val goodMap3 = new PrimitiveKeyOpenHashMap[Int, Int](256)
+    assert(goodMap3.size === 0)
+    intercept[IllegalArgumentException] {
+      new PrimitiveKeyOpenHashMap[Int, Int](1 << 30) // Invalid map size: bigger than 2^29
+    }
+    intercept[IllegalArgumentException] {
+      new PrimitiveKeyOpenHashMap[Int, Int](-1)
+    }
+    intercept[IllegalArgumentException] {
+      new PrimitiveKeyOpenHashMap[Int, Int](0)
+    }
+  }
+
+  test("basic operations") {
+    val longBase = 1000000L
+    val map = new PrimitiveKeyOpenHashMap[Long, Int]
+
+    for (i <- 1 to 1000) {
+      map(i + longBase) = i
+      assert(map(i + longBase) === i)
+    }
+
+    assert(map.size === 1000)
+
+    for (i <- 1 to 1000) {
+      assert(map(i + longBase) === i)
+    }
+
+    // Test iterator
+    val set = new HashSet[(Long, Int)]
+    for ((k, v) <- map) {
+      set.add((k, v))
+    }
+    assert(set === (1 to 1000).map(x => (x + longBase, x)).toSet)
+  }
+
+  test("null values") {
+    val map = new PrimitiveKeyOpenHashMap[Long, String]()
+    for (i <- 1 to 100) {
+      map(i.toLong) = null
+    }
+    assert(map.size === 100)
+    assert(map(1.toLong) === null)
+  }
+
+  test("changeValue") {
+    val map = new PrimitiveKeyOpenHashMap[Long, String]()
+    for (i <- 1 to 100) {
+      map(i.toLong) = i.toString
+    }
+    assert(map.size === 100)
+    for (i <- 1 to 100) {
+      val res = map.changeValue(i.toLong, { assert(false); "" }, v => {
+        assert(v === i.toString)
+        v + "!"
+      })
+      assert(res === i + "!")
+    }
+    // Iterate from 101 to 400 to make sure the map grows a couple of times, because we had a
+    // bug where changeValue would return the wrong result when the map grew on that insert
+    for (i <- 101 to 400) {
+      val res = map.changeValue(i.toLong, { i + "!" }, v => { assert(false); v })
+      assert(res === i + "!")
+    }
+    assert(map.size === 400)
+  }
+
+  test("inserting in capacity-1 map") {
+    val map = new PrimitiveKeyOpenHashMap[Long, String](1)
+    for (i <- 1 to 100) {
+      map(i.toLong) = i.toString
+    }
+    assert(map.size === 100)
+    for (i <- 1 to 100) {
+      assert(map(i.toLong) === i.toString)
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/95c55df1/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala
----------------------------------------------------------------------
diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala
deleted file mode 100644
index dfd6aed..0000000
--- a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala
+++ /dev/null
@@ -1,90 +0,0 @@
-package org.apache.spark.util.collection
-
-import scala.collection.mutable.HashSet
-import org.scalatest.FunSuite
-
-class PrimitiveKeyOpenHashSetSuite extends FunSuite {
-
-  test("initialization") {
-    val goodMap1 = new PrimitiveKeyOpenHashMap[Int, Int](1)
-    assert(goodMap1.size === 0)
-    val goodMap2 = new PrimitiveKeyOpenHashMap[Int, Int](255)
-    assert(goodMap2.size === 0)
-    val goodMap3 = new PrimitiveKeyOpenHashMap[Int, Int](256)
-    assert(goodMap3.size === 0)
-    intercept[IllegalArgumentException] {
-      new PrimitiveKeyOpenHashMap[Int, Int](1 << 30) // Invalid map size: bigger than 2^29
-    }
-    intercept[IllegalArgumentException] {
-      new PrimitiveKeyOpenHashMap[Int, Int](-1)
-    }
-    intercept[IllegalArgumentException] {
-      new PrimitiveKeyOpenHashMap[Int, Int](0)
-    }
-  }
-
-  test("basic operations") {
-    val longBase = 1000000L
-    val map = new PrimitiveKeyOpenHashMap[Long, Int]
-
-    for (i <- 1 to 1000) {
-      map(i + longBase) = i
-      assert(map(i + longBase) === i)
-    }
-
-    assert(map.size === 1000)
-
-    for (i <- 1 to 1000) {
-      assert(map(i + longBase) === i)
-    }
-
-    // Test iterator
-    val set = new HashSet[(Long, Int)]
-    for ((k, v) <- map) {
-      set.add((k, v))
-    }
-    assert(set === (1 to 1000).map(x => (x + longBase, x)).toSet)
-  }
-
-  test("null values") {
-    val map = new PrimitiveKeyOpenHashMap[Long, String]()
-    for (i <- 1 to 100) {
-      map(i.toLong) = null
-    }
-    assert(map.size === 100)
-    assert(map(1.toLong) === null)
-  }
-
-  test("changeValue") {
-    val map = new PrimitiveKeyOpenHashMap[Long, String]()
-    for (i <- 1 to 100) {
-      map(i.toLong) = i.toString
-    }
-    assert(map.size === 100)
-    for (i <- 1 to 100) {
-      val res = map.changeValue(i.toLong, { assert(false); "" }, v => {
-        assert(v === i.toString)
-        v + "!"
-      })
-      assert(res === i + "!")
-    }
-    // Iterate from 101 to 400 to make sure the map grows a couple of times, because we had a
-    // bug where changeValue would return the wrong result when the map grew on that insert
-    for (i <- 101 to 400) {
-      val res = map.changeValue(i.toLong, { i + "!" }, v => { assert(false); v })
-      assert(res === i + "!")
-    }
-    assert(map.size === 400)
-  }
-
-  test("inserting in capacity-1 map") {
-    val map = new PrimitiveKeyOpenHashMap[Long, String](1)
-    for (i <- 1 to 100) {
-      map(i.toLong) = i.toString
-    }
-    assert(map.size === 100)
-    for (i <- 1 to 100) {
-      assert(map(i.toLong) === i.toString)
-    }
-  }
-}

Reply via email to