Updated Branches: refs/heads/master c46067f09 -> 0e2109ddb
Added unit tests for size estimation for specialized hash sets and maps. Project: http://git-wip-us.apache.org/repos/asf/incubator-spark/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-spark/commit/95c55df1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-spark/tree/95c55df1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-spark/diff/95c55df1 Branch: refs/heads/master Commit: 95c55df1c21c1b8a90962415861b27ef91d3b20e Parents: 62889c4 Author: Reynold Xin <[email protected]> Authored: Mon Nov 25 18:27:06 2013 +0800 Committer: Reynold Xin <[email protected]> Committed: Mon Nov 25 18:27:06 2013 +0800 ---------------------------------------------------------------------- .../util/collection/OpenHashMapSuite.scala | 16 ++- .../util/collection/OpenHashSetSuite.scala | 20 +++- .../PrimitiveKeyOpenHashMapSuite.scala | 102 +++++++++++++++++++ .../PrimitiveKeyOpenHashSetSuite.scala | 90 ---------------- 4 files changed, 135 insertions(+), 93 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/95c55df1/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala index ca3f684..63e874f 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashMapSuite.scala @@ -2,8 +2,20 @@ package org.apache.spark.util.collection import scala.collection.mutable.HashSet import org.scalatest.FunSuite - -class OpenHashMapSuite extends FunSuite { +import org.scalatest.matchers.ShouldMatchers +import org.apache.spark.util.SizeEstimator + +class OpenHashMapSuite extends FunSuite with ShouldMatchers { + + test("size for specialized, primitive value (int)") { + val capacity = 1024 + val map = new OpenHashMap[String, Int](capacity) + val actualSize = SizeEstimator.estimate(map) + // 64 bit for pointers, 32 bit for ints, and 1 bit for the bitset. + val expectedSize = capacity * (64 + 32 + 1) / 8 + // Make sure we are not allocating a significant amount of memory beyond our expected. + actualSize should be <= (expectedSize * 1.1).toLong + } test("initialization") { val goodMap1 = new OpenHashMap[String, Int](1) http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/95c55df1/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala index 4e11e8a..4768a1e 100644 --- a/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala +++ b/core/src/test/scala/org/apache/spark/util/collection/OpenHashSetSuite.scala @@ -1,9 +1,27 @@ package org.apache.spark.util.collection import org.scalatest.FunSuite +import org.scalatest.matchers.ShouldMatchers +import org.apache.spark.util.SizeEstimator -class OpenHashSetSuite extends FunSuite { + +class OpenHashSetSuite extends FunSuite with ShouldMatchers { + + test("size for specialized, primitive int") { + val loadFactor = 0.7 + val set = new OpenHashSet[Int](64, loadFactor) + for (i <- 0 until 1024) { + set.add(i) + } + assert(set.size === 1024) + assert(set.capacity > 1024) + val actualSize = SizeEstimator.estimate(set) + // 32 bits for the ints + 1 bit for the bitset + val expectedSize = set.capacity * (32 + 1) / 8 + // Make sure we are not allocating a significant amount of memory beyond our expected. + actualSize should be <= (expectedSize * 1.1).toLong + } test("primitive int") { val set = new OpenHashSet[Int] http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/95c55df1/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala new file mode 100644 index 0000000..2220b4f --- /dev/null +++ b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashMapSuite.scala @@ -0,0 +1,102 @@ +package org.apache.spark.util.collection + +import scala.collection.mutable.HashSet +import org.scalatest.FunSuite +import org.scalatest.matchers.ShouldMatchers +import org.apache.spark.util.SizeEstimator + +class PrimitiveKeyOpenHashMapSuite extends FunSuite with ShouldMatchers { + + test("size for specialized, primitive key, value (int, int)") { + val capacity = 1024 + val map = new PrimitiveKeyOpenHashMap[Int, Int](capacity) + val actualSize = SizeEstimator.estimate(map) + // 32 bit for keys, 32 bit for values, and 1 bit for the bitset. + val expectedSize = capacity * (32 + 32 + 1) / 8 + // Make sure we are not allocating a significant amount of memory beyond our expected. + actualSize should be <= (expectedSize * 1.1).toLong + } + + test("initialization") { + val goodMap1 = new PrimitiveKeyOpenHashMap[Int, Int](1) + assert(goodMap1.size === 0) + val goodMap2 = new PrimitiveKeyOpenHashMap[Int, Int](255) + assert(goodMap2.size === 0) + val goodMap3 = new PrimitiveKeyOpenHashMap[Int, Int](256) + assert(goodMap3.size === 0) + intercept[IllegalArgumentException] { + new PrimitiveKeyOpenHashMap[Int, Int](1 << 30) // Invalid map size: bigger than 2^29 + } + intercept[IllegalArgumentException] { + new PrimitiveKeyOpenHashMap[Int, Int](-1) + } + intercept[IllegalArgumentException] { + new PrimitiveKeyOpenHashMap[Int, Int](0) + } + } + + test("basic operations") { + val longBase = 1000000L + val map = new PrimitiveKeyOpenHashMap[Long, Int] + + for (i <- 1 to 1000) { + map(i + longBase) = i + assert(map(i + longBase) === i) + } + + assert(map.size === 1000) + + for (i <- 1 to 1000) { + assert(map(i + longBase) === i) + } + + // Test iterator + val set = new HashSet[(Long, Int)] + for ((k, v) <- map) { + set.add((k, v)) + } + assert(set === (1 to 1000).map(x => (x + longBase, x)).toSet) + } + + test("null values") { + val map = new PrimitiveKeyOpenHashMap[Long, String]() + for (i <- 1 to 100) { + map(i.toLong) = null + } + assert(map.size === 100) + assert(map(1.toLong) === null) + } + + test("changeValue") { + val map = new PrimitiveKeyOpenHashMap[Long, String]() + for (i <- 1 to 100) { + map(i.toLong) = i.toString + } + assert(map.size === 100) + for (i <- 1 to 100) { + val res = map.changeValue(i.toLong, { assert(false); "" }, v => { + assert(v === i.toString) + v + "!" + }) + assert(res === i + "!") + } + // Iterate from 101 to 400 to make sure the map grows a couple of times, because we had a + // bug where changeValue would return the wrong result when the map grew on that insert + for (i <- 101 to 400) { + val res = map.changeValue(i.toLong, { i + "!" }, v => { assert(false); v }) + assert(res === i + "!") + } + assert(map.size === 400) + } + + test("inserting in capacity-1 map") { + val map = new PrimitiveKeyOpenHashMap[Long, String](1) + for (i <- 1 to 100) { + map(i.toLong) = i.toString + } + assert(map.size === 100) + for (i <- 1 to 100) { + assert(map(i.toLong) === i.toString) + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-spark/blob/95c55df1/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala ---------------------------------------------------------------------- diff --git a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala b/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala deleted file mode 100644 index dfd6aed..0000000 --- a/core/src/test/scala/org/apache/spark/util/collection/PrimitiveKeyOpenHashSetSuite.scala +++ /dev/null @@ -1,90 +0,0 @@ -package org.apache.spark.util.collection - -import scala.collection.mutable.HashSet -import org.scalatest.FunSuite - -class PrimitiveKeyOpenHashSetSuite extends FunSuite { - - test("initialization") { - val goodMap1 = new PrimitiveKeyOpenHashMap[Int, Int](1) - assert(goodMap1.size === 0) - val goodMap2 = new PrimitiveKeyOpenHashMap[Int, Int](255) - assert(goodMap2.size === 0) - val goodMap3 = new PrimitiveKeyOpenHashMap[Int, Int](256) - assert(goodMap3.size === 0) - intercept[IllegalArgumentException] { - new PrimitiveKeyOpenHashMap[Int, Int](1 << 30) // Invalid map size: bigger than 2^29 - } - intercept[IllegalArgumentException] { - new PrimitiveKeyOpenHashMap[Int, Int](-1) - } - intercept[IllegalArgumentException] { - new PrimitiveKeyOpenHashMap[Int, Int](0) - } - } - - test("basic operations") { - val longBase = 1000000L - val map = new PrimitiveKeyOpenHashMap[Long, Int] - - for (i <- 1 to 1000) { - map(i + longBase) = i - assert(map(i + longBase) === i) - } - - assert(map.size === 1000) - - for (i <- 1 to 1000) { - assert(map(i + longBase) === i) - } - - // Test iterator - val set = new HashSet[(Long, Int)] - for ((k, v) <- map) { - set.add((k, v)) - } - assert(set === (1 to 1000).map(x => (x + longBase, x)).toSet) - } - - test("null values") { - val map = new PrimitiveKeyOpenHashMap[Long, String]() - for (i <- 1 to 100) { - map(i.toLong) = null - } - assert(map.size === 100) - assert(map(1.toLong) === null) - } - - test("changeValue") { - val map = new PrimitiveKeyOpenHashMap[Long, String]() - for (i <- 1 to 100) { - map(i.toLong) = i.toString - } - assert(map.size === 100) - for (i <- 1 to 100) { - val res = map.changeValue(i.toLong, { assert(false); "" }, v => { - assert(v === i.toString) - v + "!" - }) - assert(res === i + "!") - } - // Iterate from 101 to 400 to make sure the map grows a couple of times, because we had a - // bug where changeValue would return the wrong result when the map grew on that insert - for (i <- 101 to 400) { - val res = map.changeValue(i.toLong, { i + "!" }, v => { assert(false); v }) - assert(res === i + "!") - } - assert(map.size === 400) - } - - test("inserting in capacity-1 map") { - val map = new PrimitiveKeyOpenHashMap[Long, String](1) - for (i <- 1 to 100) { - map(i.toLong) = i.toString - } - assert(map.size === 100) - for (i <- 1 to 100) { - assert(map(i.toLong) === i.toString) - } - } -}
