Repository: spark
Updated Branches:
  refs/heads/master f83fedc9f -> b2e325625


[SPARK-25490][SQL][TEST] Fix OOM of KryoBenchmark due to large 2D array and 
refactor it to use main method

## What changes were proposed in this pull request?

Before the code changes, I tried to run it with 8G memory:
```
build/sbt -mem 8000  "core/testOnly org.apache.spark.serializer.KryoBenchmark"
```
I still got an OOM.

This is because the lengths of the arrays are random
https://github.com/apache/spark/blob/669ade3a8eed0016b5ece57d776cea0616417088/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala#L90-L91

And the 2D array is usually large: 10000 inner arrays, each with `Random.nextInt(10000)` elements.

This PR is to fix it and refactor it to use main method.

The benchmark result is also reasonable compared to the original one.

## How was this patch tested?

Run with
```
bin/spark-submit --class org.apache.spark.serializer.KryoBenchmark 
core/target/scala-2.11/spark-core_2.11-3.0.0-SNAPSHOT-tests.jar
```
and
```
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt  "core/test:runMain 
org.apache.spark.serializer.KryoBenchmark"
```

Closes #22663 from gengliangwang/kyroBenchmark.

Authored-by: Gengliang Wang <[email protected]>
Signed-off-by: Sean Owen <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b2e32562
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b2e32562
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b2e32562

Branch: refs/heads/master
Commit: b2e3256256e409d6f7b6e68e6ee26d532d778268
Parents: f83fedc
Author: Gengliang Wang <[email protected]>
Authored: Wed Oct 24 16:56:17 2018 -0500
Committer: Sean Owen <[email protected]>
Committed: Wed Oct 24 16:56:17 2018 -0500

----------------------------------------------------------------------
 core/benchmarks/KryoBenchmark-results.txt       | 29 +++++++
 .../apache/spark/serializer/KryoBenchmark.scala | 79 +++++++++-----------
 2 files changed, 65 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b2e32562/core/benchmarks/KryoBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/core/benchmarks/KryoBenchmark-results.txt 
b/core/benchmarks/KryoBenchmark-results.txt
new file mode 100644
index 0000000..91e22f3
--- /dev/null
+++ b/core/benchmarks/KryoBenchmark-results.txt
@@ -0,0 +1,29 @@
+================================================================================================
+Benchmark Kryo Unsafe vs safe Serialization
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6
+Intel(R) Core(TM) i7-6920HQ CPU @ 2.90GHz
+
+Benchmark Kryo Unsafe vs safe Serialization: Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+basicTypes: Int with unsafe:true               138 /  149          7.2         
138.0       1.0X
+basicTypes: Long with unsafe:true              168 /  173          6.0         
167.7       0.8X
+basicTypes: Float with unsafe:true             153 /  174          6.5         
153.1       0.9X
+basicTypes: Double with unsafe:true            161 /  185          6.2         
161.1       0.9X
+Array: Int with unsafe:true                      2 /    3        409.7         
  2.4      56.5X
+Array: Long with unsafe:true                     4 /    5        232.5         
  4.3      32.1X
+Array: Float with unsafe:true                    3 /    4        367.3         
  2.7      50.7X
+Array: Double with unsafe:true                   4 /    5        228.5         
  4.4      31.5X
+Map of string->Double  with unsafe:true         38 /   45         26.5         
 37.8       3.7X
+basicTypes: Int with unsafe:false              176 /  187          5.7         
175.9       0.8X
+basicTypes: Long with unsafe:false             191 /  203          5.2         
191.2       0.7X
+basicTypes: Float with unsafe:false            166 /  176          6.0         
166.2       0.8X
+basicTypes: Double with unsafe:false           174 /  190          5.7         
174.3       0.8X
+Array: Int with unsafe:false                    19 /   26         52.9         
 18.9       7.3X
+Array: Long with unsafe:false                   27 /   31         37.7         
 26.5       5.2X
+Array: Float with unsafe:false                   8 /   10        124.3         
  8.0      17.2X
+Array: Double with unsafe:false                 12 /   13         83.6         
 12.0      11.5X
+Map of string->Double  with unsafe:false        38 /   42         26.1         
 38.3       3.6X
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/b2e32562/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala
----------------------------------------------------------------------
diff --git 
a/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala 
b/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala
index f4fc008..8a52c13 100644
--- a/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala
+++ b/core/src/test/scala/org/apache/spark/serializer/KryoBenchmark.scala
@@ -20,58 +20,48 @@ package org.apache.spark.serializer
 import scala.reflect.ClassTag
 import scala.util.Random
 
-import org.apache.spark.{SparkConf, SparkFunSuite}
-import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.SparkConf
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
 import org.apache.spark.serializer.KryoTest._
 
-class KryoBenchmark extends SparkFunSuite {
-  val benchmark = new Benchmark("Benchmark Kryo Unsafe vs safe Serialization", 
1024 * 1024 * 15, 10)
-
-  ignore(s"Benchmark Kryo Unsafe vs safe Serialization") {
-    Seq (true, false).foreach (runBenchmark)
-    benchmark.run()
-
-    // scalastyle:off
-    /*
-      Benchmark Kryo Unsafe vs safe Serialization: Best/Avg Time(ms)    
Rate(M/s)   Per Row(ns)   Relative
-      
------------------------------------------------------------------------------------------------
-      basicTypes: Int with unsafe:true               151 /  170        104.2   
        9.6       1.0X
-      basicTypes: Long with unsafe:true              175 /  191         89.8   
       11.1       0.9X
-      basicTypes: Float with unsafe:true             177 /  184         88.8   
       11.3       0.9X
-      basicTypes: Double with unsafe:true            193 /  216         81.4   
       12.3       0.8X
-      Array: Int with unsafe:true                    513 /  587         30.7   
       32.6       0.3X
-      Array: Long with unsafe:true                  1211 / 1358         13.0   
       77.0       0.1X
-      Array: Float with unsafe:true                  890 /  964         17.7   
       56.6       0.2X
-      Array: Double with unsafe:true                1335 / 1428         11.8   
       84.9       0.1X
-      Map of string->Double  with unsafe:true        931 /  988         16.9   
       59.2       0.2X
-      basicTypes: Int with unsafe:false              197 /  217         79.9   
       12.5       0.8X
-      basicTypes: Long with unsafe:false             219 /  240         71.8   
       13.9       0.7X
-      basicTypes: Float with unsafe:false            208 /  217         75.7   
       13.2       0.7X
-      basicTypes: Double with unsafe:false           208 /  225         75.6   
       13.2       0.7X
-      Array: Int with unsafe:false                  2559 / 2681          6.1   
      162.7       0.1X
-      Array: Long with unsafe:false                 3425 / 3516          4.6   
      217.8       0.0X
-      Array: Float with unsafe:false                2025 / 2134          7.8   
      128.7       0.1X
-      Array: Double with unsafe:false               2241 / 2358          7.0   
      142.5       0.1X
-      Map of string->Double  with unsafe:false      1044 / 1085         15.1   
       66.4       0.1X
-    */
-    // scalastyle:on
+/**
+ * Benchmark for Kryo Unsafe vs safe Serialization.
+ * To run this benchmark:
+ * {{{
+ *   1. without sbt:
+ *      bin/spark-submit --class <this class> --jars <spark core test jar>
+ *   2. build/sbt "core/test:runMain <this class>"
+ *   3. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "core/test:runMain <this 
class>"
+ *      Results will be written to "benchmarks/KryoBenchmark-results.txt".
+ * }}}
+ */
+object KryoBenchmark extends BenchmarkBase {
+
+  val N = 1000000
+  override def runBenchmarkSuite(): Unit = {
+    val name = "Benchmark Kryo Unsafe vs safe Serialization"
+    runBenchmark(name) {
+      val benchmark = new Benchmark(name, N, 10, output = output)
+      Seq(true, false).foreach(useUnsafe => run(useUnsafe, benchmark))
+      benchmark.run()
+    }
   }
 
-  private def runBenchmark(useUnsafe: Boolean): Unit = {
+  private def run(useUnsafe: Boolean, benchmark: Benchmark): Unit = {
     def check[T: ClassTag](t: T, ser: SerializerInstance): Int = {
-      if (ser.deserialize[T](ser.serialize(t)) === t) 1 else 0
+      if (ser.deserialize[T](ser.serialize(t)) == t) 1 else 0
     }
 
     // Benchmark Primitives
-    val basicTypeCount = 1000000
     def basicTypes[T: ClassTag](name: String, gen: () => T): Unit = {
       lazy val ser = createSerializer(useUnsafe)
-      val arrayOfBasicType: Array[T] = Array.fill(basicTypeCount)(gen())
+      val arrayOfBasicType: Array[T] = Array.fill(N)(gen())
 
       benchmark.addCase(s"basicTypes: $name with unsafe:$useUnsafe") { _ =>
         var sum = 0L
         var i = 0
-        while (i < basicTypeCount) {
+        while (i < N) {
           sum += check(arrayOfBasicType(i), ser)
           i += 1
         }
@@ -84,11 +74,12 @@ class KryoBenchmark extends SparkFunSuite {
     basicTypes("Double", () => Random.nextDouble())
 
     // Benchmark Array of Primitives
-    val arrayCount = 10000
+    val arrayCount = 4000
+    val arrayLength = N / arrayCount
     def basicTypeArray[T: ClassTag](name: String, gen: () => T): Unit = {
       lazy val ser = createSerializer(useUnsafe)
       val arrayOfArrays: Array[Array[T]] =
-        
Array.fill(arrayCount)(Array.fill[T](Random.nextInt(arrayCount))(gen()))
+        Array.fill(arrayCount)(Array.fill[T](arrayLength + 
Random.nextInt(arrayLength / 4))(gen()))
 
       benchmark.addCase(s"Array: $name with unsafe:$useUnsafe") { _ =>
         var sum = 0L
@@ -107,11 +98,13 @@ class KryoBenchmark extends SparkFunSuite {
     basicTypeArray("Double", () => Random.nextDouble())
 
     // Benchmark Maps
-    val mapsCount = 1000
+    val mapsCount = 200
+    val mapKeyLength = 20
+    val mapLength = N  / mapsCount / mapKeyLength
     lazy val ser = createSerializer(useUnsafe)
     val arrayOfMaps: Array[Map[String, Double]] = Array.fill(mapsCount) {
-      Array.fill(Random.nextInt(mapsCount)) {
-        (Random.nextString(mapsCount / 10), Random.nextDouble())
+      Array.fill(mapLength + Random.nextInt(mapLength / 4)) {
+        (Random.nextString(mapKeyLength), Random.nextDouble())
       }.toMap
     }
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to