Repository: spark
Updated Branches:
  refs/heads/master a72d118cd -> 9bf04d854


[SPARK-25489][ML][TEST] Refactor UDTSerializationBenchmark

## What changes were proposed in this pull request?
Refactor `UDTSerializationBenchmark` to use the main method and write the output to
a separate file.

Run the command below to generate benchmark results:

```
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "mllib/test:runMain org.apache.spark.mllib.linalg.UDTSerializationBenchmark"
```

## How was this patch tested?
Manual tests.

Closes #22499 from seancxmao/SPARK-25489.

Authored-by: seancxmao <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9bf04d85
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9bf04d85
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9bf04d85

Branch: refs/heads/master
Commit: 9bf04d8543d70ba8e55c970f2a8e2df872cf74f6
Parents: a72d118
Author: seancxmao <[email protected]>
Authored: Sun Sep 23 13:34:06 2018 -0700
Committer: Dongjoon Hyun <[email protected]>
Committed: Sun Sep 23 13:34:06 2018 -0700

----------------------------------------------------------------------
 .../UDTSerializationBenchmark-results.txt       | 13 ++++
 .../linalg/UDTSerializationBenchmark.scala      | 70 ++++++++++----------
 2 files changed, 49 insertions(+), 34 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9bf04d85/mllib/benchmarks/UDTSerializationBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/mllib/benchmarks/UDTSerializationBenchmark-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-results.txt
new file mode 100644
index 0000000..169f4c6
--- /dev/null
+++ b/mllib/benchmarks/UDTSerializationBenchmark-results.txt
@@ -0,0 +1,13 @@
+================================================================================================
+VectorUDT de/serialization
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6
+Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz
+
+VectorUDT de/serialization:              Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+serialize                                      144 /  206          0.0      143979.7       1.0X
+deserialize                                    114 /  135          0.0      113802.6       1.3X
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/9bf04d85/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala
----------------------------------------------------------------------
diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala
index e2976e1..1a2216e 100644
--- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala
+++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala
@@ -17,53 +17,55 @@
 
 package org.apache.spark.mllib.linalg
 
-import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
 import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder
 
 /**
  * Serialization benchmark for VectorUDT.
+ * To run this benchmark:
+ * {{{
+ * 1. without sbt: bin/spark-submit --class <this class> <spark mllib test jar>
+ * 2. build/sbt "mllib/test:runMain <this class>"
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "mllib/test:runMain <this class>"
+ *    Results will be written to "benchmarks/UDTSerializationBenchmark-results.txt".
+ * }}}
  */
-object UDTSerializationBenchmark {
+object UDTSerializationBenchmark extends BenchmarkBase {
 
-  def main(args: Array[String]): Unit = {
-    val iters = 1e2.toInt
-    val numRows = 1e3.toInt
+  override def benchmark(): Unit = {
 
-    val encoder = ExpressionEncoder[Vector].resolveAndBind()
+    runBenchmark("VectorUDT de/serialization") {
+      val iters = 1e2.toInt
+      val numRows = 1e3.toInt
 
-    val vectors = (1 to numRows).map { i =>
-      Vectors.dense(Array.fill(1e5.toInt)(1.0 * i))
-    }.toArray
-    val rows = vectors.map(encoder.toRow)
+      val encoder = ExpressionEncoder[Vector].resolveAndBind()
 
-    val benchmark = new Benchmark("VectorUDT de/serialization", numRows, iters)
+      val vectors = (1 to numRows).map { i =>
+        Vectors.dense(Array.fill(1e5.toInt)(1.0 * i))
+      }.toArray
+      val rows = vectors.map(encoder.toRow)
 
-    benchmark.addCase("serialize") { _ =>
-      var sum = 0
-      var i = 0
-      while (i < numRows) {
-        sum += encoder.toRow(vectors(i)).numFields
-        i += 1
+      val benchmark = new Benchmark("VectorUDT de/serialization", numRows, iters, output = output)
+
+      benchmark.addCase("serialize") { _ =>
+        var sum = 0
+        var i = 0
+        while (i < numRows) {
+          sum += encoder.toRow(vectors(i)).numFields
+          i += 1
+        }
       }
-    }
 
-    benchmark.addCase("deserialize") { _ =>
-      var sum = 0
-      var i = 0
-      while (i < numRows) {
-        sum += encoder.fromRow(rows(i)).numActives
-        i += 1
+      benchmark.addCase("deserialize") { _ =>
+        var sum = 0
+        var i = 0
+        while (i < numRows) {
+          sum += encoder.fromRow(rows(i)).numActives
+          i += 1
+        }
       }
-    }
 
-    /*
-    OpenJDK 64-Bit Server VM 1.8.0_91-b14 on Linux 4.4.11-200.fc22.x86_64
-    Intel Xeon E3-12xx v2 (Ivy Bridge)
-    VectorUDT de/serialization:              Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
-    ------------------------------------------------------------------------------------------------
-    serialize                                      265 /  318          0.0      265138.5       1.0X
-    deserialize                                    155 /  197          0.0      154611.4       1.7X
-    */
-    benchmark.run()
+      benchmark.run()
+    }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to