Repository: spark Updated Branches: refs/heads/master a72d118cd -> 9bf04d854
[SPARK-25489][ML][TEST] Refactor UDTSerializationBenchmark ## What changes were proposed in this pull request? Refactor `UDTSerializationBenchmark` to use main method and write the output to a separate file. Run the command below to generate benchmark results: ``` SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "mllib/test:runMain org.apache.spark.mllib.linalg.UDTSerializationBenchmark" ``` ## How was this patch tested? Manual tests. Closes #22499 from seancxmao/SPARK-25489. Authored-by: seancxmao <[email protected]> Signed-off-by: Dongjoon Hyun <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9bf04d85 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9bf04d85 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9bf04d85 Branch: refs/heads/master Commit: 9bf04d8543d70ba8e55c970f2a8e2df872cf74f6 Parents: a72d118 Author: seancxmao <[email protected]> Authored: Sun Sep 23 13:34:06 2018 -0700 Committer: Dongjoon Hyun <[email protected]> Committed: Sun Sep 23 13:34:06 2018 -0700 ---------------------------------------------------------------------- .../UDTSerializationBenchmark-results.txt | 13 ++++ .../linalg/UDTSerializationBenchmark.scala | 70 ++++++++++---------- 2 files changed, 49 insertions(+), 34 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/9bf04d85/mllib/benchmarks/UDTSerializationBenchmark-results.txt ---------------------------------------------------------------------- diff --git a/mllib/benchmarks/UDTSerializationBenchmark-results.txt b/mllib/benchmarks/UDTSerializationBenchmark-results.txt new file mode 100644 index 0000000..169f4c6 --- /dev/null +++ b/mllib/benchmarks/UDTSerializationBenchmark-results.txt @@ -0,0 +1,13 @@ +================================================================================================ +VectorUDT de/serialization 
+================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz + +VectorUDT de/serialization: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +serialize 144 / 206 0.0 143979.7 1.0X +deserialize 114 / 135 0.0 113802.6 1.3X + + http://git-wip-us.apache.org/repos/asf/spark/blob/9bf04d85/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala ---------------------------------------------------------------------- diff --git a/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala b/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala index e2976e1..1a2216e 100644 --- a/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala +++ b/mllib/src/test/scala/org/apache/spark/mllib/linalg/UDTSerializationBenchmark.scala @@ -17,53 +17,55 @@ package org.apache.spark.mllib.linalg -import org.apache.spark.benchmark.Benchmark +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.sql.catalyst.encoders.ExpressionEncoder /** * Serialization benchmark for VectorUDT. + * To run this benchmark: + * {{{ + * 1. without sbt: bin/spark-submit --class <this class> <spark mllib test jar> + * 2. build/sbt "mllib/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "mllib/test:runMain <this class>" + * Results will be written to "benchmarks/UDTSerializationBenchmark-results.txt". 
+ * }}} */ -object UDTSerializationBenchmark { +object UDTSerializationBenchmark extends BenchmarkBase { - def main(args: Array[String]): Unit = { - val iters = 1e2.toInt - val numRows = 1e3.toInt + override def benchmark(): Unit = { - val encoder = ExpressionEncoder[Vector].resolveAndBind() + runBenchmark("VectorUDT de/serialization") { + val iters = 1e2.toInt + val numRows = 1e3.toInt - val vectors = (1 to numRows).map { i => - Vectors.dense(Array.fill(1e5.toInt)(1.0 * i)) - }.toArray - val rows = vectors.map(encoder.toRow) + val encoder = ExpressionEncoder[Vector].resolveAndBind() - val benchmark = new Benchmark("VectorUDT de/serialization", numRows, iters) + val vectors = (1 to numRows).map { i => + Vectors.dense(Array.fill(1e5.toInt)(1.0 * i)) + }.toArray + val rows = vectors.map(encoder.toRow) - benchmark.addCase("serialize") { _ => - var sum = 0 - var i = 0 - while (i < numRows) { - sum += encoder.toRow(vectors(i)).numFields - i += 1 + val benchmark = new Benchmark("VectorUDT de/serialization", numRows, iters, output = output) + + benchmark.addCase("serialize") { _ => + var sum = 0 + var i = 0 + while (i < numRows) { + sum += encoder.toRow(vectors(i)).numFields + i += 1 + } } - } - benchmark.addCase("deserialize") { _ => - var sum = 0 - var i = 0 - while (i < numRows) { - sum += encoder.fromRow(rows(i)).numActives - i += 1 + benchmark.addCase("deserialize") { _ => + var sum = 0 + var i = 0 + while (i < numRows) { + sum += encoder.fromRow(rows(i)).numActives + i += 1 + } } - } - /* - OpenJDK 64-Bit Server VM 1.8.0_91-b14 on Linux 4.4.11-200.fc22.x86_64 - Intel Xeon E3-12xx v2 (Ivy Bridge) - VectorUDT de/serialization: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - serialize 265 / 318 0.0 265138.5 1.0X - deserialize 155 / 197 0.0 154611.4 1.7X - */ - benchmark.run() + benchmark.run() + } } } 
--------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
