Repository: spark Updated Branches: refs/heads/master 596af211a -> 1f4ca6f5c
[SPARK-25487][SQL][TEST] Refactor PrimitiveArrayBenchmark ## What changes were proposed in this pull request? Refactor PrimitiveArrayBenchmark to use main method and print the output as a separate file. Run the command below to generate benchmark results: ``` SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.PrimitiveArrayBenchmark" ``` ## How was this patch tested? Manual tests. Closes #22497 from seancxmao/SPARK-25487. Authored-by: seancxmao <[email protected]> Signed-off-by: Kazuaki Ishizaki <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f4ca6f5 Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f4ca6f5 Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f4ca6f5 Branch: refs/heads/master Commit: 1f4ca6f5c52560585ea977bddc69243a29bf67f2 Parents: 596af21 Author: seancxmao <[email protected]> Authored: Fri Sep 21 15:04:47 2018 +0900 Committer: Kazuaki Ishizaki <[email protected]> Committed: Fri Sep 21 15:04:47 2018 +0900 ---------------------------------------------------------------------- .../PrimitiveArrayBenchmark-results.txt | 13 ++++++ .../benchmark/PrimitiveArrayBenchmark.scala | 47 +++++++++----------- 2 files changed, 35 insertions(+), 25 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/1f4ca6f5/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt ---------------------------------------------------------------------- diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt new file mode 100644 index 0000000..b06b5c0 --- /dev/null +++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt @@ -0,0 +1,13 @@ +================================================================================================ +Write primitive arrays in dataset 
+================================================================================================ + +Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6 +Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz + +Write an array in Dataset: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Int 437 / 529 19.2 52.1 1.0X +Double 638 / 670 13.1 76.1 0.7X + + http://git-wip-us.apache.org/repos/asf/spark/blob/1f4ca6f5/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala index e7c8f27..7f467d1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala @@ -17,21 +17,30 @@ package org.apache.spark.sql.execution.benchmark -import scala.concurrent.duration._ - -import org.apache.spark.SparkConf -import org.apache.spark.sql.catalyst.util._ -import org.apache.spark.util.Benchmark +import org.apache.spark.sql.SparkSession +import org.apache.spark.util.{Benchmark, BenchmarkBase => FileBenchmarkBase} /** - * Benchmark [[PrimitiveArray]] for DataFrame and Dataset program using primitive array - * To run this: - * 1. replace ignore(...) with test(...) - * 2. build/sbt "sql/test-only *benchmark.PrimitiveArrayBenchmark" - * - * Benchmarks in this file are skipped in normal builds. + * Benchmark primitive arrays via DataFrame and Dataset program using primitive arrays + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. 
generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/PrimitiveArrayBenchmark-results.txt". */ -class PrimitiveArrayBenchmark extends BenchmarkBase { +object PrimitiveArrayBenchmark extends FileBenchmarkBase { + lazy val sparkSession = SparkSession.builder + .master("local[1]") + .appName("microbenchmark") + .config("spark.sql.shuffle.partitions", 1) + .config("spark.sql.autoBroadcastJoinThreshold", 1) + .getOrCreate() + + override def benchmark(): Unit = { + runBenchmark("Write primitive arrays in dataset") { + writeDatasetArray(4) + } + } def writeDatasetArray(iters: Int): Unit = { import sparkSession.implicits._ @@ -62,21 +71,9 @@ class PrimitiveArrayBenchmark extends BenchmarkBase { } } - val benchmark = new Benchmark("Write an array in Dataset", count * iters) + val benchmark = new Benchmark("Write an array in Dataset", count * iters, output = output) benchmark.addCase("Int ")(intArray) benchmark.addCase("Double")(doubleArray) benchmark.run - /* - OpenJDK 64-Bit Server VM 1.8.0_91-b14 on Linux 4.4.11-200.fc22.x86_64 - Intel Xeon E3-12xx v2 (Ivy Bridge) - Write an array in Dataset: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Int 352 / 401 23.8 42.0 1.0X - Double 821 / 885 10.2 97.9 0.4X - */ - } - - ignore("Write an array in Dataset") { - writeDatasetArray(4) } } --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
