Repository: spark Updated Branches: refs/heads/master c3c45cbd7 -> 9063b17f3
[SPARK-25481][SQL][TEST] Refactor ColumnarBatchBenchmark to use main method ## What changes were proposed in this pull request? Refactor `ColumnarBatchBenchmark` to use main method. Generate benchmark result: ``` SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.vectorized.ColumnarBatchBenchmark" ``` ## How was this patch tested? manual tests Closes #22490 from yucai/SPARK-25481. Lead-authored-by: yucai <y...@ebay.com> Co-authored-by: Yucai Yu <yucai...@foxmail.com> Co-authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9063b17f Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9063b17f Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9063b17f Branch: refs/heads/master Commit: 9063b17f3d0f22b8e4142200259190a20f832a29 Parents: c3c45cb Author: yucai <y...@ebay.com> Authored: Wed Sep 26 20:40:10 2018 -0700 Committer: Dongjoon Hyun <dongj...@apache.org> Committed: Wed Sep 26 20:40:10 2018 -0700 ---------------------------------------------------------------------- .../ColumnarBatchBenchmark-results.txt | 59 ++++++++++++++ .../vectorized/ColumnarBatchBenchmark.scala | 84 ++++++-------------- 2 files changed, 85 insertions(+), 58 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/9063b17f/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt ---------------------------------------------------------------------- diff --git a/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt b/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt new file mode 100644 index 0000000..5963716 --- /dev/null +++ b/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt @@ -0,0 +1,59 @@ +================================================================================================ +Int 
Read/Write +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Int Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Java Array 244 / 244 1342.3 0.7 1.0X +ByteBuffer Unsafe 445 / 445 736.5 1.4 0.5X +ByteBuffer API 2124 / 2125 154.3 6.5 0.1X +DirectByteBuffer 750 / 750 437.2 2.3 0.3X +Unsafe Buffer 234 / 236 1401.3 0.7 1.0X +Column(on heap) 245 / 245 1335.6 0.7 1.0X +Column(off heap) 489 / 489 670.3 1.5 0.5X +Column(off heap direct) 236 / 236 1388.1 0.7 1.0X +UnsafeRow (on heap) 532 / 534 616.0 1.6 0.5X +UnsafeRow (off heap) 564 / 565 580.7 1.7 0.4X +Column On Heap Append 489 / 489 670.6 1.5 0.5X + + +================================================================================================ +Boolean Read/Write +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Boolean Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +Bitset 879 / 879 381.9 2.6 1.0X +Byte Array 794 / 794 422.6 2.4 1.1X + + +================================================================================================ +String Read/Write +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +String Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +On Heap 449 / 449 36.5 27.4 1.0X 
+Off Heap 679 / 679 24.1 41.4 0.7X + + +================================================================================================ +Array Vector Read +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +Array Vector Read: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +On Heap Read Size Only 713 / 713 229.8 4.4 1.0X +Off Heap Read Size Only 757 / 757 216.5 4.6 0.9X +On Heap Read Elements 3648 / 3650 44.9 22.3 0.2X +Off Heap Read Elements 5263 / 5265 31.1 32.1 0.1X + + http://git-wip-us.apache.org/repos/asf/spark/blob/9063b17f/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala index d69cf11..df6ab14 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala @@ -21,7 +21,7 @@ import java.nio.charset.StandardCharsets import scala.util.Random -import org.apache.spark.benchmark.Benchmark +import org.apache.spark.benchmark.{Benchmark, BenchmarkBase} import org.apache.spark.memory.MemoryMode import org.apache.spark.sql.catalyst.expressions.UnsafeRow import org.apache.spark.sql.types.{ArrayType, BinaryType, IntegerType} @@ -30,8 +30,15 @@ import org.apache.spark.util.collection.BitSet /** * Benchmark for low-level memory access using different ways to manage buffers. + * To run this benchmark: + * {{{ + * 1. 
without sbt: bin/spark-submit --class <this class> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/ColumnarBatchBenchmark-results.txt". + * }}} */ -object ColumnarBatchBenchmark { +object ColumnarBatchBenchmark extends BenchmarkBase { // This benchmark reads and writes an array of ints. // TODO: there is a big (2x) penalty for a random access API for off heap. // Note: be careful when modifying this code. It's hard to reason about the JIT. @@ -260,25 +267,7 @@ object ColumnarBatchBenchmark { col.close } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - Int Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Java Array 177 / 183 1851.1 0.5 1.0X - ByteBuffer Unsafe 314 / 330 1043.7 1.0 0.6X - ByteBuffer API 1298 / 1307 252.4 4.0 0.1X - DirectByteBuffer 465 / 483 704.2 1.4 0.4X - Unsafe Buffer 179 / 183 1835.5 0.5 1.0X - Column(on heap) 181 / 186 1815.2 0.6 1.0X - Column(off heap) 344 / 349 951.7 1.1 0.5X - Column(off heap direct) 178 / 186 1838.6 0.5 1.0X - UnsafeRow (on heap) 388 / 394 844.8 1.2 0.5X - UnsafeRow (off heap) 400 / 403 819.4 1.2 0.4X - Column On Heap Append 315 / 325 1041.8 1.0 0.6X - */ - val benchmark = new Benchmark("Int Read/Write", count * iters) + val benchmark = new Benchmark("Int Read/Write", count * iters, output = output) benchmark.addCase("Java Array")(javaArray) benchmark.addCase("ByteBuffer Unsafe")(byteBufferUnsafe) benchmark.addCase("ByteBuffer API")(byteBufferApi) @@ -295,7 +284,7 @@ object ColumnarBatchBenchmark { def booleanAccess(iters: Int): Unit = { val count = 8 * 1024 - val benchmark = new Benchmark("Boolean Read/Write", iters * count.toLong) + val benchmark = new Benchmark("Boolean 
Read/Write", iters * count.toLong, output = output) benchmark.addCase("Bitset") { i: Int => { val b = new BitSet(count) var sum = 0L @@ -329,15 +318,6 @@ object ColumnarBatchBenchmark { } } }} - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - Boolean Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - Bitset 741 / 747 452.6 2.2 1.0X - Byte Array 531 / 542 631.6 1.6 1.4X - */ benchmark.run() } @@ -386,16 +366,7 @@ object ColumnarBatchBenchmark { } } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - String Read/Write: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - ------------------------------------------------------------------------------------------------ - On Heap 351 / 362 46.6 21.4 1.0X - Off Heap 456 / 466 35.9 27.8 0.8X - */ - val benchmark = new Benchmark("String Read/Write", count * iters) + val benchmark = new Benchmark("String Read/Write", count * iters, output = output) benchmark.addCase("On Heap")(column(MemoryMode.ON_HEAP)) benchmark.addCase("Off Heap")(column(MemoryMode.OFF_HEAP)) benchmark.run @@ -463,30 +434,27 @@ object ColumnarBatchBenchmark { } } - val benchmark = new Benchmark("Array Vector Read", count * iters) + val benchmark = new Benchmark("Array Vector Read", count * iters, output = output) benchmark.addCase("On Heap Read Size Only") { _ => readArrays(true) } benchmark.addCase("Off Heap Read Size Only") { _ => readArrays(false) } benchmark.addCase("On Heap Read Elements") { _ => readArrayElements(true) } benchmark.addCase("Off Heap Read Elements") { _ => readArrayElements(false) } - /* - Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1 - Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz - - Array Vector Read: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative - 
------------------------------------------------------------------------------------------------ - On Heap Read Size Only 426 / 437 384.9 2.6 1.0X - Off Heap Read Size Only 406 / 421 404.0 2.5 1.0X - On Heap Read Elements 2636 / 2642 62.2 16.1 0.2X - Off Heap Read Elements 3770 / 3774 43.5 23.0 0.1X - */ benchmark.run } - def main(args: Array[String]): Unit = { - intAccess(1024 * 40) - booleanAccess(1024 * 40) - stringAccess(1024 * 4) - arrayAccess(1024 * 40) + override def benchmark(): Unit = { + runBenchmark("Int Read/Write") { + intAccess(1024 * 40) + } + runBenchmark("Boolean Read/Write") { + booleanAccess(1024 * 40) + } + runBenchmark("String Read/Write") { + stringAccess(1024 * 4) + } + runBenchmark("Array Vector Read") { + arrayAccess(1024 * 40) + } } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org