Repository: spark
Updated Branches:
  refs/heads/master c3c45cbd7 -> 9063b17f3


[SPARK-25481][SQL][TEST] Refactor ColumnarBatchBenchmark to use main method

## What changes were proposed in this pull request?

Refactor `ColumnarBatchBenchmark` to extend `BenchmarkBase` and use its main method instead of defining its own.
To generate the benchmark result file, run:
```
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.vectorized.ColumnarBatchBenchmark"
```

## How was this patch tested?

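Manually tested by running the benchmark; the generated results are committed in `sql/core/benchmarks/ColumnarBatchBenchmark-results.txt`.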

Closes #22490 from yucai/SPARK-25481.

Lead-authored-by: yucai <y...@ebay.com>
Co-authored-by: Yucai Yu <yucai...@foxmail.com>
Co-authored-by: Dongjoon Hyun <dongj...@apache.org>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/9063b17f
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/9063b17f
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/9063b17f

Branch: refs/heads/master
Commit: 9063b17f3d0f22b8e4142200259190a20f832a29
Parents: c3c45cb
Author: yucai <y...@ebay.com>
Authored: Wed Sep 26 20:40:10 2018 -0700
Committer: Dongjoon Hyun <dongj...@apache.org>
Committed: Wed Sep 26 20:40:10 2018 -0700

----------------------------------------------------------------------
 .../ColumnarBatchBenchmark-results.txt          | 59 ++++++++++++++
 .../vectorized/ColumnarBatchBenchmark.scala     | 84 ++++++--------------
 2 files changed, 85 insertions(+), 58 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/9063b17f/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt 
b/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt
new file mode 100644
index 0000000..5963716
--- /dev/null
+++ b/sql/core/benchmarks/ColumnarBatchBenchmark-results.txt
@@ -0,0 +1,59 @@
+================================================================================================
+Int Read/Write
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Int Read/Write:                          Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Java Array                                     244 /  244       1342.3         
  0.7       1.0X
+ByteBuffer Unsafe                              445 /  445        736.5         
  1.4       0.5X
+ByteBuffer API                                2124 / 2125        154.3         
  6.5       0.1X
+DirectByteBuffer                               750 /  750        437.2         
  2.3       0.3X
+Unsafe Buffer                                  234 /  236       1401.3         
  0.7       1.0X
+Column(on heap)                                245 /  245       1335.6         
  0.7       1.0X
+Column(off heap)                               489 /  489        670.3         
  1.5       0.5X
+Column(off heap direct)                        236 /  236       1388.1         
  0.7       1.0X
+UnsafeRow (on heap)                            532 /  534        616.0         
  1.6       0.5X
+UnsafeRow (off heap)                           564 /  565        580.7         
  1.7       0.4X
+Column On Heap Append                          489 /  489        670.6         
  1.5       0.5X
+
+
+================================================================================================
+Boolean Read/Write
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Boolean Read/Write:                      Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Bitset                                         879 /  879        381.9         
  2.6       1.0X
+Byte Array                                     794 /  794        422.6         
  2.4       1.1X
+
+
+================================================================================================
+String Read/Write
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+String Read/Write:                       Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+On Heap                                        449 /  449         36.5         
 27.4       1.0X
+Off Heap                                       679 /  679         24.1         
 41.4       0.7X
+
+
+================================================================================================
+Array Vector Read
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Array Vector Read:                       Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+On Heap Read Size Only                         713 /  713        229.8         
  4.4       1.0X
+Off Heap Read Size Only                        757 /  757        216.5         
  4.6       0.9X
+On Heap Read Elements                         3648 / 3650         44.9         
 22.3       0.2X
+Off Heap Read Elements                        5263 / 5265         31.1         
 32.1       0.1X
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/9063b17f/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala
index d69cf11..df6ab14 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/vectorized/ColumnarBatchBenchmark.scala
@@ -21,7 +21,7 @@ import java.nio.charset.StandardCharsets
 
 import scala.util.Random
 
-import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
 import org.apache.spark.memory.MemoryMode
 import org.apache.spark.sql.catalyst.expressions.UnsafeRow
 import org.apache.spark.sql.types.{ArrayType, BinaryType, IntegerType}
@@ -30,8 +30,15 @@ import org.apache.spark.util.collection.BitSet
 
 /**
  * Benchmark to low level memory access using different ways to manage buffers.
+ * To run this benchmark:
+ * {{{
+ *   1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
+ *   2. build/sbt "sql/test:runMain <this class>"
+ *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ *      Results will be written to "benchmarks/ColumnarBatchBenchmark-results.txt".
+ * }}}
  */
-object ColumnarBatchBenchmark {
+object ColumnarBatchBenchmark extends BenchmarkBase {
   // This benchmark reads and writes an array of ints.
   // TODO: there is a big (2x) penalty for a random access API for off heap.
   // Note: carefully if modifying this code. It's hard to reason about the JIT.
@@ -260,25 +267,7 @@ object ColumnarBatchBenchmark {
       col.close
     }
 
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    Int Read/Write:                          Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Java Array                                     177 /  183       1851.1     
      0.5       1.0X
-    ByteBuffer Unsafe                              314 /  330       1043.7     
      1.0       0.6X
-    ByteBuffer API                                1298 / 1307        252.4     
      4.0       0.1X
-    DirectByteBuffer                               465 /  483        704.2     
      1.4       0.4X
-    Unsafe Buffer                                  179 /  183       1835.5     
      0.5       1.0X
-    Column(on heap)                                181 /  186       1815.2     
      0.6       1.0X
-    Column(off heap)                               344 /  349        951.7     
      1.1       0.5X
-    Column(off heap direct)                        178 /  186       1838.6     
      0.5       1.0X
-    UnsafeRow (on heap)                            388 /  394        844.8     
      1.2       0.5X
-    UnsafeRow (off heap)                           400 /  403        819.4     
      1.2       0.4X
-    Column On Heap Append                          315 /  325       1041.8     
      1.0       0.6X
-    */
-    val benchmark = new Benchmark("Int Read/Write", count * iters)
+    val benchmark = new Benchmark("Int Read/Write", count * iters, output = 
output)
     benchmark.addCase("Java Array")(javaArray)
     benchmark.addCase("ByteBuffer Unsafe")(byteBufferUnsafe)
     benchmark.addCase("ByteBuffer API")(byteBufferApi)
@@ -295,7 +284,7 @@ object ColumnarBatchBenchmark {
 
   def booleanAccess(iters: Int): Unit = {
     val count = 8 * 1024
-    val benchmark = new Benchmark("Boolean Read/Write", iters * count.toLong)
+    val benchmark = new Benchmark("Boolean Read/Write", iters * count.toLong, 
output = output)
     benchmark.addCase("Bitset") { i: Int => {
       val b = new BitSet(count)
       var sum = 0L
@@ -329,15 +318,6 @@ object ColumnarBatchBenchmark {
         }
       }
     }}
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    Boolean Read/Write:                      Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Bitset                                         741 /  747        452.6     
      2.2       1.0X
-    Byte Array                                     531 /  542        631.6     
      1.6       1.4X
-    */
     benchmark.run()
   }
 
@@ -386,16 +366,7 @@ object ColumnarBatchBenchmark {
       }
     }
 
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    String Read/Write:                       Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    On Heap                                        351 /  362         46.6     
     21.4       1.0X
-    Off Heap                                       456 /  466         35.9     
     27.8       0.8X
-    */
-    val benchmark = new Benchmark("String Read/Write", count * iters)
+    val benchmark = new Benchmark("String Read/Write", count * iters, output = 
output)
     benchmark.addCase("On Heap")(column(MemoryMode.ON_HEAP))
     benchmark.addCase("Off Heap")(column(MemoryMode.OFF_HEAP))
     benchmark.run
@@ -463,30 +434,27 @@ object ColumnarBatchBenchmark {
       }
     }
 
-    val benchmark = new Benchmark("Array Vector Read", count * iters)
+    val benchmark = new Benchmark("Array Vector Read", count * iters, output = 
output)
     benchmark.addCase("On Heap Read Size Only") { _ => readArrays(true) }
     benchmark.addCase("Off Heap Read Size Only") { _ => readArrays(false) }
     benchmark.addCase("On Heap Read Elements") { _ => readArrayElements(true) }
     benchmark.addCase("Off Heap Read Elements") { _ => 
readArrayElements(false) }
 
-    /*
-    Java HotSpot(TM) 64-Bit Server VM 1.8.0_60-b27 on Mac OS X 10.13.1
-    Intel(R) Core(TM) i7-4960HQ CPU @ 2.60GHz
-
-    Array Vector Read:                       Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    On Heap Read Size Only                         426 /  437        384.9     
      2.6       1.0X
-    Off Heap Read Size Only                        406 /  421        404.0     
      2.5       1.0X
-    On Heap Read Elements                         2636 / 2642         62.2     
     16.1       0.2X
-    Off Heap Read Elements                        3770 / 3774         43.5     
     23.0       0.1X
-    */
     benchmark.run
   }
 
-  def main(args: Array[String]): Unit = {
-    intAccess(1024 * 40)
-    booleanAccess(1024 * 40)
-    stringAccess(1024 * 4)
-    arrayAccess(1024 * 40)
+  override def benchmark(): Unit = {
+    runBenchmark("Int Read/Write") {
+      intAccess(1024 * 40)
+    }
+    runBenchmark("Boolean Read/Write") {
+      booleanAccess(1024 * 40)
+    }
+    runBenchmark("String Read/Write") {
+      stringAccess(1024 * 4)
+    }
+    runBenchmark("Array Vector Read") {
+      arrayAccess(1024 * 40)
+    }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to