Repository: spark
Updated Branches:
  refs/heads/master 3eb842969 -> b1328cc58


[SPARK-25658][SQL][TEST] Refactor HashByteArrayBenchmark to use main method

## What changes were proposed in this pull request?

Refactor `HashByteArrayBenchmark` to use main method.
1. Run with `spark-submit`:
```console
bin/spark-submit --class org.apache.spark.sql.HashByteArrayBenchmark --jars \
  ./core/target/spark-core_2.11-3.0.0-SNAPSHOT-tests.jar \
  ./sql/catalyst/target/spark-catalyst_2.11-3.0.0-SNAPSHOT-tests.jar
```

2. Generate benchmark result:
```console
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain org.apache.spark.sql.HashByteArrayBenchmark"
```

## How was this patch tested?

manual tests

Closes #22652 from wangyum/SPARK-25658.

Lead-authored-by: Yuming Wang <wgy...@gmail.com>
Co-authored-by: Yuming Wang <yumw...@ebay.com>
Co-authored-by: Dongjoon Hyun <dongj...@apache.org>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/b1328cc5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/b1328cc5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/b1328cc5

Branch: refs/heads/master
Commit: b1328cc58ebb73bc191de5546735cffe0c68255e
Parents: 3eb8429
Author: Yuming Wang <wgy...@gmail.com>
Authored: Sun Oct 7 09:44:01 2018 -0700
Committer: Dongjoon Hyun <dongj...@apache.org>
Committed: Sun Oct 7 09:44:01 2018 -0700

----------------------------------------------------------------------
 .../HashByteArrayBenchmark-results.txt          |  77 ++++++++++++
 .../spark/sql/HashByteArrayBenchmark.scala      | 120 ++++---------------
 2 files changed, 102 insertions(+), 95 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/b1328cc5/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt 
b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt
new file mode 100644
index 0000000..a4304ee
--- /dev/null
+++ b/sql/catalyst/benchmarks/HashByteArrayBenchmark-results.txt
@@ -0,0 +1,77 @@
+================================================================================================
+Benchmark for MurMurHash 3 and xxHash64
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 8:          Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                  16 /   16        127.7         
  7.8       1.0X
+xxHash 64-bit                                   23 /   23         90.7         
 11.0       0.7X
+HiveHasher                                      16 /   16        134.8         
  7.4       1.1X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 16:         Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                  26 /   26         79.5         
 12.6       1.0X
+xxHash 64-bit                                   26 /   27         79.3         
 12.6       1.0X
+HiveHasher                                      30 /   30         70.1         
 14.3       0.9X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 24:         Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                  36 /   36         58.1         
 17.2       1.0X
+xxHash 64-bit                                   30 /   30         70.2         
 14.2       1.2X
+HiveHasher                                      45 /   45         46.4         
 21.5       0.8X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 31:         Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                  50 /   50         41.8         
 23.9       1.0X
+xxHash 64-bit                                   43 /   43         49.3         
 20.3       1.2X
+HiveHasher                                      58 /   58         35.9         
 27.8       0.9X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 95:         Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                 132 /  132         15.9         
 62.7       1.0X
+xxHash 64-bit                                   79 /   79         26.7         
 37.5       1.7X
+HiveHasher                                     198 /  199         10.6         
 94.6       0.7X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 287:        Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                 334 /  334          6.3         
159.3       1.0X
+xxHash 64-bit                                  126 /  126         16.7         
 59.9       2.7X
+HiveHasher                                     633 /  634          3.3         
302.0       0.5X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 1055:       Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                1149 / 1149          1.8         
547.9       1.0X
+xxHash 64-bit                                  327 /  327          6.4         
155.9       3.5X
+HiveHasher                                    2338 / 2346          0.9        
1114.6       0.5X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 2079:       Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                2215 / 2216          0.9        
1056.1       1.0X
+xxHash 64-bit                                  554 /  554          3.8         
264.0       4.0X
+HiveHasher                                    4609 / 4609          0.5        
2197.5       0.5X
+
+OpenJDK 64-Bit Server VM 1.8.0_181-b13 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+Hash byte arrays with length 8223:       Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Murmur3_x86_32                                8633 / 8643          0.2        
4116.3       1.0X
+xxHash 64-bit                                 1891 / 1892          1.1         
901.6       4.6X
+HiveHasher                                  18206 / 18206          0.1        
8681.3       0.5X
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/b1328cc5/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala
----------------------------------------------------------------------
diff --git 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala
index a60eb20..7dc865d 100644
--- 
a/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala
+++ 
b/sql/catalyst/src/test/scala/org/apache/spark/sql/HashByteArrayBenchmark.scala
@@ -19,15 +19,24 @@ package org.apache.spark.sql
 
 import java.util.Random
 
-import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
 import org.apache.spark.sql.catalyst.expressions.{HiveHasher, XXH64}
 import org.apache.spark.unsafe.Platform
 import org.apache.spark.unsafe.hash.Murmur3_x86_32
 
 /**
  * Synthetic benchmark for MurMurHash 3 and xxHash64.
+ * To run this benchmark:
+ * {{{
+ *   1. without sbt:
+ *      bin/spark-submit --class <this class> --jars <spark core test jar> <spark catalyst test jar>
+ *   2. build/sbt "catalyst/test:runMain <this class>"
+ *   3. generate result:
+ *      SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "catalyst/test:runMain <this class>"
+ *      Results will be written to "benchmarks/HashByteArrayBenchmark-results.txt".
+ * }}}
  */
-object HashByteArrayBenchmark {
+object HashByteArrayBenchmark extends BenchmarkBase {
   def test(length: Int, seed: Long, numArrays: Int, iters: Int): Unit = {
     val random = new Random(seed)
     val arrays = Array.fill[Array[Byte]](numArrays) {
@@ -36,8 +45,8 @@ object HashByteArrayBenchmark {
       bytes
     }
 
-    val benchmark =
-      new Benchmark("Hash byte arrays with length " + length, iters * 
numArrays.toLong)
+    val benchmark = new Benchmark(
+      "Hash byte arrays with length " + length, iters * numArrays.toLong, 
output = output)
     benchmark.addCase("Murmur3_x86_32") { _: Int =>
       var sum = 0L
       for (_ <- 0L until iters) {
@@ -74,96 +83,17 @@ object HashByteArrayBenchmark {
     benchmark.run()
   }
 
-  def main(args: Array[String]): Unit = {
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 8:          Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                  12 /   16        174.3     
      5.7       1.0X
-    xxHash 64-bit                                   17 /   22        120.0     
      8.3       0.7X
-    HiveHasher                                      13 /   15        162.1     
      6.2       0.9X
-    */
-    test(8, 42L, 1 << 10, 1 << 11)
-
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 16:         Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                  19 /   22        107.6     
      9.3       1.0X
-    xxHash 64-bit                                   20 /   24        104.6     
      9.6       1.0X
-    HiveHasher                                      24 /   28         87.0     
     11.5       0.8X
-    */
-    test(16, 42L, 1 << 10, 1 << 11)
-
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 24:         Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                  28 /   32         74.8     
     13.4       1.0X
-    xxHash 64-bit                                   24 /   29         87.3     
     11.5       1.2X
-    HiveHasher                                      36 /   41         57.7     
     17.3       0.8X
-    */
-    test(24, 42L, 1 << 10, 1 << 11)
-
-    // Add 31 to all arrays to create worse case alignment for xxHash.
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 31:         Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                  41 /   45         51.1     
     19.6       1.0X
-    xxHash 64-bit                                   36 /   44         58.8     
     17.0       1.2X
-    HiveHasher                                      49 /   54         42.6     
     23.5       0.8X
-    */
-    test(31, 42L, 1 << 10, 1 << 11)
-
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 95:         Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                 100 /  110         21.0     
     47.7       1.0X
-    xxHash 64-bit                                   74 /   78         28.2     
     35.5       1.3X
-    HiveHasher                                     189 /  196         11.1     
     90.3       0.5X
-    */
-    test(64 + 31, 42L, 1 << 10, 1 << 11)
-
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 287:        Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                 299 /  311          7.0     
    142.4       1.0X
-    xxHash 64-bit                                  113 /  122         18.5     
     54.1       2.6X
-    HiveHasher                                     620 /  624          3.4     
    295.5       0.5X
-    */
-    test(256 + 31, 42L, 1 << 10, 1 << 11)
-
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 1055:       Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                1068 / 1070          2.0     
    509.1       1.0X
-    xxHash 64-bit                                  306 /  315          6.9     
    145.9       3.5X
-    HiveHasher                                    2316 / 2369          0.9     
   1104.3       0.5X
-    */
-    test(1024 + 31, 42L, 1 << 10, 1 << 11)
-
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 2079:       Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                2252 / 2274          0.9     
   1074.1       1.0X
-    xxHash 64-bit                                  534 /  580          3.9     
    254.6       4.2X
-    HiveHasher                                    4739 / 4786          0.4     
   2259.8       0.5X
-    */
-    test(2048 + 31, 42L, 1 << 10, 1 << 11)
-
-    /*
-    Intel(R) Core(TM) i7-4558U CPU @ 2.80GHz
-    Hash byte arrays with length 8223:       Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Murmur3_x86_32                                9249 / 9586          0.2     
   4410.5       1.0X
-    xxHash 64-bit                                 2897 / 3241          0.7     
   1381.6       3.2X
-    HiveHasher                                  19392 / 20211          0.1     
   9246.6       0.5X
-    */
-    test(8192 + 31, 42L, 1 << 10, 1 << 11)
+  override def runBenchmarkSuite(): Unit = {
+    runBenchmark("Benchmark for MurMurHash 3 and xxHash64") {
+      test(8, 42L, 1 << 10, 1 << 11)
+      test(16, 42L, 1 << 10, 1 << 11)
+      test(24, 42L, 1 << 10, 1 << 11)
+      test(31, 42L, 1 << 10, 1 << 11)
+      test(64 + 31, 42L, 1 << 10, 1 << 11)
+      test(256 + 31, 42L, 1 << 10, 1 << 11)
+      test(1024 + 31, 42L, 1 << 10, 1 << 11)
+      test(2048 + 31, 42L, 1 << 10, 1 << 11)
+      test(8192 + 31, 42L, 1 << 10, 1 << 11)
+    }
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to