spark git commit: [SPARK-25487][SQL][TEST] Refactor PrimitiveArrayBenchmark

kiszk Thu, 20 Sep 2018 23:06:33 -0700

Repository: spark
Updated Branches:
  refs/heads/master 596af211a -> 1f4ca6f5c



[SPARK-25487][SQL][TEST] Refactor PrimitiveArrayBenchmark

## What changes were proposed in this pull request?
Refactor PrimitiveArrayBenchmark to use a main method and write the output to a 
separate file.

Run the command below to generate benchmark results:

```
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain 
org.apache.spark.sql.execution.benchmark.PrimitiveArrayBenchmark"
```

## How was this patch tested?
Manual tests.

Closes #22497 from seancxmao/SPARK-25487.

Authored-by: seancxmao <[email protected]>
Signed-off-by: Kazuaki Ishizaki <[email protected]>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/1f4ca6f5
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/1f4ca6f5
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/1f4ca6f5

Branch: refs/heads/master
Commit: 1f4ca6f5c52560585ea977bddc69243a29bf67f2
Parents: 596af21
Author: seancxmao <[email protected]>
Authored: Fri Sep 21 15:04:47 2018 +0900
Committer: Kazuaki Ishizaki <[email protected]>
Committed: Fri Sep 21 15:04:47 2018 +0900

----------------------------------------------------------------------
 .../PrimitiveArrayBenchmark-results.txt         | 13 ++++++
 .../benchmark/PrimitiveArrayBenchmark.scala     | 47 +++++++++-----------
 2 files changed, 35 insertions(+), 25 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/1f4ca6f5/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt 
b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
new file mode 100644
index 0000000..b06b5c0
--- /dev/null
+++ b/sql/core/benchmarks/PrimitiveArrayBenchmark-results.txt
@@ -0,0 +1,13 @@
+================================================================================================
+Write primitive arrays in dataset
+================================================================================================
+
+Java HotSpot(TM) 64-Bit Server VM 1.8.0_131-b11 on Mac OS X 10.13.6
+Intel(R) Core(TM) i7-6820HQ CPU @ 2.70GHz
+
+Write an array in Dataset:               Best/Avg Time(ms)    Rate(M/s)   Per 
Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+Int                                            437 /  529         19.2         
 52.1       1.0X
+Double                                         638 /  670         13.1         
 76.1       0.7X
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/1f4ca6f5/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
----------------------------------------------------------------------
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
index e7c8f27..7f467d1 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/PrimitiveArrayBenchmark.scala
@@ -17,21 +17,30 @@
 
 package org.apache.spark.sql.execution.benchmark
 
-import scala.concurrent.duration._
-
-import org.apache.spark.SparkConf
-import org.apache.spark.sql.catalyst.util._
-import org.apache.spark.util.Benchmark
+import org.apache.spark.sql.SparkSession
+import org.apache.spark.util.{Benchmark, BenchmarkBase => FileBenchmarkBase}
 
 /**
- * Benchmark [[PrimitiveArray]] for DataFrame and Dataset program using 
primitive array
- * To run this:
- *  1. replace ignore(...) with test(...)
- *  2. build/sbt "sql/test-only *benchmark.PrimitiveArrayBenchmark"
- *
- * Benchmarks in this file are skipped in normal builds.
+ * Benchmark primitive arrays via DataFrame and Dataset program using 
primitive arrays
+ * To run this benchmark:
+ * 1. without sbt: bin/spark-submit --class <this class> <spark sql test jar>
+ * 2. build/sbt "sql/test:runMain <this class>"
+ * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt 
"sql/test:runMain <this class>"
+ *    Results will be written to 
"benchmarks/PrimitiveArrayBenchmark-results.txt".
  */
-class PrimitiveArrayBenchmark extends BenchmarkBase {
+object PrimitiveArrayBenchmark extends FileBenchmarkBase {
+  lazy val sparkSession = SparkSession.builder
+    .master("local[1]")
+    .appName("microbenchmark")
+    .config("spark.sql.shuffle.partitions", 1)
+    .config("spark.sql.autoBroadcastJoinThreshold", 1)
+    .getOrCreate()
+
+  override def benchmark(): Unit = {
+    runBenchmark("Write primitive arrays in dataset") {
+      writeDatasetArray(4)
+    }
+  }
 
   def writeDatasetArray(iters: Int): Unit = {
     import sparkSession.implicits._
@@ -62,21 +71,9 @@ class PrimitiveArrayBenchmark extends BenchmarkBase {
       }
     }
 
-    val benchmark = new Benchmark("Write an array in Dataset", count * iters)
+    val benchmark = new Benchmark("Write an array in Dataset", count * iters, 
output = output)
     benchmark.addCase("Int   ")(intArray)
     benchmark.addCase("Double")(doubleArray)
     benchmark.run
-    /*
-    OpenJDK 64-Bit Server VM 1.8.0_91-b14 on Linux 4.4.11-200.fc22.x86_64
-    Intel Xeon E3-12xx v2 (Ivy Bridge)
-    Write an array in Dataset:               Best/Avg Time(ms)    Rate(M/s)   
Per Row(ns)   Relative
-    
------------------------------------------------------------------------------------------------
-    Int                                            352 /  401         23.8     
     42.0       1.0X
-    Double                                         821 /  885         10.2     
     97.9       0.4X
-    */
-  }
-
-  ignore("Write an array in Dataset") {
-    writeDatasetArray(4)
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-25487][SQL][TEST] Refactor PrimitiveArrayBenchmark

Reply via email to