Repository: spark
Updated Branches:
  refs/heads/master a241a150d -> 63ca4bbe7


[SPARK-25676][SQL][TEST] Rename and refactor BenchmarkWideTable to use main 
method

## What changes were proposed in this pull request?

Rename `BenchmarkWideTable` to `WideTableBenchmark` and refactor it to use a main method instead of a test case.
To generate the benchmark result:

```
SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.WideTableBenchmark"
```
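
For an ad-hoc run that prints the results to the console instead of regenerating the results file, the scaladoc of the new class also lists the plain `runMain` invocation:

```
build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.WideTableBenchmark"
```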

## How was this patch tested?

Manual tests.

Closes #22823 from yucai/BenchmarkWideTable.

Lead-authored-by: yucai <y...@ebay.com>
Co-authored-by: Yucai Yu <yucai...@foxmail.com>
Co-authored-by: Dongjoon Hyun <dongj...@apache.org>
Signed-off-by: Dongjoon Hyun <dongj...@apache.org>


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63ca4bbe
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63ca4bbe
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63ca4bbe

Branch: refs/heads/master
Commit: 63ca4bbe792718029f6d6196e8a6bb11d1f20fca
Parents: a241a15
Author: yucai <y...@ebay.com>
Authored: Tue Nov 6 15:40:56 2018 -0800
Committer: Dongjoon Hyun <dongj...@apache.org>
Committed: Tue Nov 6 15:40:56 2018 -0800

----------------------------------------------------------------------
 .../benchmarks/WideTableBenchmark-results.txt   | 17 +++++++
 .../benchmark/BenchmarkWideTable.scala          | 52 --------------------
 .../benchmark/WideTableBenchmark.scala          | 52 ++++++++++++++++++++
 3 files changed, 69 insertions(+), 52 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/63ca4bbe/sql/core/benchmarks/WideTableBenchmark-results.txt
----------------------------------------------------------------------
diff --git a/sql/core/benchmarks/WideTableBenchmark-results.txt b/sql/core/benchmarks/WideTableBenchmark-results.txt
new file mode 100644
index 0000000..3b41a3e
--- /dev/null
+++ b/sql/core/benchmarks/WideTableBenchmark-results.txt
@@ -0,0 +1,17 @@
+================================================================================================
+projection on wide table
+================================================================================================
+
+OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64
+Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz
+projection on wide table:                Best/Avg Time(ms)    Rate(M/s)   Per Row(ns)   Relative
+------------------------------------------------------------------------------------------------
+split threshold 10                          38932 / 39307          0.0       37128.1       1.0X
+split threshold 100                         31991 / 32556          0.0       30508.8       1.2X
+split threshold 1024                        10993 / 11041          0.1       10483.5       3.5X
+split threshold 2048                          8959 / 8998          0.1        8543.8       4.3X
+split threshold 4096                          8116 / 8134          0.1        7739.8       4.8X
+split threshold 8196                          8069 / 8098          0.1        7695.5       4.8X
+split threshold 65536                       57068 / 57339          0.0       54424.3       0.7X
+
+

http://git-wip-us.apache.org/repos/asf/spark/blob/63ca4bbe/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
deleted file mode 100644
index 76367cb..0000000
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.spark.sql.execution.benchmark
-
-import org.apache.spark.benchmark.Benchmark
-
-/**
- * Benchmark to measure performance for wide table.
- * To run this:
- *  build/sbt "sql/test-only *benchmark.BenchmarkWideTable"
- *
- * Benchmarks in this file are skipped in normal builds.
- */
-class BenchmarkWideTable extends BenchmarkWithCodegen {
-
-  ignore("project on wide table") {
-    val N = 1 << 20
-    val df = sparkSession.range(N)
-    val columns = (0 until 400).map{ i => s"id as id$i"}
-    val benchmark = new Benchmark("projection on wide table", N)
-    benchmark.addCase("wide table", numIters = 5) { iter =>
-      df.selectExpr(columns : _*).queryExecution.toRdd.count()
-    }
-    benchmark.run()
-
-    /**
-     * Here are some numbers with different split threshold:
-     *
-     *  Split threshold      methods       Rate(M/s)   Per Row(ns)
-     *  10                   400           0.4         2279
-     *  100                  200           0.6         1554
-     *  1k                   37            0.9         1116
-     *  8k                   5             0.5         2025
-     *  64k                  1             0.0        21649
-     */
-  }
-}

http://git-wip-us.apache.org/repos/asf/spark/blob/63ca4bbe/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala
new file mode 100644
index 0000000..ffefef1
--- /dev/null
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala
@@ -0,0 +1,52 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import org.apache.spark.benchmark.Benchmark
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Benchmark to measure performance for wide table.
+ * {{{
+ *   To run this benchmark:
+ *   1. without sbt: bin/spark-submit --class <this class>
+ *        --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar>
+ *   2. build/sbt "sql/test:runMain <this class>"
+ *   3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>"
+ *      Results will be written to "benchmarks/WideTableBenchmark-results.txt".
+ * }}}
+ */
+object WideTableBenchmark extends SqlBasedBenchmark {
+
+  override def runBenchmarkSuite(mainArgs: Array[String]): Unit = {
+    runBenchmark("projection on wide table") {
+      val N = 1 << 20
+      val df = spark.range(N)
+      val columns = (0 until 400).map{ i => s"id as id$i"}
+      val benchmark = new Benchmark("projection on wide table", N, output = output)
+      Seq("10", "100", "1024", "2048", "4096", "8192", "65536").foreach { n =>
+        benchmark.addCase(s"split threshold $n", numIters = 5) { iter =>
+          withSQLConf(SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> n) {
+            df.selectExpr(columns: _*).foreach(identity(_))
+          }
+        }
+      }
+      benchmark.run()
+    }
+  }
+}
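
For readers who want to spot-check the effect of the split threshold outside the benchmark harness, the sketch below reproduces the same wide projection with a plain SparkSession. It is illustrative only: the `WideProjectionRepro` object name, the `local[*]` master, the two threshold values and the crude wall-clock timing are assumptions, not part of this commit.

```scala
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.internal.SQLConf

object WideProjectionRepro {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[*]")          // assumption: local run for a quick spot-check
      .appName("WideProjectionRepro")
      .getOrCreate()

    // Same shape as the benchmark: a 1M-row range projected into 400 aliased columns.
    val N = 1 << 20
    val columns = (0 until 400).map(i => s"id as id$i")

    // Sweep the codegen method split threshold, as WideTableBenchmark does.
    Seq("1024", "8192").foreach { n =>
      spark.conf.set(SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key, n)
      val start = System.nanoTime()
      // Force full evaluation of the wide projection.
      spark.range(N).selectExpr(columns: _*).foreach(_ => ())
      println(s"split threshold $n: ${(System.nanoTime() - start) / 1e6} ms")
    }

    spark.stop()
  }
}
```

Unlike the committed benchmark, this prints rough wall-clock times rather than using the `Benchmark` utility, so the numbers are not comparable to the results file above.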

