Repository: spark Updated Branches: refs/heads/master a241a150d -> 63ca4bbe7
[SPARK-25676][SQL][TEST] Rename and refactor BenchmarkWideTable to use main method ## What changes were proposed in this pull request? Refactor BenchmarkWideTable to use main method. Generate benchmark result: ``` SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain org.apache.spark.sql.execution.benchmark.WideTableBenchmark" ``` ## How was this patch tested? manual tests Closes #22823 from yucai/BenchmarkWideTable. Lead-authored-by: yucai <y...@ebay.com> Co-authored-by: Yucai Yu <yucai...@foxmail.com> Co-authored-by: Dongjoon Hyun <dongj...@apache.org> Signed-off-by: Dongjoon Hyun <dongj...@apache.org> Project: http://git-wip-us.apache.org/repos/asf/spark/repo Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/63ca4bbe Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/63ca4bbe Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/63ca4bbe Branch: refs/heads/master Commit: 63ca4bbe792718029f6d6196e8a6bb11d1f20fca Parents: a241a15 Author: yucai <y...@ebay.com> Authored: Tue Nov 6 15:40:56 2018 -0800 Committer: Dongjoon Hyun <dongj...@apache.org> Committed: Tue Nov 6 15:40:56 2018 -0800 ---------------------------------------------------------------------- .../benchmarks/WideTableBenchmark-results.txt | 17 +++++++ .../benchmark/BenchmarkWideTable.scala | 52 -------------------- .../benchmark/WideTableBenchmark.scala | 52 ++++++++++++++++++++ 3 files changed, 69 insertions(+), 52 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/spark/blob/63ca4bbe/sql/core/benchmarks/WideTableBenchmark-results.txt ---------------------------------------------------------------------- diff --git a/sql/core/benchmarks/WideTableBenchmark-results.txt b/sql/core/benchmarks/WideTableBenchmark-results.txt new file mode 100644 index 0000000..3b41a3e --- /dev/null +++ b/sql/core/benchmarks/WideTableBenchmark-results.txt @@ -0,0 +1,17 @@ 
+================================================================================================ +projection on wide table +================================================================================================ + +OpenJDK 64-Bit Server VM 1.8.0_191-b12 on Linux 3.10.0-862.3.2.el7.x86_64 +Intel(R) Xeon(R) CPU E5-2670 v2 @ 2.50GHz +projection on wide table: Best/Avg Time(ms) Rate(M/s) Per Row(ns) Relative +------------------------------------------------------------------------------------------------ +split threshold 10 38932 / 39307 0.0 37128.1 1.0X +split threshold 100 31991 / 32556 0.0 30508.8 1.2X +split threshold 1024 10993 / 11041 0.1 10483.5 3.5X +split threshold 2048 8959 / 8998 0.1 8543.8 4.3X +split threshold 4096 8116 / 8134 0.1 7739.8 4.8X +split threshold 8192 8069 / 8098 0.1 7695.5 4.8X +split threshold 65536 57068 / 57339 0.0 54424.3 0.7X + + http://git-wip-us.apache.org/repos/asf/spark/blob/63ca4bbe/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala deleted file mode 100644 index 76367cb..0000000 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/BenchmarkWideTable.scala +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.spark.sql.execution.benchmark - -import org.apache.spark.benchmark.Benchmark - -/** - * Benchmark to measure performance for wide table. - * To run this: - * build/sbt "sql/test-only *benchmark.BenchmarkWideTable" - * - * Benchmarks in this file are skipped in normal builds. - */ -class BenchmarkWideTable extends BenchmarkWithCodegen { - - ignore("project on wide table") { - val N = 1 << 20 - val df = sparkSession.range(N) - val columns = (0 until 400).map{ i => s"id as id$i"} - val benchmark = new Benchmark("projection on wide table", N) - benchmark.addCase("wide table", numIters = 5) { iter => - df.selectExpr(columns : _*).queryExecution.toRdd.count() - } - benchmark.run() - - /** - * Here are some numbers with different split threshold: - * - * Split threshold methods Rate(M/s) Per Row(ns) - * 10 400 0.4 2279 - * 100 200 0.6 1554 - * 1k 37 0.9 1116 - * 8k 5 0.5 2025 - * 64k 1 0.0 21649 - */ - } -} http://git-wip-us.apache.org/repos/asf/spark/blob/63ca4bbe/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala ---------------------------------------------------------------------- diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala new file mode 100644 index 0000000..ffefef1 --- /dev/null +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/WideTableBenchmark.scala @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.sql.execution.benchmark + +import org.apache.spark.benchmark.Benchmark +import org.apache.spark.sql.internal.SQLConf + +/** + * Benchmark to measure performance for wide table. + * {{{ + * To run this benchmark: + * 1. without sbt: bin/spark-submit --class <this class> + * --jars <spark core test jar>,<spark catalyst test jar> <spark sql test jar> + * 2. build/sbt "sql/test:runMain <this class>" + * 3. generate result: SPARK_GENERATE_BENCHMARK_FILES=1 build/sbt "sql/test:runMain <this class>" + * Results will be written to "benchmarks/WideTableBenchmark-results.txt". 
+ * }}} + */ +object WideTableBenchmark extends SqlBasedBenchmark { + + override def runBenchmarkSuite(mainArgs: Array[String]): Unit = { + runBenchmark("projection on wide table") { + val N = 1 << 20 + val df = spark.range(N) + val columns = (0 until 400).map{ i => s"id as id$i"} + val benchmark = new Benchmark("projection on wide table", N, output = output) + Seq("10", "100", "1024", "2048", "4096", "8192", "65536").foreach { n => + benchmark.addCase(s"split threshold $n", numIters = 5) { iter => + withSQLConf(SQLConf.CODEGEN_METHOD_SPLIT_THRESHOLD.key -> n) { + df.selectExpr(columns: _*).foreach(identity(_)) + } + } + } + benchmark.run() + } + } +} --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org