Github user dongjoon-hyun commented on a diff in the pull request:
https://github.com/apache/spark/pull/22484#discussion_r220304213
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/SqlBasedBenchmark.scala
---
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.execution.benchmark
+
+import org.apache.spark.benchmark.{Benchmark, BenchmarkBase}
+import org.apache.spark.sql.{AnalysisException, SparkSession}
+import org.apache.spark.sql.internal.SQLConf
+
+/**
+ * Common base trait to run benchmark with the Dataset and DataFrame API.
+ */
+trait SqlBasedBenchmark extends BenchmarkBase {
+
+ val spark: SparkSession = getSparkSession
+
+ /** Subclass can override this function to build their own SparkSession
*/
+ def getSparkSession: SparkSession = {
+ SparkSession.builder()
+ .master("local[1]")
+ .appName(this.getClass.getCanonicalName)
+ .config(SQLConf.SHUFFLE_PARTITIONS.key, 1)
+ .config(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key, 1)
+ .getOrCreate()
+ }
+
+ /**
+ * Sets all SQL configurations specified in `pairs`, calls `f`, and then
restores all SQL
+ * configurations.
+ */
+ protected def withSQLConf(pairs: (String, String)*)(f: => Unit): Unit = {
+ val conf = SQLConf.get
+ val (keys, values) = pairs.unzip
+ val currentValues = keys.map { key =>
+ if (conf.contains(key)) {
+ Some(conf.getConfString(key))
+ } else {
+ None
+ }
+ }
+ (keys, values).zipped.foreach { (k, v) =>
+ if (SQLConf.staticConfKeys.contains(k)) {
+ throw new AnalysisException(s"Cannot modify the value of a static
config: $k")
+ }
+ conf.setConfString(k, v)
+ }
+ try f finally {
+ keys.zip(currentValues).foreach {
+ case (key, Some(value)) => conf.setConfString(key, value)
+ case (key, None) => conf.unsetConf(key)
+ }
+ }
+ }
+
+ /** Runs function `f` with whole stage codegen on and off. */
+ def runBenchmarkWithCodegen(name: String, cardinality: Long)(f: =>
Unit): Unit = {
+ val benchmark = new Benchmark(name, cardinality, output = output)
+
+ benchmark.addCase(s"$name wholestage off", numIters = 2) { _ =>
+ withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> "false") {
--- End diff --
Is it too much to introduce a trait for `withSQLConf`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]