Github user kiszk commented on a diff in the pull request:
https://github.com/apache/spark/pull/19188#discussion_r138010133
--- Diff:
sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
---
@@ -113,12 +114,40 @@ object TPCDSQueryBenchmark {
"q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
"q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
+ val sparkConf = new SparkConf()
+ .setMaster("local[1]")
+ .setAppName("test-sql-context")
+ .set("spark.sql.parquet.compression.codec", "snappy")
+ .set("spark.sql.shuffle.partitions", "4")
+ .set("spark.driver.memory", "3g")
+ .set("spark.executor.memory", "3g")
+ .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 1024).toString)
+ .set("spark.sql.crossJoin.enabled", "true")
+
+ // If `spark.sql.tpcds.queryFilter` is defined, this class filters the queries
+ // that this option selects.
+ val queryFilter = sparkConf
+
.getOption("spark.sql.tpcds.queryFilter").map(_.split(",").map(_.trim).toSet)
+ .getOrElse(Set.empty)
+
+ val queriesToRun = if (queryFilter.nonEmpty) {
+ val queries = tpcdsAllQueries.filter { case queryName => queryFilter.contains(queryName) }
+ if (queries.isEmpty) {
+ throw new RuntimeException("Bad query name filter: " + queryFilter)
+ }
+ queries
+ } else {
+ tpcdsAllQueries
+ }
+
// In order to run this benchmark, please follow the instructions at
// https://github.com/databricks/spark-sql-perf/blob/master/README.md to generate the TPCDS data
// locally (preferably with a scale factor of 5 for benchmarking). Thereafter, the value of
// dataLocation below needs to be set to the location where the generated data is stored.
val dataLocation = ""
- tpcdsAll(dataLocation, queries = tpcdsQueries)
+ val spark = SparkSession.builder.config(sparkConf).getOrCreate()
+ val tpcdsQueries = TpcdsQueries(spark, queries = queriesToRun, dataLocation)
--- End diff --
nit: Do we need `queries =`?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]