Github user kiszk commented on a diff in the pull request:

    https://github.com/apache/spark/pull/19188#discussion_r138010133
  
    --- Diff: 
sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala
 ---
    @@ -113,12 +114,40 @@ object TPCDSQueryBenchmark {
           "q81", "q82", "q83", "q84", "q85", "q86", "q87", "q88", "q89", "q90",
           "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99")
     
    +    val sparkConf = new SparkConf()
    +      .setMaster("local[1]")
    +      .setAppName("test-sql-context")
    +      .set("spark.sql.parquet.compression.codec", "snappy")
    +      .set("spark.sql.shuffle.partitions", "4")
    +      .set("spark.driver.memory", "3g")
    +      .set("spark.executor.memory", "3g")
    +      .set("spark.sql.autoBroadcastJoinThreshold", (20 * 1024 * 
1024).toString)
    +      .set("spark.sql.crossJoin.enabled", "true")
    +
    +    // If `spark.sql.tpcds.queryFilter` is defined, this class runs only the
    +    // queries that this option selects.
    +    val queryFilter = sparkConf
    +      
.getOption("spark.sql.tpcds.queryFilter").map(_.split(",").map(_.trim).toSet)
    +      .getOrElse(Set.empty)
    +
    +    val queriesToRun = if (queryFilter.nonEmpty) {
    +      val queries = tpcdsAllQueries.filter { case queryName => 
queryFilter.contains(queryName) }
    +      if (queries.isEmpty) {
    +        throw new RuntimeException("Bad query name filter: " + queryFilter)
    +      }
    +      queries
    +    } else {
    +      tpcdsAllQueries
    +    }
    +
         // In order to run this benchmark, please follow the instructions at
         // https://github.com/databricks/spark-sql-perf/blob/master/README.md 
to generate the TPCDS data
         // locally (preferably with a scale factor of 5 for benchmarking). 
Thereafter, the value of
         // dataLocation below needs to be set to the location where the 
generated data is stored.
         val dataLocation = ""
     
    -    tpcdsAll(dataLocation, queries = tpcdsQueries)
    +    val spark = SparkSession.builder.config(sparkConf).getOrCreate()
    +    val tpcdsQueries = TpcdsQueries(spark, queries = queriesToRun, 
dataLocation)
    --- End diff --
    
    nit: Do we need `queries =`?


---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to