This is an automated email from the ASF dual-hosted git repository. yamamuro pushed a commit to branch branch-3.0 in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-3.0 by this push: new 577dbb9 [SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark 577dbb9 is described below commit 577dbb96835f13f4cd92ea4caab9e6dece00be50 Author: Takeshi Yamamuro <yamam...@apache.org> AuthorDate: Wed Nov 11 15:24:05 2020 +0900 [SPARK-33417][SQL][TEST] Correct the behaviour of query filters in TPCDSQueryBenchmark ### What changes were proposed in this pull request? This PR fixes the behaviour of query filters in `TPCDSQueryBenchmark`. We can use the `--query-filter` option to select which TPCDS queries to run, e.g., `--query-filter q6,q8,q13`. However, the current master handles this option incorrectly. For example, if we pass `--query-filter q6` to run only the TPCDS q6, `TPCDSQueryBenchmark` runs both `q6` and `q6-v2.7` because the `filterQueries` method does not respect the name suffix. As a result, there is currently no way to run only the TPCDS q6. With this fix, a v2.7 query is selected only when the filter contains its suffixed name, e.g., `q6-v2.7`. ### Why are the changes needed? Bugfix. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Manually checked. Closes #30324 from maropu/FilterBugInTPCDSQueryBenchmark. 
Authored-by: Takeshi Yamamuro <yamam...@apache.org> Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> (cherry picked from commit 4b367976a877adb981f65d546e1522fdf30d0731) Signed-off-by: Takeshi Yamamuro <yamam...@apache.org> --- .../execution/benchmark/TPCDSQueryBenchmark.scala | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala index 7bbf079..43bc7c1 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/benchmark/TPCDSQueryBenchmark.scala @@ -98,11 +98,16 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { } } - def filterQueries( + private def filterQueries( origQueries: Seq[String], - args: TPCDSQueryBenchmarkArguments): Seq[String] = { - if (args.queryFilter.nonEmpty) { - origQueries.filter(args.queryFilter.contains) + queryFilter: Set[String], + nameSuffix: String = ""): Seq[String] = { + if (queryFilter.nonEmpty) { + if (nameSuffix.nonEmpty) { + origQueries.filter { name => queryFilter.contains(s"$name$nameSuffix") } + } else { + origQueries.filter(queryFilter.contains) + } } else { origQueries } @@ -125,6 +130,7 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { "q91", "q92", "q93", "q94", "q95", "q96", "q97", "q98", "q99") // This list only includes TPC-DS v2.7 queries that are different from v1.4 ones + val nameSuffixForQueriesV2_7 = "-v2.7" val tpcdsQueriesV2_7 = Seq( "q5a", "q6", "q10a", "q11", "q12", "q14", "q14a", "q18a", "q20", "q22", "q22a", "q24", "q27a", "q34", "q35", "q35a", "q36a", "q47", "q49", @@ -132,8 +138,9 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { "q80a", "q86a", "q98") // If `--query-filter` defined, filters the queries that this option selects - val queriesV1_4ToRun = 
filterQueries(tpcdsQueries, benchmarkArgs) - val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs) + val queriesV1_4ToRun = filterQueries(tpcdsQueries, benchmarkArgs.queryFilter) + val queriesV2_7ToRun = filterQueries(tpcdsQueriesV2_7, benchmarkArgs.queryFilter, + nameSuffix = nameSuffixForQueriesV2_7) if ((queriesV1_4ToRun ++ queriesV2_7ToRun).isEmpty) { throw new RuntimeException( @@ -143,6 +150,6 @@ object TPCDSQueryBenchmark extends SqlBasedBenchmark { val tableSizes = setupTables(benchmarkArgs.dataLocation) runTpcdsQueries(queryLocation = "tpcds", queries = queriesV1_4ToRun, tableSizes) runTpcdsQueries(queryLocation = "tpcds-v2.7.0", queries = queriesV2_7ToRun, tableSizes, - nameSuffix = "-v2.7") + nameSuffix = nameSuffixForQueriesV2_7) } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org