cloud-fan commented on a change in pull request #33510:
URL: https://github.com/apache/spark/pull/33510#discussion_r748317106
##########
File path:
sql/core/src/test/scala/org/apache/spark/sql/TPCDSQueryTestSuite.scala
##########
@@ -97,50 +97,91 @@ class TPCDSQueryTestSuite extends QueryTest with TPCDSBase
with SQLQueryTestHelp
""".stripMargin)
}
- private def runQuery(query: String, goldenFile: File): Unit = {
- val (schema, output) = handleExceptions(getNormalizedResult(spark, query))
- val queryString = query.trim
- val outputString = output.mkString("\n").replaceAll("\\s+$", "")
- if (regenerateGoldenFiles) {
- val goldenOutput = {
- s"-- Automatically generated by ${getClass.getSimpleName}\n\n" +
- s"-- !query schema\n" +
- schema + "\n" +
- s"-- !query output\n" +
- outputString +
- "\n"
+ private def runQuery(query: String, goldenFile: File, conf: Seq[(String,
String)], needSort: Boolean): Unit = {
+ withSQLConf(conf: _*) {
+ try {
+ val (schema, output) = handleExceptions(getNormalizedResult(spark,
query))
+ val queryString = query.trim
+ val outputString = output.mkString("\n").replaceAll("\\s+$", "")
+ if (regenerateGoldenFiles) {
+ val goldenOutput = {
+ s"-- Automatically generated by ${getClass.getSimpleName}\n\n" +
+ s"-- !query schema\n" +
+ schema + "\n" +
+ s"-- !query output\n" +
+ outputString +
+ "\n"
+ }
+ val parent = goldenFile.getParentFile
+ if (!parent.exists()) {
+ assert(parent.mkdirs(), "Could not create directory: " + parent)
+ }
+ stringToFile(goldenFile, goldenOutput)
+ }
+
+ // Read back the golden file.
+ val (expectedSchema, expectedOutput) = {
+ val goldenOutput = fileToString(goldenFile)
+ val segments = goldenOutput.split("-- !query.*\n")
+
+ // query has 3 segments, plus the header
+ assert(segments.size == 3,
+ s"Expected 3 blocks in result file but got ${segments.size}. " +
+ "Try regenerate the result files.")
+
+ (segments(1).trim, segments(2).replaceAll("\\s+$", ""))
+ }
+
+ assertResult(expectedSchema, s"Schema did not match\n$queryString") {
+ schema
+ }
+ if (needSort) {
+ val expectSorted = expectedOutput.split("\n").sorted.map(_.trim)
+ .mkString("\n").replaceAll("\\s+$", "")
+ val outputSorted =
output.sorted.map(_.trim).mkString("\n").replaceAll("\\s+$", "")
+ assertResult(expectSorted, s"Result did not match\n$queryString") {
+ outputSorted
+ }
+ } else {
+ assertResult(expectedOutput, s"Result did not match\n$queryString") {
+ outputString
+ }
+ }
+ } catch {
+ case e: Throwable =>
+ val configs = conf.map {
+ case (k, v) => s"$k=$v"
+ }
+ throw new Exception(s"${e.getMessage} \nError using
configs:\n${configs.mkString("\n")}")
}
- val parent = goldenFile.getParentFile
- if (!parent.exists()) {
- assert(parent.mkdirs(), "Could not create directory: " + parent)
- }
- stringToFile(goldenFile, goldenOutput)
}
+ }
- // Read back the golden file.
- val (expectedSchema, expectedOutput) = {
- val goldenOutput = fileToString(goldenFile)
- val segments = goldenOutput.split("-- !query.*\n")
+ val sortMergeJoinConf = Map(
+ SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+ SQLConf.PREFER_SORTMERGEJOIN.key -> "true")
- // query has 3 segments, plus the header
- assert(segments.size == 3,
- s"Expected 3 blocks in result file but got ${segments.size}. " +
- "Try regenerate the result files.")
+ val broadcastHashJoinConf = Map(SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key ->
"10485760")
- (segments(1).trim, segments(2).replaceAll("\\s+$", ""))
- }
+ val shuffleHashJoinConf = Map(
+ SQLConf.AUTO_BROADCASTJOIN_THRESHOLD.key -> "-1",
+ "spark.sql.join.forceApplyShuffledHashJoin" -> "true")
- assertResult(expectedSchema, s"Schema did not match\n$queryString") {
schema }
- assertResult(expectedOutput, s"Result did not match\n$queryString") {
outputString }
- }
+ val joinConfSet: Set[Map[String, String]] =
+ Set(broadcastHashJoinConf, shuffleHashJoinConf);
Review comment:
nit: it's a bit weird that `joinConfSet` doesn't include
`sortMergeJoinConf`. One idea is to include it and run:
```
runQuery(queryString, goldenFile, joinConfSet.head.toSeq, false)
if (!regenerateGoldenFiles) {
joinConfSet.tail.foreach { conf =>
runQuery(queryString, goldenFile, conf.toSeq, true)
}
}
```
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]