andylam-db commented on code in PR #44405:
URL: https://github.com/apache/spark/pull/44405#discussion_r1433052187
##########
sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala:
##########
@@ -406,52 +402,74 @@ class SQLQueryTestSuite extends QueryTest with
SharedSparkSession with SQLHelper
}
// List of SQL queries to run
- val queries = tempQueries.map(_.trim).filter(_ != "").toSeq
+ tempQueries.map(_.trim).filter(_ != "")
// Fix misplacement when comment is at the end of the query.
.map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_
!= "")
+ }
+ protected def getSparkSettings(comments: Array[String]): Array[(String,
String)] = {
val settingLines = comments.filter(_.startsWith("--SET
")).map(_.substring(6))
- val settings = settingLines.flatMap(_.split(",").map { kv =>
+ settingLines.flatMap(_.split(",").map { kv =>
val (conf, value) = kv.span(_ != '=')
conf.trim -> value.substring(1).trim
})
+ }
- if (regenerateGoldenFiles) {
- runQueries(queries, testCase, settings.toImmutableArraySeq)
- } else {
- // A config dimension has multiple config sets, and a config set has
multiple configs.
- // - config dim: Seq[Seq[(String, String)]]
- // - config set: Seq[(String, String)]
- //   - config: (String, String)
- // We need to do cartesian product for all the config dimensions, to get
a list of
- // config sets, and run the query once for each config set.
- val configDimLines =
comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12))
- val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).transform
{ (_, lines) =>
- lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map {
kv =>
- val (conf, value) = kv.span(_ != '=')
- conf.trim -> value.substring(1).trim
- }.toSeq).toSeq
- }
+ protected def getSparkConfigDimensions(comments: Array[String]):
Seq[Seq[(String, String)]] = {
+ // A config dimension has multiple config sets, and a config set has
multiple configs.
+ // - config dim: Seq[Seq[(String, String)]]
+ // - config set: Seq[(String, String)]
+ //   - config: (String, String)
+ // We need to do cartesian product for all the config dimensions, to get a
list of
+ // config sets, and run the query once for each config set.
+ val configDimLines =
comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12))
+ val configDims = configDimLines.groupBy(_.takeWhile(_ != '
')).view.mapValues { lines =>
+ lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv
=>
+ val (conf, value) = kv.span(_ != '=')
+ conf.trim -> value.substring(1).trim
+ }.toSeq).toSeq
+ }
- val configSets = configDims.values.foldLeft(Seq(Seq[(String,
String)]())) { (res, dim) =>
- dim.flatMap { configSet => res.map(_ ++ configSet) }
- }
+ configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) =>
+ dim.flatMap { configSet => res.map(_ ++ configSet) }
+ }
+ }
- configSets.foreach { configSet =>
- try {
- runQueries(queries, testCase, (settings ++
configSet).toImmutableArraySeq)
- } catch {
- case e: Throwable =>
- val configs = configSet.map {
- case (k, v) => s"$k=$v"
- }
- logError(s"Error using configs: ${configs.mkString(",")}")
- throw e
- }
+ protected def runQueriesWithSparkConfigDimensions(
+ queries: Seq[String],
+ testCase: TestCase,
+ sparkConfigSet: Array[(String, String)],
+ sparkConfigDims: Seq[Seq[(String, String)]]): Unit = {
Review Comment:
I think the additional clarification is good in the context of
https://github.com/apache/spark/pull/44084, where another DBMS is used for
cross-testing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]