beliefer commented on code in PR #44405:
URL: https://github.com/apache/spark/pull/44405#discussion_r1432695753


##########
sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala:
##########
@@ -406,52 +402,74 @@ class SQLQueryTestSuite extends QueryTest with 
SharedSparkSession with SQLHelper
     }
 
     // List of SQL queries to run
-    val queries = tempQueries.map(_.trim).filter(_ != "").toSeq
+    tempQueries.map(_.trim).filter(_ != "")
       // Fix misplacement when comment is at the end of the query.
       
.map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_
 != "")
+  }
 
+  protected def getSparkSettings(comments: Array[String]): Array[(String, 
String)] = {
     val settingLines = comments.filter(_.startsWith("--SET 
")).map(_.substring(6))
-    val settings = settingLines.flatMap(_.split(",").map { kv =>
+    settingLines.flatMap(_.split(",").map { kv =>
       val (conf, value) = kv.span(_ != '=')
       conf.trim -> value.substring(1).trim
     })
+  }
 
-    if (regenerateGoldenFiles) {
-      runQueries(queries, testCase, settings.toImmutableArraySeq)
-    } else {
-      // A config dimension has multiple config sets, and a config set has 
multiple configs.
-      // - config dim:     Seq[Seq[(String, String)]]
-      //   - config set:   Seq[(String, String)]
-      //     - config:     (String, String))
-      // We need to do cartesian product for all the config dimensions, to get 
a list of
-      // config sets, and run the query once for each config set.
-      val configDimLines = 
comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12))
-      val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).transform 
{ (_, lines) =>
-        lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { 
kv =>
-          val (conf, value) = kv.span(_ != '=')
-          conf.trim -> value.substring(1).trim
-        }.toSeq).toSeq
-      }
+  protected def getSparkConfigDimensions(comments: Array[String]): 
Seq[Seq[(String, String)]] = {
+    // A config dimension has multiple config sets, and a config set has 
multiple configs.
+    // - config dim:     Seq[Seq[(String, String)]]
+    //   - config set:   Seq[(String, String)]
+    //     - config:     (String, String))
+    // We need to do cartesian product for all the config dimensions, to get a 
list of
+    // config sets, and run the query once for each config set.
+    val configDimLines = 
comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12))
+    val configDims = configDimLines.groupBy(_.takeWhile(_ != ' 
')).view.mapValues { lines =>
+      lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv 
=>
+        val (conf, value) = kv.span(_ != '=')
+        conf.trim -> value.substring(1).trim
+      }.toSeq).toSeq
+    }
 
-      val configSets = configDims.values.foldLeft(Seq(Seq[(String, 
String)]())) { (res, dim) =>
-        dim.flatMap { configSet => res.map(_ ++ configSet) }
-      }
+    configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) =>
+      dim.flatMap { configSet => res.map(_ ++ configSet) }
+    }
+  }
 
-      configSets.foreach { configSet =>
-        try {
-          runQueries(queries, testCase, (settings ++ 
configSet).toImmutableArraySeq)
-        } catch {
-          case e: Throwable =>
-            val configs = configSet.map {
-              case (k, v) => s"$k=$v"
-            }
-            logError(s"Error using configs: ${configs.mkString(",")}")
-            throw e
-        }
+  protected def runQueriesWithSparkConfigDimensions(
+      queries: Seq[String],
+      testCase: TestCase,
+      sparkConfigSet: Array[(String, String)],
+      sparkConfigDims: Seq[Seq[(String, String)]]): Unit = {

Review Comment:
   I like the original names `configSet` and `configDims`.



##########
sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala:
##########
@@ -406,52 +402,74 @@ class SQLQueryTestSuite extends QueryTest with 
SharedSparkSession with SQLHelper
     }
 
     // List of SQL queries to run
-    val queries = tempQueries.map(_.trim).filter(_ != "").toSeq
+    tempQueries.map(_.trim).filter(_ != "")
       // Fix misplacement when comment is at the end of the query.
       
.map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_
 != "")
+  }
 
+  protected def getSparkSettings(comments: Array[String]): Array[(String, 
String)] = {
     val settingLines = comments.filter(_.startsWith("--SET 
")).map(_.substring(6))
-    val settings = settingLines.flatMap(_.split(",").map { kv =>
+    settingLines.flatMap(_.split(",").map { kv =>
       val (conf, value) = kv.span(_ != '=')
       conf.trim -> value.substring(1).trim
     })
+  }
 
-    if (regenerateGoldenFiles) {
-      runQueries(queries, testCase, settings.toImmutableArraySeq)
-    } else {
-      // A config dimension has multiple config sets, and a config set has 
multiple configs.
-      // - config dim:     Seq[Seq[(String, String)]]
-      //   - config set:   Seq[(String, String)]
-      //     - config:     (String, String))
-      // We need to do cartesian product for all the config dimensions, to get 
a list of
-      // config sets, and run the query once for each config set.
-      val configDimLines = 
comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12))
-      val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).transform 
{ (_, lines) =>
-        lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { 
kv =>
-          val (conf, value) = kv.span(_ != '=')
-          conf.trim -> value.substring(1).trim
-        }.toSeq).toSeq
-      }
+  protected def getSparkConfigDimensions(comments: Array[String]): 
Seq[Seq[(String, String)]] = {
+    // A config dimension has multiple config sets, and a config set has 
multiple configs.
+    // - config dim:     Seq[Seq[(String, String)]]
+    //   - config set:   Seq[(String, String)]
+    //     - config:     (String, String))
+    // We need to do cartesian product for all the config dimensions, to get a 
list of
+    // config sets, and run the query once for each config set.
+    val configDimLines = 
comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12))
+    val configDims = configDimLines.groupBy(_.takeWhile(_ != ' 
')).view.mapValues { lines =>
+      lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { kv 
=>
+        val (conf, value) = kv.span(_ != '=')
+        conf.trim -> value.substring(1).trim
+      }.toSeq).toSeq
+    }
 
-      val configSets = configDims.values.foldLeft(Seq(Seq[(String, 
String)]())) { (res, dim) =>
-        dim.flatMap { configSet => res.map(_ ++ configSet) }
-      }
+    configDims.values.foldLeft(Seq(Seq[(String, String)]())) { (res, dim) =>
+      dim.flatMap { configSet => res.map(_ ++ configSet) }
+    }
+  }
 
-      configSets.foreach { configSet =>
-        try {
-          runQueries(queries, testCase, (settings ++ 
configSet).toImmutableArraySeq)
-        } catch {
-          case e: Throwable =>
-            val configs = configSet.map {
-              case (k, v) => s"$k=$v"
-            }
-            logError(s"Error using configs: ${configs.mkString(",")}")
-            throw e
-        }
+  protected def runQueriesWithSparkConfigDimensions(

Review Comment:
   `runQueriesWithConfig` ?



##########
sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala:
##########
@@ -498,7 +516,7 @@ class SQLQueryTestSuite extends QueryTest with 
SharedSparkSession with SQLHelper
   protected def runQueries(
       queries: Seq[String],
       testCase: TestCase,
-      configSet: Seq[(String, String)]): Unit = {
+      sparkConfigSet: Seq[(String, String)]): Unit = {

Review Comment:
   I think this change is unnecessary.



##########
sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala:
##########
@@ -406,52 +402,74 @@ class SQLQueryTestSuite extends QueryTest with 
SharedSparkSession with SQLHelper
     }
 
     // List of SQL queries to run
-    val queries = tempQueries.map(_.trim).filter(_ != "").toSeq
+    tempQueries.map(_.trim).filter(_ != "")
       // Fix misplacement when comment is at the end of the query.
       
.map(_.split("\n").filterNot(_.startsWith("--")).mkString("\n")).map(_.trim).filter(_
 != "")
+  }
 
+  protected def getSparkSettings(comments: Array[String]): Array[(String, 
String)] = {
     val settingLines = comments.filter(_.startsWith("--SET 
")).map(_.substring(6))
-    val settings = settingLines.flatMap(_.split(",").map { kv =>
+    settingLines.flatMap(_.split(",").map { kv =>
       val (conf, value) = kv.span(_ != '=')
       conf.trim -> value.substring(1).trim
     })
+  }
 
-    if (regenerateGoldenFiles) {
-      runQueries(queries, testCase, settings.toImmutableArraySeq)
-    } else {
-      // A config dimension has multiple config sets, and a config set has 
multiple configs.
-      // - config dim:     Seq[Seq[(String, String)]]
-      //   - config set:   Seq[(String, String)]
-      //     - config:     (String, String))
-      // We need to do cartesian product for all the config dimensions, to get 
a list of
-      // config sets, and run the query once for each config set.
-      val configDimLines = 
comments.filter(_.startsWith("--CONFIG_DIM")).map(_.substring(12))
-      val configDims = configDimLines.groupBy(_.takeWhile(_ != ' ')).transform 
{ (_, lines) =>
-        lines.map(_.dropWhile(_ != ' ').substring(1)).map(_.split(",").map { 
kv =>
-          val (conf, value) = kv.span(_ != '=')
-          conf.trim -> value.substring(1).trim
-        }.toSeq).toSeq
-      }
+  protected def getSparkConfigDimensions(comments: Array[String]): 
Seq[Seq[(String, String)]] = {

Review Comment:
   `getConfigDimensions`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to