spark git commit: [SPARK-24562][TESTS] Support different configs for same test in SQLQueryTestSuite

wenchen Wed, 11 Jul 2018 08:43:40 -0700

Repository: spark
Updated Branches:
  refs/heads/master 006e798e4 -> 592cc8458



[SPARK-24562][TESTS] Support different configs for same test in 
SQLQueryTestSuite

## What changes were proposed in this pull request?

This PR adds support for running the same SQL test input file under several 
different sets of configs, all of which are expected to produce the same result.

## How was this patch tested?

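Tested by the unit tests involved: the SQL input files updated in this commit, 
which SQLQueryTestSuite now runs once per listed config set.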

Author: Marco Gaido <[email protected]>

Closes #21568 from mgaido91/SPARK-24562.


Project: http://git-wip-us.apache.org/repos/asf/spark/repo
Commit: http://git-wip-us.apache.org/repos/asf/spark/commit/592cc845
Tree: http://git-wip-us.apache.org/repos/asf/spark/tree/592cc845
Diff: http://git-wip-us.apache.org/repos/asf/spark/diff/592cc845

Branch: refs/heads/master
Commit: 592cc84583d74c78e4cdf34a3b82692c8de8f4a9
Parents: 006e798
Author: Marco Gaido <[email protected]>
Authored: Wed Jul 11 23:43:06 2018 +0800
Committer: Wenchen Fan <[email protected]>
Committed: Wed Jul 11 23:43:06 2018 +0800

----------------------------------------------------------------------
 .../sql-tests/inputs/join-empty-relation.sql    |  5 ++
 .../resources/sql-tests/inputs/natural-join.sql |  5 ++
 .../resources/sql-tests/inputs/outer-join.sql   |  5 ++
 .../exists-joins-and-set-ops.sql                |  4 ++
 .../inputs/subquery/in-subquery/in-joins.sql    |  4 ++
 .../subquery/in-subquery/not-in-joins.sql       |  4 ++
 .../apache/spark/sql/SQLQueryTestSuite.scala    | 53 +++++++++++++++++---
 7 files changed, 74 insertions(+), 6 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/spark/blob/592cc845/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql b/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql
index 8afa327..2e6a5f3 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/join-empty-relation.sql
@@ -1,3 +1,8 @@
+-- List of configuration the test suite is run against:
+--SET spark.sql.autoBroadcastJoinThreshold=10485760
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
+
 CREATE TEMPORARY VIEW t1 AS SELECT * FROM VALUES (1) AS GROUPING(a);
 CREATE TEMPORARY VIEW t2 AS SELECT * FROM VALUES (1) AS GROUPING(a);
 

http://git-wip-us.apache.org/repos/asf/spark/blob/592cc845/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql b/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql
index 71a5015..e0abeda 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/natural-join.sql
@@ -1,3 +1,8 @@
+-- List of configuration the test suite is run against:
+--SET spark.sql.autoBroadcastJoinThreshold=10485760
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
+
 create temporary view nt1 as select * from values
   ("one", 1),
   ("two", 2),

http://git-wip-us.apache.org/repos/asf/spark/blob/592cc845/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql
index cdc6c81..ce09c21 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/outer-join.sql
@@ -1,3 +1,8 @@
+-- List of configuration the test suite is run against:
+--SET spark.sql.autoBroadcastJoinThreshold=10485760
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
+
 -- SPARK-17099: Incorrect result when HAVING clause is added to group by query
 CREATE OR REPLACE TEMPORARY VIEW t1 AS SELECT * FROM VALUES
 (-234), (145), (367), (975), (298)

http://git-wip-us.apache.org/repos/asf/spark/blob/592cc845/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql
index cc4ed64..cefc3fe 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/exists-subquery/exists-joins-and-set-ops.sql
@@ -1,5 +1,9 @@
 -- Tests EXISTS subquery support. Tests Exists subquery
 -- used in Joins (Both when joins occurs in outer and suquery blocks)
+-- List of configuration the test suite is run against:
+--SET spark.sql.autoBroadcastJoinThreshold=10485760
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
 
 CREATE TEMPORARY VIEW EMP AS SELECT * FROM VALUES
   (100, "emp 1", date "2005-01-01", 100.00D, 10),

http://git-wip-us.apache.org/repos/asf/spark/blob/592cc845/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql
index 880175f..22f3eaf 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/in-joins.sql
@@ -1,5 +1,9 @@
 -- A test suite for IN JOINS in parent side, subquery, and both predicate subquery
 -- It includes correlated cases.
+-- List of configuration the test suite is run against:
+--SET spark.sql.autoBroadcastJoinThreshold=10485760
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
 
 create temporary view t1 as select * from values
   ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),

http://git-wip-us.apache.org/repos/asf/spark/blob/592cc845/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql
----------------------------------------------------------------------
diff --git a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql
index e09b91f..4f8ca8b 100644
--- a/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql
+++ b/sql/core/src/test/resources/sql-tests/inputs/subquery/in-subquery/not-in-joins.sql
@@ -1,5 +1,9 @@
 -- A test suite for not-in-joins in parent side, subquery, and both predicate subquery
 -- It includes correlated cases.
+-- List of configuration the test suite is run against:
+--SET spark.sql.autoBroadcastJoinThreshold=10485760
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=true
+--SET spark.sql.autoBroadcastJoinThreshold=-1,spark.sql.join.preferSortMergeJoin=false
 
 create temporary view t1 as select * from values
   ("val1a", 6S, 8, 10L, float(15.0), 20D, 20E2, timestamp '2014-04-04 01:00:00.000', date '2014-04-04'),

http://git-wip-us.apache.org/repos/asf/spark/blob/592cc845/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
----------------------------------------------------------------------
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
index beac969..826408c 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/SQLQueryTestSuite.scala
@@ -54,6 +54,7 @@ import org.apache.spark.sql.types.StructType
  * The format for input files is simple:
  *  1. A list of SQL queries separated by semicolon.
  *  2. Lines starting with -- are treated as comments and ignored.
+ *  3. Lines starting with --SET are used to run the file with the following set of configs.
  *
  * For example:
  * {{{
@@ -138,18 +139,58 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
   private def runTest(testCase: TestCase): Unit = {
     val input = fileToString(new File(testCase.inputFile))
 
+    val (comments, code) = input.split("\n").partition(_.startsWith("--"))
+    val configSets = {
+      val configLines = comments.filter(_.startsWith("--SET")).map(_.substring(5))
+      val configs = configLines.map(_.split(",").map { confAndValue =>
+        val (conf, value) = confAndValue.span(_ != '=')
+        conf.trim -> value.substring(1).trim
+      })
+      // When we are regenerating the golden files we don't need to run all the configs as they
+      // all need to return the same result
+      if (regenerateGoldenFiles && configs.nonEmpty) {
+        configs.take(1)
+      } else {
+        configs
+      }
+    }
     // List of SQL queries to run
-    val queries: Seq[String] = {
-      val cleaned = input.split("\n").filterNot(_.startsWith("--")).mkString("\n")
-      // note: this is not a robust way to split queries using semicolon, but works for now.
-      cleaned.split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq
+    // note: this is not a robust way to split queries using semicolon, but works for now.
+    val queries = code.mkString("\n").split("(?<=[^\\\\]);").map(_.trim).filter(_ != "").toSeq
+
+    if (configSets.isEmpty) {
+      runQueries(queries, testCase.resultFile, None)
+    } else {
+      configSets.foreach { configSet =>
+        try {
+          runQueries(queries, testCase.resultFile, Some(configSet))
+        } catch {
+          case e: Throwable =>
+            val configs = configSet.map {
+              case (k, v) => s"$k=$v"
+            }
+            logError(s"Error using configs: ${configs.mkString(",")}")
+            throw e
+        }
+      }
     }
+  }
 
+  private def runQueries(
+      queries: Seq[String],
+      resultFileName: String,
+      configSet: Option[Seq[(String, String)]]): Unit = {
     // Create a local SparkSession to have stronger isolation between different test cases.
     // This does not isolate catalog changes.
     val localSparkSession = spark.newSession()
     loadTestData(localSparkSession)
 
+    if (configSet.isDefined) {
+      // Execute the list of set operation in order to add the desired configs
+      val setOperations = configSet.get.map { case (key, value) => s"set $key=$value" }
+      logInfo(s"Setting configs: ${setOperations.mkString(", ")}")
+      setOperations.foreach(localSparkSession.sql)
+    }
     // Run the SQL queries preparing them for comparison.
     val outputs: Seq[QueryOutput] = queries.map { sql =>
       val (schema, output) = getNormalizedResult(localSparkSession, sql)
@@ -167,7 +208,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
         s"-- Number of queries: ${outputs.size}\n\n\n" +
         outputs.zipWithIndex.map{case (qr, i) => qr.toString(i)}.mkString("\n\n\n") + "\n"
       }
-      val resultFile = new File(testCase.resultFile)
+      val resultFile = new File(resultFileName)
       val parent = resultFile.getParentFile
       if (!parent.exists()) {
         assert(parent.mkdirs(), "Could not create directory: " + parent)
@@ -177,7 +218,7 @@ class SQLQueryTestSuite extends QueryTest with SharedSQLContext {
 
     // Read back the golden file.
     val expectedOutputs: Seq[QueryOutput] = {
-      val goldenOutput = fileToString(new File(testCase.resultFile))
+      val goldenOutput = fileToString(new File(resultFileName))
       val segments = goldenOutput.split("-- !query.+\n")
 
       // each query has 3 segments, plus the header


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

spark git commit: [SPARK-24562][TESTS] Support different configs for same test in SQLQueryTestSuite

Reply via email to