jiaoqingbo commented on code in PR #2847:
URL: https://github.com/apache/incubator-kyuubi/pull/2847#discussion_r894091568


##########
extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala:
##########
@@ -17,36 +17,100 @@
 
 package org.apache.kyuubi.spark.connector.tpcds
 
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Path, Paths}
+
+import scala.collection.JavaConverters._
 import scala.io.{Codec, Source}
 
 import org.apache.spark.SparkConf
 import org.apache.spark.sql.SparkSession
 
 import org.apache.kyuubi.KyuubiFunSuite
 import 
org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
+import org.apache.kyuubi.spark.connector.common.SparkUtils
+
+// scalastyle:off line.size.limit
+/**
+ * To run this test suite:
+ * {{{
+ *   build/mvn clean install \
+ *     -Dmaven.plugin.scalatest.exclude.tags="" \
+ *     -Dtest=none 
-DwildcardSuites=org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite
+ * }}}
+ *
+ * To re-generate golden files for this suite:
+ * {{{
+ *   KYUUBI_UPDATE=1 build/mvn clean install \
+ *     -Dmaven.plugin.scalatest.exclude.tags="" \
+ *     -Dtest=none 
-DwildcardSuites=org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite
+ * }}}
+ */
+// scalastyle:on line.size.limit
 
 class TPCDSQuerySuite extends KyuubiFunSuite {
 
+  private val regenerateGoldenFiles = 
sys.env.get("KYUUBI_UPDATE").contains("1")
+
+  val baseResourcePath: Path =
+    Paths.get("src", "main", "resources")
+
   val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet -
     ("q14", "q23", "q24", "q39") +
     ("q14a", "q14b", "q23a", "q23b", "q24a", "q24b", "q39a", "q39b")
 
+  private def fileToString(file: Path): String = {
+    new String(Files.readAllBytes(file), StandardCharsets.UTF_8)
+  }
+
   test("run query on sf0") {
+    assume(SparkUtils.isSparkVersionEqualTo("3.2"))
+    val viewSuffix = "view";
     val sparkConf = new SparkConf().setMaster("local[*]")
       .set("spark.ui.enabled", "false")
       .set("spark.sql.catalogImplementation", "in-memory")
       .set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
       .set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true")
     withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) { 
spark =>
-      spark.sql("USE tpcds.sf0")
+      spark.sql("USE tpcds.tiny")
       queries.map { queryName =>
-        val in = 
getClass.getClassLoader.getResourceAsStream(s"tpcds_3.2/$queryName.sql")
+        val in = 
getClass.getClassLoader.getResourceAsStream(s"tpcds_3.2/sql/$queryName.sql")
         val queryContent: String = 
Source.fromInputStream(in)(Codec.UTF8).mkString
         in.close()
         queryName -> queryContent
       }.foreach { case (name, sql) =>
         try {
-          spark.sql(sql).collect()
+          val result = spark.sql(sql).collect()
+          val schema = spark.sql(sql).schema
+          val schemaDDL = schema.toDDL + "\n"
+          spark.createDataFrame(result.toList.asJava, 
schema).createTempView(s"$name$viewSuffix")
+          val sumHashResult =
+            spark.sql(s"select sum(hash(*)) from 
$name$viewSuffix").collect().head.get(0) + "\n"
+
+          // scalastyle:off println

Review Comment:
   The sumHashResult I computed locally differs from the one produced on the remote CI.
I'll wait for the CI run to obtain its results, then remove these debug lines.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to