pan3793 commented on code in PR #2847:
URL: https://github.com/apache/incubator-kyuubi/pull/2847#discussion_r894189522
##########
extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala:
##########
@@ -17,36 +17,100 @@
package org.apache.kyuubi.spark.connector.tpcds
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Path, Paths}
+
+import scala.collection.JavaConverters._
import scala.io.{Codec, Source}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.kyuubi.KyuubiFunSuite
import
org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
+import org.apache.kyuubi.spark.connector.common.SparkUtils
+
+// scalastyle:off line.size.limit
+/**
+ * To run this test suite:
+ * {{{
+ * build/mvn clean install \
+ * -Dmaven.plugin.scalatest.exclude.tags="" \
+ * -Dtest=none
-DwildcardSuites=org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite
+ * }}}
+ *
+ * To re-generate golden files for this suite:
+ * {{{
+ * KYUUBI_UPDATE=1 build/mvn clean install \
+ * -Dmaven.plugin.scalatest.exclude.tags="" \
+ * -Dtest=none
-DwildcardSuites=org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite
+ * }}}
+ */
+// scalastyle:on line.size.limit
class TPCDSQuerySuite extends KyuubiFunSuite {
+ private val regenerateGoldenFiles =
sys.env.get("KYUUBI_UPDATE").contains("1")
+
+ val baseResourcePath: Path =
+ Paths.get("src", "main", "resources")
+
val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet -
("q14", "q23", "q24", "q39") +
("q14a", "q14b", "q23a", "q23b", "q24a", "q24b", "q39a", "q39b")
+ private def fileToString(file: Path): String = {
+ new String(Files.readAllBytes(file), StandardCharsets.UTF_8)
+ }
+
test("run query on sf0") {
+ assume(SparkUtils.isSparkVersionEqualTo("3.2"))
+ val viewSuffix = "view";
val sparkConf = new SparkConf().setMaster("local[*]")
.set("spark.ui.enabled", "false")
.set("spark.sql.catalogImplementation", "in-memory")
.set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
.set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) {
spark =>
- spark.sql("USE tpcds.sf0")
+ spark.sql("USE tpcds.tiny")
queries.map { queryName =>
- val in =
getClass.getClassLoader.getResourceAsStream(s"tpcds_3.2/$queryName.sql")
+ val in =
getClass.getClassLoader.getResourceAsStream(s"tpcds_3.2/sql/$queryName.sql")
val queryContent: String =
Source.fromInputStream(in)(Codec.UTF8).mkString
in.close()
queryName -> queryContent
}.foreach { case (name, sql) =>
try {
- spark.sql(sql).collect()
+ val result = spark.sql(sql).collect()
+ val schema = spark.sql(sql).schema
+ val schemaDDL = schema.toDDL + "\n"
+ spark.createDataFrame(result.toList.asJava,
schema).createTempView(s"$name$viewSuffix")
+ val sumHashResult =
+ spark.sql(s"select sum(hash(*)) from
$name$viewSuffix").collect().head.get(0) + "\n"
+
+ // scalastyle:off println
Review Comment:
Technically, the result is affected by the algorithm, the seed, and the
memory bits of the value. I'm not sure about ARM, but the x86-based JDK should
have a deterministic memory layout
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]