pan3793 commented on code in PR #2847:
URL: https://github.com/apache/incubator-kyuubi/pull/2847#discussion_r894189522
##########
extensions/spark/kyuubi-spark-connector-tpcds/src/test/scala/org/apache/kyuubi/spark/connector/tpcds/TPCDSQuerySuite.scala:
##########
@@ -17,36 +17,100 @@
package org.apache.kyuubi.spark.connector.tpcds
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Path, Paths}
+
+import scala.collection.JavaConverters._
import scala.io.{Codec, Source}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.kyuubi.KyuubiFunSuite
import
org.apache.kyuubi.spark.connector.common.LocalSparkSession.withSparkSession
+import org.apache.kyuubi.spark.connector.common.SparkUtils
+
+// scalastyle:off line.size.limit
+/**
+ * To run this test suite:
+ * {{{
+ * build/mvn clean install \
+ * -Dmaven.plugin.scalatest.exclude.tags="" \
+ * -Dtest=none
-DwildcardSuites=org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite
+ * }}}
+ *
+ * To re-generate golden files for this suite:
+ * {{{
+ * KYUUBI_UPDATE=1 build/mvn clean install \
+ * -Dmaven.plugin.scalatest.exclude.tags="" \
+ * -Dtest=none
-DwildcardSuites=org.apache.kyuubi.spark.connector.tpcds.TPCDSQuerySuite
+ * }}}
+ */
+// scalastyle:on line.size.limit
class TPCDSQuerySuite extends KyuubiFunSuite {
+ private val regenerateGoldenFiles =
sys.env.get("KYUUBI_UPDATE").contains("1")
+
+ val baseResourcePath: Path =
+ Paths.get("src", "main", "resources")
+
val queries: Set[String] = (1 to 99).map(i => s"q$i").toSet -
("q14", "q23", "q24", "q39") +
("q14a", "q14b", "q23a", "q23b", "q24a", "q24b", "q39a", "q39b")
+ private def fileToString(file: Path): String = {
+ new String(Files.readAllBytes(file), StandardCharsets.UTF_8)
+ }
+
test("run query on sf0") {
+ assume(SparkUtils.isSparkVersionEqualTo("3.2"))
+ val viewSuffix = "view";
val sparkConf = new SparkConf().setMaster("local[*]")
.set("spark.ui.enabled", "false")
.set("spark.sql.catalogImplementation", "in-memory")
.set("spark.sql.catalog.tpcds", classOf[TPCDSCatalog].getName)
.set("spark.sql.catalog.tpcds.useTableSchema_2_6", "true")
withSparkSession(SparkSession.builder.config(sparkConf).getOrCreate()) {
spark =>
- spark.sql("USE tpcds.sf0")
+ spark.sql("USE tpcds.tiny")
queries.map { queryName =>
- val in =
getClass.getClassLoader.getResourceAsStream(s"tpcds_3.2/$queryName.sql")
+ val in =
getClass.getClassLoader.getResourceAsStream(s"tpcds_3.2/sql/$queryName.sql")
val queryContent: String =
Source.fromInputStream(in)(Codec.UTF8).mkString
in.close()
queryName -> queryContent
}.foreach { case (name, sql) =>
try {
- spark.sql(sql).collect()
+ val result = spark.sql(sql).collect()
+ val schema = spark.sql(sql).schema
+ val schemaDDL = schema.toDDL + "\n"
+ spark.createDataFrame(result.toList.asJava,
schema).createTempView(s"$name$viewSuffix")
+ val sumHashResult =
+ spark.sql(s"select sum(hash(*)) from
$name$viewSuffix").collect().head.get(0) + "\n"
+
+ // scalastyle:off println
Review Comment:
Technically, the result is affected by the algorithm, the seed, and the
memory bits of the value. I'm not sure about ARM, but the x86-based JDK should
have a deterministic memory layout
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]