MaxGekk commented on a change in pull request #25942: [SPARK-29242][SQL][TEST] 
Check results of expression examples
URL: https://github.com/apache/spark/pull/25942#discussion_r328917752
 
 

 ##########
 File path: sql/core/src/test/scala/org/apache/spark/sql/SQLQuerySuite.scala
 ##########
 @@ -140,6 +143,56 @@ class SQLQuerySuite extends QueryTest with 
SharedSparkSession {
     }
   }
 
+  test("check outputs of expression examples") {
+    def unindentAndTrim(s: String): String = {
+      s.replaceAll("\n\\s+", "\n").trim
+    }
+    val beginSqlStmtRe = "  > ".r
+    val endSqlStmtRe = ";\n".r
+    def checkExampleSyntax(example: String): Unit = {
+      val beginStmtNum = beginSqlStmtRe.findAllIn(example).length
+      val endStmtNum = endSqlStmtRe.findAllIn(example).length
+      assert(beginStmtNum === endStmtNum,
+        "The number of ` > ` does not match to the number of `;`")
+    }
+    val exampleRe = """^(.+);\n(?s)(.+)$""".r
+    val ignoreSet = Set(
+      // One of examples shows getting the current timestamp
+      "org.apache.spark.sql.catalyst.expressions.UnixTimestamp",
+      // Random output without a seed
+      "org.apache.spark.sql.catalyst.expressions.Rand",
+      "org.apache.spark.sql.catalyst.expressions.Randn",
+      "org.apache.spark.sql.catalyst.expressions.Shuffle",
+      "org.apache.spark.sql.catalyst.expressions.Uuid",
+      // The example calls methods that return unstable results.
+      "org.apache.spark.sql.catalyst.expressions.CallMethodViaReflection")
+
+    withSQLConf(SQLConf.UTC_TIMESTAMP_FUNC_ENABLED.key -> "true") {
+      spark.sessionState.functionRegistry.listFunction().foreach { funcId =>
+        val info = spark.sessionState.catalog.lookupFunctionInfo(funcId)
+        val className = info.getClassName
+        if (!ignoreSet.contains(className)) {
+          withClue(s"Function '${info.getName}', Expression class 
'$className'") {
+            val example = info.getExamples
+            logTrace(example)
+            checkExampleSyntax(example)
+            example.split("  > ").toList.foreach(_ match {
+              case exampleRe(sql, output) =>
+                val df = spark.sql(sql)
+                val actual = unindentAndTrim(
+                  
hiveResultString(df.queryExecution.executedPlan).mkString("\n"))
 
 Review comment:
   The purpose of this test is to check that the output of the examples is up to 
date and matches the current output of `spark-sql`.
   I believe the correctness of the examples without codegen should be checked 
in separate tests for the expression or function.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to