advancedxy commented on code in PR #331:
URL: https://github.com/apache/datafusion-comet/pull/331#discussion_r1584935565


##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -135,6 +146,97 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH
     val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
     Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8))
   }
+
+  /**
+   * Manual test to calculate the coverage of Spark builtin expressions when executed
+   * directly by DataFusion
+   *
+   * The test updates the file doc/spark_builtin_expr_df_coverage.txt
+   *
+   * Requires the DATAFUSIONCLI_PATH env variable to be set to the directory containing
+   * datafusion-cli
+   */
+  test("Test Spark builtin expressions coverage by Datafusion directly") {
+    val builtinExamplesMap = getExamples()
+
+    // key - function name
+    // value - coverage result showing whether the function is supported by DataFusion
+    val resultsMap = new mutable.HashMap[String, CoverageResult]()
+
+    builtinExamplesMap.foreach {
+      case (funcName, q :: _) =>
+        val queryResult =
+          try {
+            // Example with predefined values
+            // e.g. SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col)
+            if (q.toLowerCase.contains(" from values")) {
+              val select = selectPattern.findFirstMatchIn(q).map(_.group(0))
+              val values = valuesPattern.findFirstMatchIn(q).map(_.group(0))
+              (select, values) match {
+                case (Some(s), Some(v)) =>
+                  withTempDir { dir =>
+                    val path = new Path(dir.toURI.toString).toUri.getPath
+                    spark.sql(s"select * $v").repartition(1).write.mode("overwrite").parquet(path)
+                    runDatafusionCli(s"""$s '$path/*.parquet'""")
+                  }
+
+                case _ =>
+                  sys.error("Cannot parse properly")
+              }
+            } else {
+              // Process the simple example like `SELECT cos(0);`
+              runDatafusionCli(q)
+            }
+            CoverageResult(CoverageResultStatus.Passed.toString, Seq((q, "OK")))
+          } catch {
+            case e: Throwable =>
+              CoverageResult(CoverageResultStatus.Failed.toString, Seq((q, e.getMessage)))
+          }
+        resultsMap.put(funcName, queryResult)
+
+      case (funcName, List()) =>
+        resultsMap.put(
+          funcName,
+          CoverageResult(
+            CoverageResultStatus.Skipped.toString,
+            Seq(("", "No examples found in 
spark.sessionState.functionRegistry"))))
+    }
+
+    resultsMap.toSeq.toDF("name", "details").createOrReplaceTempView("t")
+    val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
+    Files.write(Paths.get(rawCoverageFileDatafusionPath), str.getBytes(StandardCharsets.UTF_8))
+  }
+
+  private def runDatafusionCli(sql: String) = {
+
+    val datafusionCliPath = sys.env.getOrElse(
+      "DATAFUSIONCLI_PATH",
+      sys.error("DATAFUSIONCLI_PATH env variable not set"))
+
+    val tempFilePath = Files.createTempFile("temp-", ".sql")
+    Files.write(tempFilePath, sql.getBytes)
+
+    val command = s"""$datafusionCliPath/datafusion-cli -f $tempFilePath"""
+
+    val stdout = new StringBuilder
+    val stderr = new StringBuilder
+
+    command.!(
+      ProcessLogger(
+        out => stdout.append(out).append("\n"), // stdout
+        err => stderr.append(err).append("\n") // stderr
+      ))
+
+    Files.delete(tempFilePath)

Review Comment:
   ```suggestion
       try {
       Files.write(tempFilePath, sql.getBytes)
      
       val command = s"""$datafusionCliPath/datafusion-cli -f $tempFilePath"""
   
       val stdout = new StringBuilder
       val stderr = new StringBuilder
   
       command.!(
         ProcessLogger(
           out => stdout.append(out).append("\n"), // stdout
           err => stderr.append(err).append("\n") // stderr
         ))
      } finally {
         Files.delete(tempFilePath)
      }
   ```
   
   Let's wrap the `delete` in a `finally` block to make sure the temp file is always deleted. The indentation is not adjusted yet; you may change it to match the surrounding style.
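   
   As a side note, `!` also returns the process exit status, which the snippet as quoted does not appear to use. For reference, here is a self-contained sketch of the same try/finally pattern that additionally surfaces the exit status and the captured output; the helper name and return type are illustrative, not part of this PR:
   
   ```scala
   import java.nio.file.Files
   import scala.sys.process._
   
   // Run a SQL string through datafusion-cli via a temp file, guaranteeing
   // the temp file is removed even if writing or the process itself fails.
   def runSqlFile(cliDir: String, sql: String): (Int, String, String) = {
     val tempFilePath = Files.createTempFile("temp-", ".sql")
     try {
       Files.write(tempFilePath, sql.getBytes)
       val stdout = new StringBuilder
       val stderr = new StringBuilder
       val exitCode = s"$cliDir/datafusion-cli -f $tempFilePath".!(
         ProcessLogger(
           out => stdout.append(out).append('\n'),
           err => stderr.append(err).append('\n')))
       (exitCode, stdout.toString, stderr.toString)
     } finally {
       Files.delete(tempFilePath)
     }
   }
   ```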



##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -135,6 +146,97 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH
     val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
     Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8))
   }
+
+  /**
+   * Manual test to calculate the coverage of Spark builtin expressions when executed
+   * directly by DataFusion
+   *
+   * The test updates the file doc/spark_builtin_expr_df_coverage.txt

Review Comment:
   I think it would be better to add the new column to the previous file (spark_builtin_expr_coverage.txt), so that we can see at a glance which functions are already implemented, which are not, and which would be supported directly by the DataFusion kernel, without having to look at two files.
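   
   For illustration only, the merge could look something like the sketch below. It assumes each run produces a map from function name to a status string; `cometResults`, `dfResults`, the column names, and the `Skipped` default are all hypothetical:
   
   ```scala
   // Sketch: one row per function with a status column per backend, so a
   // single coverage file shows Comet support and direct DataFusion support
   // side by side.
   def combine(
       cometResults: Map[String, String],
       dfResults: Map[String, String]): Seq[(String, String, String)] =
     (cometResults.keySet ++ dfResults.keySet).toSeq.sorted.map { name =>
       (name,
        cometResults.getOrElse(name, "Skipped"),
        dfResults.getOrElse(name, "Skipped"))
     }
   ```
   
   The resulting tuples could then go through the existing `toDF(...)`/`showString(...)` path with headers such as `name`, `comet`, `datafusion_direct`.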



##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -135,6 +146,97 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH
     val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
     Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8))
   }
+
+  /**
+   * Manual test to calculate the coverage of Spark builtin expressions when executed
+   * directly by DataFusion
+   *
+   * The test updates the file doc/spark_builtin_expr_df_coverage.txt
+   *
+   * Requires the DATAFUSIONCLI_PATH env variable to be set to the directory containing
+   * datafusion-cli

Review Comment:
   If we are going to build the coverage file directly in the CI pipeline, I think this is fine. Otherwise, we may need to update the development.md file (or other documentation) to explain how to run this coverage suite; an illustrative snippet follows below.
   
   In my opinion, I would prefer to set up the CI pipeline early so that others don't have to figure out how to run this suite themselves.
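   
   For example, a hypothetical development.md entry might look like this. The Maven invocation is an assumption based on the scalatest-maven-plugin `suites` property; the exact command depends on how the project runs individual suites:
   
   ```shell
   # Assumed invocation; adjust to the project's actual build setup.
   export DATAFUSIONCLI_PATH=/path/to/directory/containing/datafusion-cli
   mvn test -Dsuites="org.apache.comet.CometExpressionCoverageSuite"
   ```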



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

