advancedxy commented on code in PR #331:
URL: https://github.com/apache/datafusion-comet/pull/331#discussion_r1584935565
##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -135,6 +146,97 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH
     val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
     Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8))
   }
+
+  /**
+   * Manual test to calculate the coverage of Spark builtin expressions supported by
+   * DataFusion directly.
+   *
+   * The test updates the file doc/spark_builtin_expr_df_coverage.txt.
+   *
+   * Requires the DATAFUSIONCLI_PATH env variable to be set to a valid path to datafusion-cli.
+   */
+  test("Test Spark builtin expressions coverage by Datafusion directly") {
+    val builtinExamplesMap = getExamples()
+
+    // key - function name
+    // value - result showing whether the function is supported by Datafusion
+    val resultsMap = new mutable.HashMap[String, CoverageResult]()
+
+    builtinExamplesMap.foreach {
+      case (funcName, q :: _) =>
+        val queryResult =
+          try {
+            // Example with predefined values
+            // e.g. SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col)
+            if (q.toLowerCase.contains(" from values")) {
+              val select = selectPattern.findFirstMatchIn(q).map(_.group(0))
+              val values = valuesPattern.findFirstMatchIn(q).map(_.group(0))
+              (select, values) match {
+                case (Some(s), Some(v)) =>
+                  withTempDir { dir =>
+                    val path = new Path(dir.toURI.toString).toUri.getPath
+                    spark.sql(s"select * $v").repartition(1).write.mode("overwrite").parquet(path)
+                    runDatafusionCli(s"""$s '$path/*.parquet'""")
+                  }
+
+                case _ =>
+                  sys.error("Cannot parse properly")
+              }
+            } else {
+              // Process a simple example like `SELECT cos(0);`
+              runDatafusionCli(q)
+            }
+            CoverageResult(CoverageResultStatus.Passed.toString, Seq((q, "OK")))
+          } catch {
+            case e: Throwable =>
+              CoverageResult(CoverageResultStatus.Failed.toString, Seq((q, e.getMessage)))
+          }
+        resultsMap.put(funcName, queryResult)
+
+      case (funcName, List()) =>
+        resultsMap.put(
+          funcName,
+          CoverageResult(
+            CoverageResultStatus.Skipped.toString,
+            Seq(("", "No examples found in spark.sessionState.functionRegistry"))))
+    }
+
+    resultsMap.toSeq.toDF("name", "details").createOrReplaceTempView("t")
+    val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
+    Files.write(Paths.get(rawCoverageFileDatafusionPath), str.getBytes(StandardCharsets.UTF_8))
+  }
+
+  private def runDatafusionCli(sql: String) = {
+
+    val datafusionCliPath = sys.env.getOrElse(
+      "DATAFUSIONCLI_PATH",
+      sys.error("DATAFUSIONCLI_PATH env variable not set"))
+
+    val tempFilePath = Files.createTempFile("temp-", ".sql")
+    Files.write(tempFilePath, sql.getBytes)
+
+    val command = s"""$datafusionCliPath/datafusion-cli -f $tempFilePath"""
+
+    val stdout = new StringBuilder
+    val stderr = new StringBuilder
+
+    command.!(
+      ProcessLogger(
+        out => stdout.append(out).append("\n"), // stdout
+        err => stderr.append(err).append("\n") // stderr
+      ))
+
+    Files.delete(tempFilePath)
Review Comment:
```suggestion
try {
Files.write(tempFilePath, sql.getBytes)
val command = s"""$datafusionCliPath/datafusion-cli -f $tempFilePath"""
val stdout = new StringBuilder
val stderr = new StringBuilder
command.!(
ProcessLogger(
out => stdout.append(out).append("\n"), // stdout
err => stderr.append(err).append("\n") // stderr
))
} finally {
Files.delete(tempFilePath)
}
```
Let's wrap the `delete` call in a `finally` block to make sure the temp file is always deleted. The indentation is not adjusted yet; you may change it to match the project's style.
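For reference, here is a minimal sketch of how `runDatafusionCli` could look with the cleanup applied. The method's return type is not visible in this hunk, so returning the captured stdout/stderr pair is an assumption:
```scala
import java.nio.file.Files
import scala.sys.process._

// Sketch only: the (String, String) return type is an assumption, since
// the original return value is not shown in the hunk.
private def runDatafusionCli(sql: String): (String, String) = {
  val datafusionCliPath = sys.env.getOrElse(
    "DATAFUSIONCLI_PATH",
    sys.error("DATAFUSIONCLI_PATH env variable not set"))

  val tempFilePath = Files.createTempFile("temp-", ".sql")
  try {
    Files.write(tempFilePath, sql.getBytes)
    val command = s"$datafusionCliPath/datafusion-cli -f $tempFilePath"
    val stdout = new StringBuilder
    val stderr = new StringBuilder
    // Capture both streams so the caller can inspect stderr to decide
    // whether the query passed or failed.
    command.!(
      ProcessLogger(
        out => stdout.append(out).append("\n"),
        err => stderr.append(err).append("\n")))
    (stdout.toString, stderr.toString)
  } finally {
    // Runs whether the CLI invocation succeeds or throws, so the temp
    // file never leaks.
    Files.delete(tempFilePath)
  }
}
```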
##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -135,6 +146,97 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH
     val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
     Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8))
   }
+
+  /**
+   * Manual test to calculate the coverage of Spark builtin expressions supported by
+   * DataFusion directly.
+   *
+   * The test updates the file doc/spark_builtin_expr_df_coverage.txt.
Review Comment:
I think it would be better to add the new column to the previous file (spark_builtin_expr_coverage.txt), so that we can clearly see which functions are already implemented, which are not, and which would be supported directly by the DataFusion kernel, without having to look at two files.
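If the suite keeps two separate result maps internally, merging them into one table could look roughly like this. This is only a sketch: the map names (`cometStatus`, `datafusionStatus`) and the column names are hypothetical, and plain status strings stand in for the `CoverageResult` fields, which are not shown in this hunk:
```scala
// Hypothetical inputs: one status string per function from each run.
val cometStatus: Map[String, String] = ???      // from the Comet coverage run
val datafusionStatus: Map[String, String] = ??? // from the DataFusion CLI run

import spark.implicits._

// One row per function, with a column for each backend, so a single file
// answers both "is it implemented in Comet?" and "would DataFusion
// support it directly?".
(cometStatus.keySet ++ datafusionStatus.keySet).toSeq.sorted
  .map { name =>
    (name,
      cometStatus.getOrElse(name, "Skipped"),
      datafusionStatus.getOrElse(name, "Skipped"))
  }
  .toDF("name", "comet_support", "datafusion_support")
  .createOrReplaceTempView("t")
```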
##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -135,6 +146,97 @@ class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanH
     val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
     Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8))
   }
+
+  /**
+   * Manual test to calculate the coverage of Spark builtin expressions supported by
+   * DataFusion directly.
+   *
+   * The test updates the file doc/spark_builtin_expr_df_coverage.txt.
+   *
+   * Requires the DATAFUSIONCLI_PATH env variable to be set to a valid path to datafusion-cli.
Review Comment:
If we are going to build the coverage file directly in the CI pipeline, I think this is fine. Otherwise, we may need to update development.md or other documentation to explain how to run this coverage suite. Personally, I would prefer to set up the CI pipeline early so that others don't have to figure out how to run this suite themselves.