Copilot commented on code in PR #1758:
URL: https://github.com/apache/auron/pull/1758#discussion_r2731243664
##########
spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala:
##########
@@ -16,10 +16,37 @@
*/
package org.apache.auron
+import java.io.File
+
+import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.test.SharedSparkSession
trait BaseAuronSQLSuite extends SharedSparkSession {
+ protected val suiteWorkspace: String = getClass.getResource("/").getPath +
"auron-tests-workdir"
Review Comment:
The workspace path is constructed using `getClass.getResource("/").getPath +
"auron-tests-workdir"`, which means all test suites extending BaseAuronSQLSuite
will share the same workspace directory. This could cause conflicts if multiple
test suites run in parallel. Consider incorporating the class name in the
workspace path (e.g., appending `getClass.getSimpleName` to the base directory,
as in the suggestion below) to ensure test isolation, similar to how the issue
mentions paths like `spark-warehouse/org.apache.auron.AuronQuerySuite/t1`.
```suggestion
protected val suiteWorkspaceBase: String =
getClass.getResource("/").getPath + "auron-tests-workdir"
protected val suiteWorkspace: String =
suiteWorkspaceBase + "/" + getClass.getSimpleName
```
##########
spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala:
##########
@@ -16,10 +16,37 @@
*/
package org.apache.auron
+import java.io.File
+
+import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.test.SharedSparkSession
trait BaseAuronSQLSuite extends SharedSparkSession {
+ protected val suiteWorkspace: String = getClass.getResource("/").getPath +
"auron-tests-workdir"
+ protected val warehouseDir: String = suiteWorkspace + "/spark-warehouse"
+ protected val metastoreDir: String = suiteWorkspace + "/meta"
Review Comment:
The path construction using string concatenation is fragile across platforms:
it silently relies on `getClass.getResource("/").getPath` ending with a
separator, so a missing (or doubled) separator can produce an invalid path.
Consider using `java.nio.file.Paths.get()` or
`new File(getClass.getResource("/").getPath, "auron-tests-workdir")` for more
robust path handling.
```suggestion
protected val suiteWorkspace: String =
new File(getClass.getResource("/").getPath,
"auron-tests-workdir").getPath
protected val warehouseDir: String = new File(suiteWorkspace,
"spark-warehouse").getPath
protected val metastoreDir: String = new File(suiteWorkspace,
"meta").getPath
```
##########
spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala:
##########
@@ -16,10 +16,37 @@
*/
package org.apache.auron
+import java.io.File
+
+import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.test.SharedSparkSession
trait BaseAuronSQLSuite extends SharedSparkSession {
+ protected val suiteWorkspace: String = getClass.getResource("/").getPath +
"auron-tests-workdir"
+ protected val warehouseDir: String = suiteWorkspace + "/spark-warehouse"
+ protected val metastoreDir: String = suiteWorkspace + "/meta"
+
+ protected def resetSuiteWorkspace(): Unit = {
+ val workdir = new File(suiteWorkspace)
+ if (workdir.exists()) {
+ FileUtils.forceDelete(workdir)
+ }
+ FileUtils.forceMkdir(workdir)
+ FileUtils.forceMkdir(new File(warehouseDir))
+ FileUtils.forceMkdir(new File(metastoreDir))
+ }
+
+ override def beforeAll(): Unit = {
+ // Prepare a clean workspace before SparkSession initialization
+ resetSuiteWorkspace()
+ super.beforeAll()
+ spark.sparkContext.setLogLevel("WARN")
+ }
+
+ override def afterAll(): Unit = {
+ super.afterAll()
Review Comment:
The workspace directories created in `resetSuiteWorkspace()` are never
cleaned up after tests complete. The `afterAll()` method only calls
`super.afterAll()` but doesn't delete the `suiteWorkspace` directory. This
could lead to disk space issues over time. Consider adding cleanup logic
similar to what's in `BuildInfoInSparkUISuite.afterAll()` or in other test
suites like `SparkExpressionTestsBase` which clean up their working directories.
```suggestion
try {
super.afterAll()
} finally {
val workdir = new File(suiteWorkspace)
if (workdir.exists()) {
FileUtils.forceDelete(workdir)
}
}
```
##########
spark-extension-shims-spark/src/test/scala/org/apache/spark/sql/execution/BuildInfoInSparkUISuite.scala:
##########
@@ -33,12 +33,12 @@ class BuildInfoInSparkUISuite extends AuronQueryTest with
BaseAuronSQLSuite {
super.sparkConf.set("spark.eventLog.dir", testDir.toString)
}
- override protected def beforeAll(): Unit = {
+ override def beforeAll(): Unit = {
testDir = Utils.createTempDir(namePrefix = "spark-events")
super.beforeAll()
}
- override protected def afterAll(): Unit = {
+ override def afterAll(): Unit = {
Utils.deleteRecursively(testDir)
Review Comment:
The `afterAll` method in BuildInfoInSparkUISuite doesn't call
`super.afterAll()`, which means the parent class cleanup logic won't execute.
This breaks the cleanup chain and could leave resources uncleaned. Add
`super.afterAll()` after the `Utils.deleteRecursively(testDir)` call, or
consider wrapping it in a try-finally block to ensure the parent cleanup always
happens.
```suggestion
try {
Utils.deleteRecursively(testDir)
} finally {
super.afterAll()
}
```
##########
spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala:
##########
@@ -16,10 +16,37 @@
*/
package org.apache.auron
+import java.io.File
+
+import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.test.SharedSparkSession
trait BaseAuronSQLSuite extends SharedSparkSession {
+ protected val suiteWorkspace: String = getClass.getResource("/").getPath +
"auron-tests-workdir"
+ protected val warehouseDir: String = suiteWorkspace + "/spark-warehouse"
+ protected val metastoreDir: String = suiteWorkspace + "/meta"
+
+ protected def resetSuiteWorkspace(): Unit = {
+ val workdir = new File(suiteWorkspace)
+ if (workdir.exists()) {
+ FileUtils.forceDelete(workdir)
+ }
+ FileUtils.forceMkdir(workdir)
+ FileUtils.forceMkdir(new File(warehouseDir))
+ FileUtils.forceMkdir(new File(metastoreDir))
+ }
Review Comment:
The `resetSuiteWorkspace()` method uses `FileUtils.forceDelete()`, which can
throw an exception if the directory is in use or locked. Since this is called
in `beforeAll()` before `super.beforeAll()`, an exception here will prevent
proper test initialization. Consider either catching and logging the error and
continuing (if a stale workspace is non-fatal for the tests), or letting the
##########
spark-extension-shims-spark/src/test/scala/org/apache/auron/BaseAuronSQLSuite.scala:
##########
@@ -16,10 +16,37 @@
*/
package org.apache.auron
+import java.io.File
+
+import org.apache.commons.io.FileUtils
import org.apache.spark.SparkConf
import org.apache.spark.sql.test.SharedSparkSession
trait BaseAuronSQLSuite extends SharedSparkSession {
+ protected val suiteWorkspace: String = getClass.getResource("/").getPath +
"auron-tests-workdir"
+ protected val warehouseDir: String = suiteWorkspace + "/spark-warehouse"
+ protected val metastoreDir: String = suiteWorkspace + "/meta"
+
+ protected def resetSuiteWorkspace(): Unit = {
+ val workdir = new File(suiteWorkspace)
+ if (workdir.exists()) {
+ FileUtils.forceDelete(workdir)
+ }
+ FileUtils.forceMkdir(workdir)
+ FileUtils.forceMkdir(new File(warehouseDir))
+ FileUtils.forceMkdir(new File(metastoreDir))
Review Comment:
The `metastoreDir` is created but never used in the Spark configuration.
Consider either removing the metastore directory creation (lines 28 and 37) if
it's not needed, or adding the appropriate Spark configuration (e.g.,
`spark.sql.catalog.spark_catalog.warehouse` or similar) to actually use this
directory. Looking at similar test suites like SparkTestsBase, they configure
metastore paths when they create them.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]