This is an automated email from the ASF dual-hosted git repository.
hongze pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new c2e77860f [VL] Avoid using debug instance of JniWorkspace in VeloxBloomFilterTest
c2e77860f is described below
commit c2e77860ff569999dbde5a44159ebef2a96855bf
Author: Hongze Zhang <[email protected]>
AuthorDate: Tue Apr 23 10:04:01 2024 +0800
[VL] Avoid using debug instance of JniWorkspace in VeloxBloomFilterTest
---
.../gluten/backendsapi/velox/ListenerApiImpl.scala | 2 ++
.../spark/util/sketch/VeloxBloomFilterTest.java | 2 --
.../org/apache/gluten/vectorized/JniWorkspace.java | 3 +-
.../org/apache/spark/util/SparkDirectoryUtil.scala | 33 ++++++++++++++++++++--
.../gluten/vectorized/NativePlanEvaluator.java | 3 +-
.../spark/shuffle/ColumnarShuffleWriter.scala | 1 +
6 files changed, 37 insertions(+), 7 deletions(-)
diff --git a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/ListenerApiImpl.scala b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/ListenerApiImpl.scala
index bb3b6ecf0..0ea95ad8e 100644
--- a/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/ListenerApiImpl.scala
+++ b/backends-velox/src/main/scala/org/apache/gluten/backendsapi/velox/ListenerApiImpl.scala
@@ -31,6 +31,7 @@ import org.apache.spark.sql.execution.datasources.velox.{VeloxOrcWriterInjects,
import org.apache.spark.sql.expression.UDFResolver
import org.apache.spark.sql.internal.GlutenConfigUtil
import org.apache.spark.sql.internal.StaticSQLConf
+import org.apache.spark.util.SparkDirectoryUtil
import org.apache.commons.lang3.StringUtils
@@ -140,6 +141,7 @@ class ListenerApiImpl extends ListenerApi {
}
private def initialize(conf: SparkConf): Unit = {
+ SparkDirectoryUtil.init(conf)
val debugJni = conf.getBoolean(GlutenConfig.GLUTEN_DEBUG_MODE,
defaultValue = false) &&
conf.getBoolean(GlutenConfig.GLUTEN_DEBUG_KEEP_JNI_WORKSPACE,
defaultValue = false)
if (debugJni) {
diff --git a/backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java b/backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
index ce439b864..fc1cc03f8 100644
--- a/backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
+++ b/backends-velox/src/test/java/org/apache/spark/util/sketch/VeloxBloomFilterTest.java
@@ -18,7 +18,6 @@ package org.apache.spark.util.sketch;
import org.apache.gluten.backendsapi.ListenerApi;
import org.apache.gluten.backendsapi.velox.ListenerApiImpl;
-import org.apache.gluten.vectorized.JniWorkspace;
import org.apache.spark.SparkConf;
import org.apache.spark.util.TaskResources$;
@@ -33,7 +32,6 @@ public class VeloxBloomFilterTest {
@BeforeClass
public static void setup() {
- JniWorkspace.enableDebug();
final ListenerApi api = new ListenerApiImpl();
api.onDriverStart(new SparkConf());
}
diff --git a/gluten-core/src/main/java/org/apache/gluten/vectorized/JniWorkspace.java b/gluten-core/src/main/java/org/apache/gluten/vectorized/JniWorkspace.java
index 84edfba18..a7c12387a 100644
--- a/gluten-core/src/main/java/org/apache/gluten/vectorized/JniWorkspace.java
+++ b/gluten-core/src/main/java/org/apache/gluten/vectorized/JniWorkspace.java
@@ -65,7 +65,8 @@ public class JniWorkspace {
private static JniWorkspace createDefault() {
try {
final String tempRoot =
- SparkDirectoryUtil.namespace("jni")
+ SparkDirectoryUtil.get()
+ .namespace("jni")
.mkChildDirRandomly(UUID.randomUUID().toString())
.getAbsolutePath();
return createOrGet(tempRoot);
diff --git a/gluten-core/src/main/scala/org/apache/spark/util/SparkDirectoryUtil.scala b/gluten-core/src/main/scala/org/apache/spark/util/SparkDirectoryUtil.scala
index 1d5c21736..435ee8df7 100644
--- a/gluten-core/src/main/scala/org/apache/spark/util/SparkDirectoryUtil.scala
+++ b/gluten-core/src/main/scala/org/apache/spark/util/SparkDirectoryUtil.scala
@@ -16,7 +16,7 @@
*/
package org.apache.spark.util
-import org.apache.spark.SparkEnv
+import org.apache.spark.SparkConf
import org.apache.spark.internal.Logging
import _root_.org.apache.gluten.exception.GlutenException
@@ -30,8 +30,8 @@ import java.nio.file.Paths
* Manages Gluten's local directories, for storing jars, libs, spill files, or other temporary
* stuffs.
*/
-object SparkDirectoryUtil extends Logging {
- private val ROOTS = Utils.getConfiguredLocalDirs(SparkEnv.get.conf).flatMap {
+class SparkDirectoryUtil private (roots: Array[String]) extends Logging {
+ private val ROOTS = roots.flatMap {
rootDir =>
try {
val localDir = Utils.createDirectory(rootDir, "gluten")
@@ -65,6 +65,33 @@ object SparkDirectoryUtil extends Logging {
}
}
+/**
+ * Holder of the process-wide default [[SparkDirectoryUtil]] instance.
+ *
+ * `init` has to be called before `get()`; `get()` asserts that an instance exists.
+ */
+object SparkDirectoryUtil extends Logging {
+  private var INSTANCE: SparkDirectoryUtil = _
+
+  // Root dirs INSTANCE was created from. They are remembered here because the instance's own
+  // ROOTS is built from the results of Utils.createDirectory(rootDir, "gluten"), i.e. it does
+  // not hold the configured root strings and cannot be compared against them.
+  private var configuredRoots: Array[String] = Array.empty
+
+  /** Initializes the default instance from the local dirs configured in `conf`. */
+  def init(conf: SparkConf): Unit = synchronized {
+    init(Utils.getConfiguredLocalDirs(conf))
+  }
+
+  /**
+   * Creates the default instance on the first call. Later calls keep the existing instance and
+   * only log a warning when they request a different set of root dirs.
+   */
+  private def init(roots: Array[String]): Unit = synchronized {
+    if (INSTANCE == null) {
+      INSTANCE = new SparkDirectoryUtil(roots)
+      // Defensive copy so later mutation of the caller's array cannot skew the comparison.
+      configuredRoots = roots.clone()
+    } else if (configuredRoots.toSet != roots.toSet) {
+      // Compare configured roots with configured roots. Comparing INSTANCE.ROOTS with `roots`
+      // would flag a mismatch even when the configuration did not change.
+      val oldDirs = configuredRoots.mkString("Array(", ", ", ")")
+      val newDirs = roots.mkString("Array(", ", ", ")")
+      logWarning(
+        "Reinitialize SparkDirectoryUtil with different root dirs: " +
+          s"old: $oldDirs, new: $newDirs")
+    }
+  }
+
+  /** Returns the default instance; fails an assertion if `init` has not been called yet. */
+  def get(): SparkDirectoryUtil = synchronized {
+    assert(INSTANCE != null, "Default instance of SparkDirectoryUtil was not set yet")
+    INSTANCE
+  }
+}
+
class Namespace(private val parents: Array[File], private val name: String) {
val all = parents.map {
root =>
diff --git a/gluten-data/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java b/gluten-data/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
index e817c844d..e54724a59 100644
--- a/gluten-data/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
+++ b/gluten-data/src/main/java/org/apache/gluten/vectorized/NativePlanEvaluator.java
@@ -90,7 +90,8 @@ public class NativePlanEvaluator {
final long memoryManagerHandle = nmm.getNativeInstanceHandle();
final String spillDirPath =
- SparkDirectoryUtil.namespace("gluten-spill")
+ SparkDirectoryUtil.get()
+ .namespace("gluten-spill")
.mkChildDirRoundRobin(UUID.randomUUID().toString())
.getAbsolutePath();
diff --git a/gluten-data/src/main/scala/org/apache/spark/shuffle/ColumnarShuffleWriter.scala b/gluten-data/src/main/scala/org/apache/spark/shuffle/ColumnarShuffleWriter.scala
index f8867b255..fb933866c 100644
--- a/gluten-data/src/main/scala/org/apache/spark/shuffle/ColumnarShuffleWriter.scala
+++ b/gluten-data/src/main/scala/org/apache/spark/shuffle/ColumnarShuffleWriter.scala
@@ -56,6 +56,7 @@ class ColumnarShuffleWriter[K, V](
private var mapStatus: MapStatus = _
private val localDirs = SparkDirectoryUtil
+ .get()
.namespace("shuffle-write")
.all
.map(_.getAbsolutePath)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]