zhztheplayer commented on code in PR #10708:
URL:
https://github.com/apache/incubator-gluten/pull/10708#discussion_r2355594919
##########
gluten-core/src/main/scala/org/apache/spark/util/SparkDirectoryUtil.scala:
##########
@@ -67,28 +68,24 @@ class SparkDirectoryUtil private (val roots: Array[String])
extends Logging {
}
object SparkDirectoryUtil extends Logging {
+ private val INSTANCE_INITIALIZED = new AtomicBoolean(false)
private var INSTANCE: SparkDirectoryUtil = _
- def init(conf: SparkConf): Unit = synchronized {
+ def init(conf: SparkConf): Unit = {
val roots = Utils.getConfiguredLocalDirs(conf)
init(roots)
}
- private def init(roots: Array[String]): Unit = synchronized {
- if (INSTANCE == null) {
+ private def init(roots: Array[String]): Unit = {
+ if (INSTANCE_INITIALIZED.compareAndSet(false, true)) {
INSTANCE = new SparkDirectoryUtil(roots)
- return
- }
- if (INSTANCE.roots.toSet != roots.toSet) {
- throw new IllegalArgumentException(
- s"Reinitialize SparkDirectoryUtil with different root dirs: old:
${INSTANCE.ROOTS
- .mkString("Array(", ", ", ")")}, new: ${roots.mkString("Array(",
", ", ")")}"
- )
+ } else {
+ logWarning("SparkDirectoryUtil is already initialized")
}
}
- def get(): SparkDirectoryUtil = synchronized {
- assert(INSTANCE != null, "Default instance of SparkDirectoryUtil was not
set yet")
+ def get(): SparkDirectoryUtil = {
+ assert(INSTANCE_INITIALIZED.get(), "Default instance of SparkDirectoryUtil
was not set yet")
INSTANCE
}
Review Comment:
Hi @beliefer, thank you for continuing to iterate on the code, but it is still
problematic :(
When thread 1 reaches line 81 but hasn't yet set `INSTANCE`, thread 2 can
pass line 88 and access `INSTANCE` (which may still be null at that point),
which may give an unexpected result to the caller.
I know it's a corner case, but we should make sure the new code covers what
is covered by the old code completely.
##########
gluten-core/src/main/scala/org/apache/spark/util/SparkDirectoryUtil.scala:
##########
@@ -67,28 +68,24 @@ class SparkDirectoryUtil private (val roots: Array[String])
extends Logging {
}
object SparkDirectoryUtil extends Logging {
+ private val INSTANCE_INITIALIZED = new AtomicBoolean(false)
private var INSTANCE: SparkDirectoryUtil = _
- def init(conf: SparkConf): Unit = synchronized {
+ def init(conf: SparkConf): Unit = {
val roots = Utils.getConfiguredLocalDirs(conf)
init(roots)
}
- private def init(roots: Array[String]): Unit = synchronized {
- if (INSTANCE == null) {
+ private def init(roots: Array[String]): Unit = {
+ if (INSTANCE_INITIALIZED.compareAndSet(false, true)) {
INSTANCE = new SparkDirectoryUtil(roots)
- return
- }
- if (INSTANCE.roots.toSet != roots.toSet) {
- throw new IllegalArgumentException(
- s"Reinitialize SparkDirectoryUtil with different root dirs: old:
${INSTANCE.ROOTS
- .mkString("Array(", ", ", ")")}, new: ${roots.mkString("Array(",
", ", ")")}"
- )
+ } else {
+ logWarning("SparkDirectoryUtil is already initialized")
}
}
- def get(): SparkDirectoryUtil = synchronized {
- assert(INSTANCE != null, "Default instance of SparkDirectoryUtil was not
set yet")
+ def get(): SparkDirectoryUtil = {
+ assert(INSTANCE_INITIALIZED.get(), "Default instance of SparkDirectoryUtil
was not set yet")
INSTANCE
}
Review Comment:
Hi @beliefer, thank you for continuing to iterate on the code, but it is still
problematic :(
When thread 1 reaches line 81 but hasn't yet set `INSTANCE`, thread 2 can
pass line 88 and access `INSTANCE`, which may give an unexpected result to
the caller.
I know it's a corner case, but we should make sure the new code covers what
is covered by the old code completely.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]