This is an automated email from the ASF dual-hosted git repository.

nicholasjiang pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/celeborn.git


The following commit(s) were added to refs/heads/main by this push:
     new 00bcf8bc5 [CELEBORN-2198] Fix NPE in tryWithTimeoutAndCallback test 
due to lazy deviceCheckThreadPool not initialized
00bcf8bc5 is described below

commit 00bcf8bc5379b5e43607655c0242e11112f1bf7f
Author: zhouhai <[email protected]>
AuthorDate: Thu Nov 6 14:40:29 2025 +0800

    [CELEBORN-2198] Fix NPE in tryWithTimeoutAndCallback test due to lazy 
deviceCheckThreadPool not initialized
    
    ### What changes were proposed in this pull request?
    
    Use a fallback thread pool in the test when 
`DeviceMonitor.deviceCheckThreadPool` is uninitialized.
    
    ### Why are the changes needed?
    
    The unit test `tryWithTimeoutAndCallback` in `DeviceMonitorSuite` fails 
with `NullPointerException` when run in isolation. The root cause is as follows:
    
    `DeviceMonitor.deviceCheckThreadPool` is lazily initialized: it is created 
only when `DeviceMonitor.createDeviceMonitor()` is first called, so it is still 
`null` when this test runs on its own.
    
    ```
    java.lang.NullPointerException was thrown.
    java.lang.NullPointerException
      at 
org.apache.celeborn.common.util.Utils$.tryWithTimeoutAndCallback(Utils.scala:1028)
      at 
org.apache.celeborn.service.deploy.worker.storage.DeviceMonitorSuite.$anonfun$new$17(DeviceMonitorSuite.scala:371)
      at 
org.apache.celeborn.service.deploy.worker.storage.DeviceMonitorSuite.$anonfun$new$17$adapted(DeviceMonitorSuite.scala:368)
      at scala.collection.immutable.Range.foreach(Range.scala:158)
      at 
org.apache.celeborn.service.deploy.worker.storage.DeviceMonitorSuite.$anonfun$new$14(DeviceMonitorSuite.scala:368)
    ```
    
    ### Does this PR resolve a correctness bug?
    
    No.
    
    ### Does this PR introduce _any_ user-facing change?
    
    No.
    
    ### How was this patch tested?
    
    Re-ran the test in isolation and as part of the full suite — all pass.
    
    Closes #3529 from yew1eb/CELEBORN-2198.
    
    Authored-by: zhouhai <[email protected]>
    Signed-off-by: SteNicholas <[email protected]>
---
 .../service/deploy/worker/storage/DeviceMonitorSuite.scala    | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git 
a/worker/src/test/scala/org/apache/celeborn/service/deploy/worker/storage/DeviceMonitorSuite.scala
 
b/worker/src/test/scala/org/apache/celeborn/service/deploy/worker/storage/DeviceMonitorSuite.scala
index dad38126b..7a8e996ae 100644
--- 
a/worker/src/test/scala/org/apache/celeborn/service/deploy/worker/storage/DeviceMonitorSuite.scala
+++ 
b/worker/src/test/scala/org/apache/celeborn/service/deploy/worker/storage/DeviceMonitorSuite.scala
@@ -32,7 +32,7 @@ import org.apache.celeborn.common.CelebornConf
 import 
org.apache.celeborn.common.CelebornConf.WORKER_DISK_MONITOR_CHECK_INTERVAL
 import org.apache.celeborn.common.meta.{DeviceInfo, DiskInfo, DiskStatus}
 import org.apache.celeborn.common.protocol.StorageInfo
-import org.apache.celeborn.common.util.Utils
+import org.apache.celeborn.common.util.{ThreadUtils, Utils}
 import org.apache.celeborn.service.deploy.worker.WorkerSource
 
 class DeviceMonitorSuite extends AnyFunSuite {
@@ -356,6 +356,11 @@ class DeviceMonitorSuite extends AnyFunSuite {
   }
 
   test("tryWithTimeoutAndCallback") {
+    val pool = Option(DeviceMonitor.deviceCheckThreadPool).getOrElse {
+      // Initialize a temporary pool for test
+      ThreadUtils.newDaemonCachedThreadPool("test-worker-device-checker")
+    }
+
     val fn = (i: Int) => {
       0 until 100 foreach (x => {
         // scalastyle:off println
@@ -368,10 +373,10 @@ class DeviceMonitorSuite extends AnyFunSuite {
     0 until 3 foreach (i => {
       val result = Utils.tryWithTimeoutAndCallback({
         fn(i)
-      })(false)(DeviceMonitor.deviceCheckThreadPool, 1)
+      })(false)(pool, 1)
       assert(!result)
     })
-    DeviceMonitor.deviceCheckThreadPool.shutdownNow()
+    pool.shutdownNow()
   }
 
   test("monitor non-critical error metrics") {

Reply via email to