This is an automated email from the ASF dual-hosted git repository.

yongwww pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 8fbc7722a4 [Fix] Resolve deadlock in PopenPoolExecutor and LocalBuilder (#18219)
8fbc7722a4 is described below

commit 8fbc7722a4d53260f8e3ace9eadcd09c2237c5f6
Author: Wei Wang <[email protected]>
AuthorDate: Thu Aug 21 00:10:34 2025 +0800

    [Fix] Resolve deadlock in PopenPoolExecutor and LocalBuilder (#18219)
    
    - Add explicit shutdown flag in PopenPoolExecutor
    - Replace del with explicit shutdown() calls in LocalBuilder
---
 python/tvm/contrib/popen_pool.py                  | 31 ++++++++++++++++++++++-
 python/tvm/meta_schedule/builder/local_builder.py |  4 +--
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/python/tvm/contrib/popen_pool.py b/python/tvm/contrib/popen_pool.py
index d16cf31bc7..97f343bbca 100644
--- a/python/tvm/contrib/popen_pool.py
+++ b/python/tvm/contrib/popen_pool.py
@@ -366,11 +366,39 @@ class PopenPoolExecutor:
         self._maximum_process_uses = maximum_process_uses
         self._stdout = stdout
         self._stderr = stderr
+        self._shutdown = False
 
         if self._initializer is not None and not callable(self._initializer):
             raise TypeError("initializer must be callable for PopenPoolExecutor")
 
     def __del__(self):
+        """Destructor.
+
+        Note
+        ----
+        Called during garbage collection. This may be called later than expected.
+        Always call shutdown() explicitly to avoid deadlocks.
+        """
+        if not self._shutdown:
+            self.shutdown(wait=True)
+
+    def shutdown(self, wait=True):
+        """Shutdown the executor and clean up resources.
+
+        Parameters
+        ----------
+        wait : bool
+            If True, wait for pending work to complete.
+
+        Note
+        ----
+        DEADLOCK WARNING: This method can deadlock when called during garbage
+        collection due to exception reference cycles. When exceptions occur,
+        Python creates reference cycles that delay garbage collection. The
+        deadlock happens when: exception creates reference cycle → new pool
+        creates worker → GC cleans old pool → old pool's __del__ calls shutdown()
+        which tries to acquire locks again.
+        """
         self._lock.acquire()
         for worker in self._worker_map.values():
             try:
@@ -378,7 +406,8 @@ class PopenPoolExecutor:
             except ImportError:
                 pass
         self._lock.release()
-        self._threadpool.shutdown()
+        self._threadpool.shutdown(wait=wait)
+        self._shutdown = True
 
     def _worker_run(self, fn, args, kwargs):
         """Internal thread runner."""
diff --git a/python/tvm/meta_schedule/builder/local_builder.py b/python/tvm/meta_schedule/builder/local_builder.py
index ff738c6265..0f68ef7afb 100644
--- a/python/tvm/meta_schedule/builder/local_builder.py
+++ b/python/tvm/meta_schedule/builder/local_builder.py
@@ -192,7 +192,7 @@ class LocalBuilder(PyBuilder):
                 )
             else:
                 raise ValueError("Unreachable: unexpected result: {map_result}")
-        del pool
+        pool.shutdown()
         return results
 
     def _sanity_check(self) -> None:
@@ -208,7 +208,7 @@ class LocalBuilder(PyBuilder):
         )
         value = pool.submit(_check, self.f_build, self.f_export)
         value.result()
-        del pool
+        pool.shutdown()
 
 
 def _worker_func(

Reply via email to