This is an automated email from the ASF dual-hosted git repository.
yongwww pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 8fbc7722a4 [Fix] Resolve deadlock in PopenPoolExecutor and LocalBuilder (#18219)
8fbc7722a4 is described below
commit 8fbc7722a4d53260f8e3ace9eadcd09c2237c5f6
Author: Wei Wang <[email protected]>
AuthorDate: Thu Aug 21 00:10:34 2025 +0800
[Fix] Resolve deadlock in PopenPoolExecutor and LocalBuilder (#18219)
- Add explicit shutdown flag in PopenPoolExecutor
- Replace del with explicit shutdown() calls in LocalBuilder
---
python/tvm/contrib/popen_pool.py | 31 ++++++++++++++++++++++-
python/tvm/meta_schedule/builder/local_builder.py | 4 +--
2 files changed, 32 insertions(+), 3 deletions(-)
diff --git a/python/tvm/contrib/popen_pool.py b/python/tvm/contrib/popen_pool.py
index d16cf31bc7..97f343bbca 100644
--- a/python/tvm/contrib/popen_pool.py
+++ b/python/tvm/contrib/popen_pool.py
@@ -366,11 +366,39 @@ class PopenPoolExecutor:
self._maximum_process_uses = maximum_process_uses
self._stdout = stdout
self._stderr = stderr
+ self._shutdown = False
if self._initializer is not None and not callable(self._initializer):
raise TypeError("initializer must be callable for PopenPoolExecutor")
def __del__(self):
+ """Destructor.
+
+ Note
+ ----
+ Called during garbage collection. This may be called later than expected.
+ Always call shutdown() explicitly to avoid deadlocks.
+ """
+ if not self._shutdown:
+ self.shutdown(wait=True)
+
+ def shutdown(self, wait=True):
+ """Shutdown the executor and clean up resources.
+
+ Parameters
+ ----------
+ wait : bool
+ If True, wait for pending work to complete.
+
+ Note
+ ----
+ DEADLOCK WARNING: This method can deadlock when called during garbage
+ collection due to exception reference cycles. When exceptions occur,
+ Python creates reference cycles that delay garbage collection. The
+ deadlock happens when: exception creates reference cycle → new pool
+ creates worker → GC cleans old pool → old pool's __del__ calls shutdown()
+ which tries to acquire locks again.
+ """
self._lock.acquire()
for worker in self._worker_map.values():
try:
@@ -378,7 +406,8 @@ class PopenPoolExecutor:
except ImportError:
pass
self._lock.release()
- self._threadpool.shutdown()
+ self._threadpool.shutdown(wait=wait)
+ self._shutdown = True
def _worker_run(self, fn, args, kwargs):
"""Internal thread runner."""
diff --git a/python/tvm/meta_schedule/builder/local_builder.py b/python/tvm/meta_schedule/builder/local_builder.py
index ff738c6265..0f68ef7afb 100644
--- a/python/tvm/meta_schedule/builder/local_builder.py
+++ b/python/tvm/meta_schedule/builder/local_builder.py
@@ -192,7 +192,7 @@ class LocalBuilder(PyBuilder):
)
else:
raise ValueError("Unreachable: unexpected result: {map_result}")
- del pool
+ pool.shutdown()
return results
def _sanity_check(self) -> None:
@@ -208,7 +208,7 @@ class LocalBuilder(PyBuilder):
)
value = pool.submit(_check, self.f_build, self.f_export)
value.result()
- del pool
+ pool.shutdown()
def _worker_func(