This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm-ffi.git
The following commit(s) were added to refs/heads/main by this push:
new a97b7c6 [TEST] Fix the tensor loading order in test (#128)
a97b7c6 is described below
commit a97b7c600885a33023c5332d74ef63c40a0ce3e4
Author: Tianqi Chen <[email protected]>
AuthorDate: Wed Oct 15 11:37:29 2025 -0400
[TEST] Fix the tensor loading order in test (#128)
If we load a module within a local scope and a function from that module
creates and returns an object, the object's deleter resides in the loaded
library. If the module is unloaded before the object is destroyed, the deleter
may call an invalid address. Keep the module loaded until all returned objects
are deleted.
This PR fixes the test cases that have this issue.
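As an illustration only (not part of this change), one way to avoid the
unload-order problem entirely is to keep the loaded module in long-lived
global state, as the new docstring notes suggest. This is a minimal sketch;
the library path and func_create_and_return_tensor are placeholder names:

    import tvm_ffi

    # Keep the module referenced at global scope so the shared library, and
    # therefore the deleters of any objects it returns, stays loaded for the
    # whole process lifetime.
    _MOD = tvm_ffi.load_module("path/to/library.so")  # placeholder path

    def create_tensor(x):
        # Returned objects may outlive this call; that is safe because _MOD
        # (and the underlying library) is never unloaded early.
        return _MOD.func_create_and_return_tensor(x)  # placeholder function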
---
python/tvm_ffi/module.py | 27 +++++++++++++++++++++++++++
tests/python/test_load_inline.py | 32 +++++++++++++++++++++++---------
2 files changed, 50 insertions(+), 9 deletions(-)
diff --git a/python/tvm_ffi/module.py b/python/tvm_ffi/module.py
index ff05f93..659a641 100644
--- a/python/tvm_ffi/module.py
+++ b/python/tvm_ffi/module.py
@@ -54,6 +54,33 @@ class Module(core.Object):
--------
:py:func:`tvm_ffi.load_module`
+ Notes
+ -----
+ If you load a module within a local scope, be careful when any called function
+ creates and returns an object. The memory deallocation routines are part of
+ the library's code. If the module is unloaded before the object is destroyed,
+ the deleter may call an invalid address. Keep the module loaded until all returned
+ objects are deleted. You can safely use returned objects inside a nested function
+ that finishes before the module goes out of scope. When possible, consider keeping
+ the module alive in a long-lived/global scope (for example, in a global state) to
+ avoid premature unloading.
+
+ .. code-block:: python
+
+     def bad_pattern(x):
+         # Bad: unload order of `tensor` and `mod` is not guaranteed
+         mod = tvm_ffi.load_module("path/to/library.so")
+         tensor = mod.func_create_and_return_tensor(x)
+         # ... do something with the tensor
+
+     def good_pattern(x):
+         # Good: `tensor` is freed before `mod` goes out of scope
+         mod = tvm_ffi.load_module("path/to/library.so")
+
+         def run_some_tests():
+             tensor = mod.func_create_and_return_tensor(x)
+             # ... do something with the tensor
+
+         run_some_tests()
+
"""
# tvm-ffi-stubgen(begin): object/ffi.Module
diff --git a/tests/python/test_load_inline.py b/tests/python/test_load_inline.py
index 299e4c8..5b3e18d 100644
--- a/tests/python/test_load_inline.py
+++ b/tests/python/test_load_inline.py
@@ -243,7 +243,14 @@ def test_load_inline_with_env_tensor_allocator() -> None:
""",
functions=["return_add_one"],
)
- if torch is not None:
+ assert torch is not None
+
+ def run_check() -> None:
+ """Must run in a separate function to ensure deletion happens before
mod unloads.
+
+ When a module returns an object, the object deleter address is part of
the
+ loaded library. We need to keep the module loaded until the object is
deleted.
+ """
x_cpu = torch.asarray([1, 2, 3, 4, 5], dtype=torch.float32, device="cpu")
# test support for nested container passing
y_cpu = mod.return_add_one({"x": [x_cpu]})
@@ -252,6 +259,8 @@ def test_load_inline_with_env_tensor_allocator() -> None:
assert y_cpu.dtype == torch.float32
torch.testing.assert_close(x_cpu + 1, y_cpu)
+ run_check()
+
@pytest.mark.skipif(
torch is None or not torch.cuda.is_available(), reason="Requires torch and CUDA"
@@ -341,11 +350,16 @@ def test_cuda_memory_alloc_noleak() -> None:
""",
functions=["return_tensor"],
)
- x = torch.arange(1024 * 1024, dtype=torch.float32, device="cuda")
- current_allocated = torch.cuda.memory_allocated()
- repeat = 8
- for i in range(repeat):
- mod.return_tensor(x)
- diff = torch.cuda.memory_allocated() - current_allocated
- # memory should not grow as we loop over
- assert diff <= 1024**2 * 8
+
+ def run_check() -> None:
+ """Must run in a separate function to ensure deletion happens before
mod unloads."""
+ x = torch.arange(1024 * 1024, dtype=torch.float32, device="cuda")
+ current_allocated = torch.cuda.memory_allocated()
+ repeat = 8
+ for i in range(repeat):
+ mod.return_tensor(x)
+ diff = torch.cuda.memory_allocated() - current_allocated
+ # memory should not grow as we loop over
+ assert diff <= 1024**2 * 8
+
+ run_check()
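For reference, a sketch (not part of this commit) of an alternative to the
nested run_check pattern: explicitly drop the returned object before the
module reference goes away, relying on CPython's reference counting to run
the deleter promptly. The library path and func_create_and_return_tensor are
placeholder names:

    import tvm_ffi

    def explicit_cleanup(x):
        mod = tvm_ffi.load_module("path/to/library.so")  # placeholder path
        tensor = mod.func_create_and_return_tensor(x)    # placeholder function
        # ... do something with the tensor
        del tensor  # drop the object while the library is still loaded
        del mod     # only now is it safe for the module to be unloaded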