This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a commit to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/unity by this push:
new 2f7e0d578f [Unity] Ensure memory planning cross-function independence
(#16318)
2f7e0d578f is described below
commit 2f7e0d578f777a630bb6e4a79d9c2ec52b7be461
Author: Ruihang Lai <[email protected]>
AuthorDate: Sun Dec 31 13:10:02 2023 -0500
[Unity] Ensure memory planning cross-function independence (#16318)
Prior to this PR, the memory planning for different Relax functions
is not independent -- storage tokens are shared across different
Relax functions.
This will incur memory overuse sometimes. For example, tensor `A`
in `func1` has 128 bytes, tensor `B` in `func2` has 2048 bytes.
If the memory planning decides to share the storage token for `A`
and `B`, the shared token will have size 2048 bytes.
Consider the case when at runtime only `func1` is executed, and
`func2` is never invoked. In this case, only 128 bytes for tensor
`A` are needed, while a 2048-byte chunk is allocated in total,
which is a 16x memory overuse.
This PR makes the memory planning across different Relax functions
independent. That means in the example above, when only `func1`
is executed, only 128 bytes will be allocated.
---
src/relax/transform/static_plan_block_memory.cc | 8 +++
.../test_transform_static_plan_block_memory.py | 73 +++++++++++++++++++++-
2 files changed, 80 insertions(+), 1 deletion(-)
diff --git a/src/relax/transform/static_plan_block_memory.cc
b/src/relax/transform/static_plan_block_memory.cc
index 4a2a1555ff..3873f624ef 100644
--- a/src/relax/transform/static_plan_block_memory.cc
+++ b/src/relax/transform/static_plan_block_memory.cc
@@ -219,6 +219,12 @@ class TokenAllocator1D {
available_pool_[token->dtype].insert({token->bytes, token});
}
+ /*! \brief Clear the allocator. */
+ void Clear() {
+ available_pool_.clear();
+ full_pool_.clear();
+ }
+
private:
/*! \brief A constant scale representing the token search range. */
const int match_range_{16};
@@ -569,6 +575,8 @@ class StorageAllocator : public StorageAllocatorBaseVisitor
{
if (func == nullptr) {
continue;
}
+ // Clear the allocator to make the planning of different functions
independent.
+ allocator_.Clear();
this->VisitExpr_(func);
}
}
diff --git a/tests/python/relax/test_transform_static_plan_block_memory.py
b/tests/python/relax/test_transform_static_plan_block_memory.py
index 0c24f90efc..f12b5b9fc1 100644
--- a/tests/python/relax/test_transform_static_plan_block_memory.py
+++ b/tests/python/relax/test_transform_static_plan_block_memory.py
@@ -18,7 +18,9 @@
import tvm
import tvm.testing
from tvm import relax
-from tvm.script import ir as I, relax as R, tir as T
+from tvm.script import ir as I
+from tvm.script import relax as R
+from tvm.script import tir as T
def test_basic():
@@ -1105,5 +1107,74 @@ def test_call_tir_dyn():
tvm.ir.assert_structural_equal(mod, Expected)
+def test_function_independence():
+ # fmt: off
+ @tvm.script.ir_module
+ class Module:
+ @T.prim_func
+ def exp(A: T.handle, B: T.handle):
+ T.evaluate(0)
+
+ @R.function
+ def func1(x: R.Tensor((8,), dtype="float32")) -> R.Tensor((8,),
dtype="float32"):
+ R.func_attr({"relax.force_pure": 1})
+ cls = Module
+ alloc: R.Tensor((8,), dtype="float32") =
R.builtin.alloc_tensor(R.shape([8,]), dtype="float32", runtime_device_index=0)
+ _: R.Tuple() = cls.exp(x, alloc)
+ lv: R.Tensor((8,), dtype="float32") = alloc
+ alloc1: R.Tensor((8,), dtype="float32") =
R.builtin.alloc_tensor(R.shape([8,]), dtype="float32", runtime_device_index=0)
+ _1: R.Tuple() = cls.exp(lv, alloc1)
+ gv: R.Tensor((8,), dtype="float32") = alloc1
+ return gv
+
+ @R.function
+ def func2(x: R.Tensor((10,), dtype="float32")) -> R.Tensor((10,),
dtype="float32"):
+ R.func_attr({"relax.force_pure": 1})
+ cls = Module
+ alloc: R.Tensor((10,), dtype="float32") =
R.builtin.alloc_tensor(R.shape([10,]), dtype="float32", runtime_device_index=0)
+ _: R.Tuple() = cls.exp(x, alloc)
+ lv: R.Tensor((10,), dtype="float32") = alloc
+ alloc1: R.Tensor((10,), dtype="float32") =
R.builtin.alloc_tensor(R.shape([10,]), dtype="float32", runtime_device_index=0)
+ _1: R.Tuple() = cls.exp(lv, alloc1)
+ gv: R.Tensor((10,), dtype="float32") = alloc1
+ return gv
+
+ @I.ir_module
+ class Expected:
+ @T.prim_func
+ def exp(A: T.handle, B: T.handle):
+ T.evaluate(0)
+
+ @R.function
+ def func1(x: R.Tensor((8,), dtype="float32")) -> R.Tensor((8,),
dtype="float32"):
+ R.func_attr({"relax.force_pure": 1})
+ cls = Expected
+ storage: R.Object = R.memory.alloc_storage(R.shape([32]),
R.prim_value(0), R.str("global"), R.dtype("float32"))
+ alloc: R.Tensor((8,), dtype="float32") =
R.memory.alloc_tensor(storage, R.prim_value(0), R.shape([8]),
R.dtype("float32"))
+ _: R.Tuple = cls.exp(x, alloc)
+ lv: R.Tensor((8,), dtype="float32") = alloc
+ alloc1: R.Tensor((8,), dtype="float32") =
R.builtin.alloc_tensor(R.shape([8]), R.dtype("float32"), R.prim_value(0))
+ _1: R.Tuple = cls.exp(lv, alloc1)
+ gv: R.Tensor((8,), dtype="float32") = alloc1
+ return gv
+
+ @R.function
+ def func2(x: R.Tensor((10,), dtype="float32")) -> R.Tensor((10,),
dtype="float32"):
+ R.func_attr({"relax.force_pure": 1})
+ cls = Expected
+ storage1: R.Object = R.memory.alloc_storage(R.shape([40]),
R.prim_value(0), R.str("global"), R.dtype("float32"))
+ alloc: R.Tensor((10,), dtype="float32") =
R.memory.alloc_tensor(storage1, R.prim_value(0), R.shape([10]),
R.dtype("float32"))
+ _: R.Tuple = cls.exp(x, alloc)
+ lv: R.Tensor((10,), dtype="float32") = alloc
+ alloc1: R.Tensor((10,), dtype="float32") =
R.builtin.alloc_tensor(R.shape([10]), R.dtype("float32"), R.prim_value(0))
+ _1: R.Tuple = cls.exp(lv, alloc1)
+ gv: R.Tensor((10,), dtype="float32") = alloc1
+ return gv
+ # fmt: on
+
+ mod = relax.transform.StaticPlanBlockMemory()(Module)
+ tvm.ir.assert_structural_equal(mod, Expected)
+
+
if __name__ == "__main__":
tvm.testing.main()