This is an automated email from the ASF dual-hosted git repository.

manupa pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 079eb4e  [microNPU] Add a pass to move allocate nodes to the outer scope (#10725)
079eb4e is described below

commit 079eb4e992868da6f0b5b345e7ac80e60273f6ba
Author: Luke Hutton <[email protected]>
AuthorDate: Fri Mar 25 18:23:17 2022 +0000

    [microNPU] Add a pass to move allocate nodes to the outer scope (#10725)
    
    * [microNPU] Add a pass to move allocate nodes to the outer scope
    
    Adds a pass called `HoistAllocates` to move allocate nodes to the top
    of the body of the main function. In doing so, it opens the door to
    other optimizations that need to swap the ordering of external calls.
    
    Pass illustration:
    (before)
    ```
    allocate {
        extern_call {
            allocate {
                extern_call {
    
                }
            }
        }
    }
    ```
    
    (after)
    ```
    allocate {
        allocate {
            extern_call
            extern_call
        }
    }
    ```
    
    Change-Id: Ibcfc3c75b15deebb5c6645a4923a6ddf683b37c4
    
    * address comments
    
    * uses prim func pass, rather than module pass.
    * adds error message informing user to run this pass with LowerToTIR()
      pass for now.
    
    Change-Id: I57757b9dc5bff0208034a974a341c09cce0294bc
    
    * Support allocates when not followed by a sequence statement
    
    With a test to back this case up.
    
    Change-Id: I670809f5ee53b583a15d9b783852dda3089756e9
    
    * Add new directory tir/contrib/ethosu to cmake build
    
    Change-Id: I3e9f24adfe992ace4e03238a18a8378b03257e1a
---
 cmake/modules/contrib/EthosU.cmake                 |   3 +-
 .../tvm/relay/backend/contrib/ethosu/_ffi_api.py   |   1 +
 .../relay/backend/contrib/ethosu/tir/compiler.py   |   1 +
 .../tvm/relay/backend/contrib/ethosu/tir/passes.py |  28 +-
 src/tir/contrib/ethosu/passes.cc                   | 128 ++++++++++
 .../contrib/test_ethosu/test_hoist_allocates.py    | 284 +++++++++++++++++++++
 6 files changed, 437 insertions(+), 8 deletions(-)

diff --git a/cmake/modules/contrib/EthosU.cmake b/cmake/modules/contrib/EthosU.cmake
index 0edeae3..bdd8846 100644
--- a/cmake/modules/contrib/EthosU.cmake
+++ b/cmake/modules/contrib/EthosU.cmake
@@ -19,7 +19,8 @@ if(USE_ETHOSU)
   tvm_file_glob(GLOB COMPILER_ETHOSU_SRCS
                 src/relay/backend/contrib/ethosu/*
                 src/contrib/ethosu/cascader/*
-                src/contrib/ethosu/cascader/parts/*)
+                src/contrib/ethosu/cascader/parts/*
+                src/tir/contrib/ethosu/*)
   list(APPEND COMPILER_SRCS ${COMPILER_ETHOSU_SRCS})
 else()
   # Keeping just utils.cc because it has Object definitions
diff --git a/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py b/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py
index 22eb982..2057790 100644
--- a/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py
+++ b/python/tvm/relay/backend/contrib/ethosu/_ffi_api.py
@@ -18,3 +18,4 @@
 import tvm._ffi  # type: ignore
 
 tvm._ffi._init_api("relay.ext.ethos-u", __name__)
+tvm._ffi._init_api("tir.contrib.ethos-u", __name__)
diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py b/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py
index aa15d91..707f6b6 100644
--- a/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py
+++ b/python/tvm/relay/backend/contrib/ethosu/tir/compiler.py
@@ -88,6 +88,7 @@ def lower_ethosu(sch, args, const_dict, name="main"):
         mod = ethosu_passes.ReplaceOperators()(mod)
         mod = tvm.tir.transform.RemoveNoOp()(mod)
         mod, const_dict = ethosu_passes.EncodeConstants(const_dict)(mod)
+        mod = ethosu_passes.HoistAllocates()(mod)
         disable_storage_rewrite = curr_cfg.get("tir.disable_storage_rewrite", False)
         if not disable_storage_rewrite:
             mod = tvm.tir.transform.StorageRewrite()(mod)
diff --git a/python/tvm/relay/backend/contrib/ethosu/tir/passes.py b/python/tvm/relay/backend/contrib/ethosu/tir/passes.py
index 5f0b9fe..5c143815 100644
--- a/python/tvm/relay/backend/contrib/ethosu/tir/passes.py
+++ b/python/tvm/relay/backend/contrib/ethosu/tir/passes.py
@@ -30,6 +30,8 @@ from .unary_elementwise import get_unary_elementwise_params
 from .transform import get_copy_params
 from .utils import get_weights_buffer, get_scale_bias_buffer
 
+from .. import _ffi_api
+
 
 def RemoveZeroStores():
     """This pass removes stores which just store zero to initialise buffers.
@@ -48,7 +50,7 @@ def RemoveZeroStores():
         )
 
     return tvm.tir.transform.prim_func_pass(
-        _ftransform, opt_level=0, name="tir.ethosu.remove_zero_stores"
+        _ftransform, opt_level=0, name="tir.contrib.ethos-u.remove_zero_stores"
     )
 
 
@@ -207,7 +209,7 @@ def ReplaceOperators():
         )
 
     return tvm.tir.transform.prim_func_pass(
-        _ftransform, opt_level=0, name="tir.ethosu.replace_operators"
+        _ftransform, opt_level=0, name="tir.contrib.ethos-u.replace_operators"
     )
 
 
@@ -296,7 +298,7 @@ def DivideConstants(const_dict):
 
     def _divide_constants(mod):
         transform_func = tvm.tir.transform.prim_func_pass(
-            _ftransform, opt_level=0, name="tir.ethosu.divide_constants"
+            _ftransform, opt_level=0, name="tir.contrib.ethos-u.divide_constants"
         )
         new_func = transform_func(mod)
         return new_func, new_const_dict
@@ -549,7 +551,7 @@ def EncodeConstants(const_dict):
         for key, value in divided_const_dict.items():
             const_dict[key] = value
         transform_func = tvm.tir.transform.prim_func_pass(
-            _ftransform, opt_level=0, name="tir.ethosu.encode_constants"
+            _ftransform, opt_level=0, name="tir.contrib.ethos-u.encode_constants"
         )
         new_func = transform_func(mod)
         return new_func, new_const_dict
@@ -584,7 +586,7 @@ def AnnotateAllocates():
         )
 
     return tvm.tir.transform.prim_func_pass(
-        _ftransform, opt_level=0, name="tir.ethosu.annotate_allocates"
+        _ftransform, opt_level=0, name="tir.contrib.ethos-u.annotate_allocates"
     )
 
 
@@ -751,7 +753,7 @@ def RemoveConcatenates():
         )
 
     return tvm.tir.transform.prim_func_pass(
-        _ftransform, opt_level=0, name="tir.ethosu.remove_concatenates"
+        _ftransform, opt_level=0, name="tir.contrib.ethos-u.remove_concatenates"
     )
 
 
@@ -795,9 +797,21 @@ def CreatePrimFuncWithoutConstants(const_dict):
 
     def _create_primfunc_without_constants(mod):
         transform_func = tvm.tir.transform.prim_func_pass(
-            _ftransform, opt_level=0, name="tir.ethosu.CreatePrimFuncWithoutConstants"
+            _ftransform, opt_level=0, name="tir.contrib.ethos-u.CreatePrimFuncWithoutConstants"
         )
         mod = transform_func(mod)
         return mod, new_const_dict
 
     return _create_primfunc_without_constants
+
+
+def HoistAllocates() -> tvm.IRModule:
+    """
+    Hoist allocate nodes up to the top of the body of the main function.
+
+    Returns
+    -------
+    tvm.transform.Pass
+        The pass that hoists allocate nodes when it is applied to a module.
+    """
+    return _ffi_api.HoistAllocates()
diff --git a/src/tir/contrib/ethosu/passes.cc b/src/tir/contrib/ethosu/passes.cc
new file mode 100644
index 0000000..7641071
--- /dev/null
+++ b/src/tir/contrib/ethosu/passes.cc
@@ -0,0 +1,128 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tir/contrib/ethosu/passes.cc
+ *
+ * \brief Passes used in TIR lowering for the microNPU compiler.
+ */
+#include <tvm/tir/builtin.h>
+#include <tvm/tir/function.h>
+#include <tvm/tir/stmt_functor.h>
+#include <tvm/tir/transform.h>
+
+namespace tvm {
+namespace tir {
+namespace contrib {
+namespace ethosu {
+
+/*!
+ * \brief This mutator moves allocates to the top of the body of the main
+ * function.
+ *
+ * Note: This pass can currently only be run in conjunction with the
+ * LowerToTIR() pass as it expects a single primitive function called
+ * "main" that is being offloaded to the NPU.
+ *
+ * For example,
+ * Before:
+ *   allocate {
+ *       extern_call(...)
+ *       allocate {
+ *           extern_call(...)
+ *       }
+ *   }
+ *
+ * After:
+ *   allocate {
+ *       allocate {
+ *           extern_call(...)
+ *           extern_call(...)
+ *       }
+ *   }
+ */
+class HoistAllocatesMutator : public StmtExprMutator {
+ public:
+  HoistAllocatesMutator() {}
+
+  PrimFunc operator()(PrimFunc main_func) {
+    Stmt new_main_func_body = this->VisitStmt(main_func->body);
+
+    // Write all allocates that were removed in reverse order
+    for (auto it = allocates_.rbegin(); it != allocates_.rend(); it++) {
+      Allocate current_alloc = *it;
+      if (it != allocates_.rbegin()) {
+        new_main_func_body = SeqStmt({new_main_func_body});
+      }
+      new_main_func_body =
+          Allocate(current_alloc->buffer_var, current_alloc->dtype, current_alloc->extents,
+                   current_alloc->condition, new_main_func_body, current_alloc->annotations,
+                   current_alloc->span);
+    }
+
+    PrimFunc new_main_func =
+        PrimFunc(main_func->params, new_main_func_body, main_func->ret_type, main_func->buffer_map,
+                 main_func->preflattened_buffer_map, main_func->attrs);
+    return new_main_func;
+  }
+
+ private:
+  Stmt VisitStmt_(const AllocateNode* op) override {
+    allocates_.push_back(GetRef<Allocate>(op));
+
+    // Skip the allocate node itself
+    if (const auto* seq = op->body.as<SeqStmtNode>()) {
+      // Traverse the allocate body recursively and flatten
+      Array<Stmt> new_stmts;
+      new_stmts.reserve(seq->seq.size());
+      for (const Stmt& old_stmt : seq->seq) {
+        new_stmts.push_back(VisitStmt(old_stmt));
+      }
+      return SeqStmt::Flatten(new_stmts);
+    } else {
+      return VisitStmt(op->body);
+    }
+  }
+
+  /*! A stack to store allocates as they are visited. */
+  std::vector<Allocate> allocates_;
+};
+
+/*!
+ * \brief A pass to hoist allocate nodes to the top of the body of the main function.
+ *
+ * \return tvm::transform::Pass
+ */
+tvm::transform::Pass HoistAllocates() {
+  auto pass_func = [=](PrimFunc f, IRModule mod, tvm::transform::PassContext ctx) {
+    ICHECK(mod->GetGlobalVars().size() == 1 && mod->ContainGlobalVar("main"))
+        << "Expected a single primitive function called 'main'. Please run the HoistAllocates pass "
+           "in conjunction with the LowerToTIR() pass.";
+    return HoistAllocatesMutator()(f);
+  };
+  return tvm::tir::transform::CreatePrimFuncPass(pass_func, 0, "tir.contrib.ethos-u.HoistAllocates",
+                                                 {});
+}
+
+TVM_REGISTER_GLOBAL("tir.contrib.ethos-u.HoistAllocates").set_body_typed(HoistAllocates);
+
+}  // namespace ethosu
+}  // namespace contrib
+}  // namespace tir
+}  // namespace tvm
diff --git a/tests/python/contrib/test_ethosu/test_hoist_allocates.py b/tests/python/contrib/test_ethosu/test_hoist_allocates.py
new file mode 100644
index 0000000..86143f9
--- /dev/null
+++ b/tests/python/contrib/test_ethosu/test_hoist_allocates.py
@@ -0,0 +1,284 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Testing the pass that moves allocate nodes to the body of the function.
+"""
+# pylint: disable=wrong-import-position
+
+import pytest
+
+pytest.importorskip("ethosu.vela")
+
+import tvm
+from tvm.script import tir as T
+from tvm.relay.backend.contrib.ethosu.tir.passes import HoistAllocates
+
+
+class ExtractAllocateInfo:
+    """
+    Extracts information from allocate nodes, which we will use to sanity-check the allocates
+    after mutation.
+    """
+
+    def __init__(self):
+        self.allocates_info = []
+
+    def __call__(self, mod):
+        tvm.tir.stmt_functor.ir_transform(mod["main"].body, self._pre_visit, None, ["tir.Allocate"])
+        return self.allocates_info
+
+    def _pre_visit(self, stmt):
+        self.allocates_info.append(
+            {"extents": stmt.extents, "dtype": stmt.dtype, "condition": stmt.condition}
+        )
+
+
+def CheckAllocates(allocate_info):  # pylint: disable=invalid-name
+    """
+    Checks that all allocates have been visited before an external call has been visited.
+    Additionally, checks that the information for each allocate is what is expected.
+    """
+
+    allocate_idx = 0
+    expected_num_allocates = len(allocate_info)
+
+    def _pre_visit(stmt):
+        nonlocal allocate_idx, expected_num_allocates
+
+        if isinstance(stmt, tvm.tir.Allocate):
+            expected = allocate_info[allocate_idx]
+            assert (
+                stmt.extents == expected["extents"]
+            ), f"Allocate extents {stmt.extents} did not match expected {expected['extents']}"
+            assert (
+                stmt.dtype == expected["dtype"]
+            ), f"Allocate dtype {stmt.dtype} did not match expected {expected['dtype']}"
+            assert (
+                stmt.condition == expected["condition"]
+            ), f"Allocate condition {stmt.condition} did not match expected {expected['condition']}"
+
+            allocate_idx += 1
+        else:
+            assert (
+                allocate_idx == expected_num_allocates
+            ), "A call node was visited before all allocates"
+
+    def _ftransform(f, mod, ctx):
+        f.with_body(
+            tvm.tir.stmt_functor.ir_transform(
+                f.body, _pre_visit, None, ["tir.Allocate", "tir.Call"]
+            )
+        )
+
+    return tvm.tir.transform.prim_func_pass(_ftransform, opt_level=0)
+
+
+def test_double_convolution():
+    """
+    Test to check the HoistAllocates pass works on a function with two convolutions.
+    """
+
+    # fmt: off
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def main(placeholder: T.Buffer[(3402,), "int8"], placeholder_encoded: 
T.Buffer[(128,), "uint8"], placeholder_encoded_1: T.Buffer[(32,), "uint8"], 
placeholder_encoded_2: T.Buffer[(128,), "uint8"], placeholder_encoded_3: 
T.Buffer[(32,), "uint8"], ethosu_write: T.Buffer[(3402,), "int8"]) -> None:
+            # function attr dict
+            T.func_attr({"from_legacy_te_schedule": True, "global_symbol": 
"main", "tir.noalias": True})
+            T.preflattened_buffer(placeholder, [1, 27, 42, 3], dtype="int8", 
data=placeholder.data)
+            T.preflattened_buffer(placeholder_encoded, [3, 3, 2, 3], 
dtype="int8")
+            T.preflattened_buffer(placeholder_encoded_1, [3, 10], 
dtype="uint8")
+            T.preflattened_buffer(placeholder_encoded_2, [3, 3, 2, 3], 
dtype="int8")
+            T.preflattened_buffer(placeholder_encoded_3, [3, 10], 
dtype="uint8")
+            T.preflattened_buffer(ethosu_write, [1, 27, 42, 3], dtype="int8", 
data=ethosu_write.data)
+            # body
+            placeholder_global = T.allocate([128], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 
128, placeholder_global[0], dtype="handle"))
+            placeholder_d_global = T.allocate([32], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 
32, placeholder_d_global[0], dtype="handle"))
+            ethosu_write_2 = T.allocate([18144], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 
0, 42, placeholder[0], 0, 0, 0, T.float32(0.0039215646684169769), -128, "NHWC", 
126, 3, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write_2[0], 0, 0, 0, 
T.float32(0.031308155506849289), -128, "NHCWB16", 672, 16, 1, 2, 3, 1, 1, 1, 2, 
placeholder_global[0], 128, 0, placeholder_d_global[0], 32, 2, 0, 2, 1, "NONE", 
0, 0, "TFL", "NONE", dtype="handle"))
+            placeholder_d_global_1 = T.allocate([128], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 
128, placeholder_d_global_1[0], dtype="handle"))
+            placeholder_d_global_2 = T.allocate([32], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_3[0], 
32, placeholder_d_global_2[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 
0, 42, ethosu_write_2[0], 0, 0, 0, T.float32(0.031308155506849289), -128, 
"NHCWB16", 672, 16, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write[0], 0, 0, 0, 
T.float32(0.23604340851306915), -128, "NHWC", 126, 3, 1, 2, 3, 1, 1, 1, 2, 
placeholder_d_global_1[0], 128, 0, placeholder_d_global_2[0], 32, 2, 0, 2, 1, 
"CLIP", -128, 127, "TFL", "NONE", dtype="handle"))
+    # fmt: on
+
+    mod = Module
+    allocate_info = ExtractAllocateInfo()(mod)
+    mod = HoistAllocates()(mod)
+    CheckAllocates(allocate_info)(mod)
+
+
+def test_identities():
+    """
+    Test to check the HoistAllocates pass works on a function with multiple identity
+    operations, with no copy operations.
+    """
+
+    # fmt: off
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def main(placeholder: T.Buffer[(24,), "int8"], T_concat: 
T.Buffer[(24,), "int8"]) -> None:
+            # function attr dict
+            T.func_attr({"from_legacy_te_schedule": True, "global_symbol": 
"main", "tir.noalias": True})
+            T.preflattened_buffer(placeholder, [1, 2, 3, 4], dtype="int8", 
data=placeholder.data)
+            T.preflattened_buffer(T_concat, [24], dtype="int8", 
data=T_concat.data)
+            # body
+            ethosu_write = T.allocate([12], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 
3, placeholder[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 
1, 0, 3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 
1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+            ethosu_write_1 = T.allocate([12], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 
3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 
1, 0, 3, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 
0, 1, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 
1, 1, 12, 0, 1, T_concat[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+            ethosu_write_2 = T.allocate([12], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 
3, placeholder[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 
1, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+            ethosu_write_3 = T.allocate([12], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 
3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 
4, 1, 0, 3, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, 
"AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 
dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 
0, 1, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 
1, 1, 12, 0, 1, T_concat[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+    # fmt: on
+
+    mod = Module
+    allocate_info = ExtractAllocateInfo()(mod)
+    mod = HoistAllocates()(mod)
+    CheckAllocates(allocate_info)(mod)
+
+
+def test_outer_seq_stmt():
+    """
+    Test to check the HoistAllocates pass works on a function where the outer-most statement is
+    a sequence statement, rather than the usual allocate.
+    """
+
+    # fmt: off
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: 
T.Buffer[(2048,), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], 
buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), 
"uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: 
T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], 
buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), 
"uint8"]) -> None:
+            # function attr dict
+            T.func_attr({"from_legacy_te_schedule": True, "global_symbol": 
"main", "tir.noalias": True})
+            T.preflattened_buffer(placeholder, [1, 16, 16, 32], dtype="int8", 
data=placeholder.data)
+            T.preflattened_buffer(ethosu_write, [1, 16, 16, 8], dtype="int8", 
data=ethosu_write.data)
+            # body
+            with T.allocate([128], "uint8", "global") as placeholder_global:
+                T.evaluate(T.call_extern("ethosu_copy", buffer_encoded[0], 
128, placeholder_global[0], dtype="handle"))
+                placeholder_d_global = T.allocate([32], "uint8", "global")
+                T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 
32, placeholder_d_global[0], dtype="handle"))
+                T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 
16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, 
"int8", 16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, 
"NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, 12, 
placeholder_d_global[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, 
dtype="handle"))
+            with T.allocate([112], "uint8", "global") as placeholder_global_1:
+                T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2[0], 
112, placeholder_global_1[0], dtype="handle"))
+                placeholder_d_global_1 = T.allocate([32], "uint8", "global")
+                T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_3[0], 
32, placeholder_d_global_1[0], dtype="handle"))
+                T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 
16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, 
"int8", 16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, 
"NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_1[0], 112, 12, 
placeholder_d_global_1[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 
0, dtype="handle"))
+            with T.allocate([112], "uint8", "global") as placeholder_global_2:
+                T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4[0], 
112, placeholder_global_2[0], dtype="handle"))
+                placeholder_d_global_2 = T.allocate([32], "uint8", "global")
+                T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_5[0], 
32, placeholder_d_global_2[0], dtype="handle"))
+                T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 
16, 0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, 
"int8", 16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, 
"NHWC", 128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_2[0], 112, 12, 
placeholder_d_global_2[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 
0, dtype="handle"))
+            placeholder_global_3 = T.allocate([112], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6[0], 112, 
placeholder_global_3[0], dtype="handle"))
+            placeholder_d_global_3 = T.allocate([32], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_7[0], 32, 
placeholder_d_global_3[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 
0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 
16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 
128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_3[0], 112, 12, 
placeholder_d_global_3[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 
0, dtype="handle"))
+    # fmt: on
+
+    mod = Module
+    allocate_info = ExtractAllocateInfo()(mod)
+    mod = HoistAllocates()(mod)
+    CheckAllocates(allocate_info)(mod)
+
+
+def test_allocate_without_seq_stmt():
+    """
+    Tests the case when an allocate statement does not have a sequence statement as its body.
+    """
+    # fmt: off
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def main(placeholder: T.Buffer[(8192,), "int8"], ethosu_write: 
T.Buffer[(2048,), "int8"], buffer_encoded: T.Buffer[(128,), "uint8"], 
buffer_encoded_1: T.Buffer[(32,), "uint8"], buffer_encoded_2: T.Buffer[(112,), 
"uint8"], buffer_encoded_3: T.Buffer[(32,), "uint8"], buffer_encoded_4: 
T.Buffer[(112,), "uint8"], buffer_encoded_5: T.Buffer[(32,), "uint8"], 
buffer_encoded_6: T.Buffer[(112,), "uint8"], buffer_encoded_7: T.Buffer[(32,), 
"uint8"]) -> None:
+            # function attr dict
+            T.func_attr({"from_legacy_te_schedule": True, "global_symbol": 
"main", "tir.noalias": True})
+            T.preflattened_buffer(placeholder, [1, 16, 16, 32], dtype="int8", 
data=placeholder.data)
+            T.preflattened_buffer(ethosu_write, [1, 16, 16, 8], dtype="int8", 
data=ethosu_write.data)
+            # body
+            placeholder_global = T.allocate([128], "uint8", "global")
+            placeholder_global_1 = T.allocate([112], "uint8", "global")
+            placeholder_global_2 = T.allocate([112], "uint8", "global")
+            placeholder_d_global = T.allocate([32], "uint8", "global")
+            placeholder_d_global_1 = T.allocate([32], "uint8", "global")
+            placeholder_d_global_2 = T.allocate([32], "uint8", "global")
+            placeholder_global_3 = T.allocate([112], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded[0], 128, 
placeholder_global[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_1[0], 32, 
placeholder_d_global[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 
0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 
16, 16, 2, 16, 0, 16, ethosu_write[0], 0, 0, 0, T.float32(0.25), 14, "NHWC", 
128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global[0], 128, 12, 
placeholder_d_global[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 0, 
dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_2[0], 112, 
placeholder_global_1[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_3[0], 32, 
placeholder_d_global_1[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 
0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 
16, 16, 2, 16, 0, 16, ethosu_write[2], 0, 0, 0, T.float32(0.25), 14, "NHWC", 
128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_1[0], 112, 12, 
placeholder_d_global_1[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 
0, dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_4[0], 112, 
placeholder_global_2[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_5[0], 32, 
placeholder_d_global_2[0], dtype="handle"))
+            placeholder_d_global_3 = T.allocate([32], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 
0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 
16, 16, 2, 16, 0, 16, ethosu_write[4], 0, 0, 0, T.float32(0.25), 14, "NHWC", 
128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_2[0], 112, 12, 
placeholder_d_global_2[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 
0, dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_6[0], 112, 
placeholder_global_3[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_copy", buffer_encoded_7[0], 32, 
placeholder_d_global_3[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 16, 16, 32, 16, 
0, 16, placeholder[0], 0, 0, 0, T.float32(0.5), 10, "NHWC", 512, 32, 1, "int8", 
16, 16, 2, 16, 0, 16, ethosu_write[6], 0, 0, 0, T.float32(0.25), 14, "NHWC", 
128, 8, 1, 1, 1, 1, 1, 1, 1, placeholder_global_3[0], 112, 12, 
placeholder_d_global_3[0], 32, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 0, 0, 
0, dtype="handle"))
+    # fmt: on
+
+    mod = Module
+    allocate_info = ExtractAllocateInfo()(mod)
+    mod = HoistAllocates()(mod)
+    CheckAllocates(allocate_info)(mod)
+
+
+def test_multiple_prim_funcs():
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def main():
+            T.evaluate(0)
+
+        @T.prim_func
+        def abc():
+            T.evaluate(0)
+
+    mod = Module
+
+    err_rgx = (
+        r"Expected a single primitive function called 'main'. "
+        r"Please run the HoistAllocates pass in conjunction with the LowerToTIR\(\) pass."
+    )
+    with pytest.raises(tvm.TVMError, match=err_rgx):
+        mod = HoistAllocates()(mod)
+
+
+def test_no_main_prim_func():
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def abs():
+            T.evaluate(0)
+
+    mod = Module
+
+    err_rgx = (
+        r"Expected a single primitive function called 'main'. "
+        r"Please run the HoistAllocates pass in conjunction with the LowerToTIR\(\) pass."
+    )
+    with pytest.raises(tvm.TVMError, match=err_rgx):
+        mod = HoistAllocates()(mod)

Reply via email to