manupa-arm commented on a change in pull request #10725:
URL: https://github.com/apache/tvm/pull/10725#discussion_r833100678



##########
File path: src/tir/contrib/ethosu/passes.cc
##########
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tir/contrib/ethosu/passes.cc
+ *
+ * \brief Passes used in TIR lowering for the microNPU compiler.
+ */
+#include <tvm/tir/builtin.h>
+#include <tvm/tir/function.h>
+#include <tvm/tir/stmt_functor.h>
+#include <tvm/tir/transform.h>
+
+namespace tvm {
+namespace tir {
+namespace contrib {
+namespace ethosu {
+
+/*!
+ * \brief This mutator moves allocates to the top of the body of the main
+ * function.
+ *
+ * For example,
+ *               allocate {
+ *                   extern_call(...) {
+ *                       allocate {
+ *     Before:               extern_call(...)
+ *                       }
+ *                   }
+ *               }
+ *
+ *               allocate {
+ *                   allocate {
+ *                      extern_call(...)
+ *     After:           extern_call(...)
+ *                   }
+ *               }
+ */
+class HoistAllocatesMutator : public StmtExprMutator {
+ public:
+  HoistAllocatesMutator() {}
+
+  IRModule operator()(IRModule mod) {
+    GlobalVar gv = mod->GetGlobalVar("main");
+    PrimFunc main_func = Downcast<PrimFunc>(mod->Lookup(gv));
+    Stmt new_main_func_body = this->VisitStmt(main_func->body);
+
+    // Write all allocates that were removed in reverse order
+    for (auto it = allocates_.rbegin(); it != allocates_.rend(); it++) {
+      Allocate current_alloc = *it;
+      if (it != allocates_.rbegin()) {
+        new_main_func_body = SeqStmt({new_main_func_body});
+      }
+      new_main_func_body =
+          Allocate(current_alloc->buffer_var, current_alloc->dtype, 
current_alloc->extents,
+                   current_alloc->condition, new_main_func_body, 
current_alloc->annotations,
+                   current_alloc->span);
+    }
+
+    PrimFunc new_main_func =
+        PrimFunc(main_func->params, new_main_func_body, main_func->ret_type, 
main_func->buffer_map,
+                 main_func->preflattened_buffer_map, main_func->attrs);
+    mod->Update(gv, new_main_func);
+    return mod;
+  }
+
+ private:
+  Stmt VisitStmt_(const AllocateNode* op) override {
+    allocates_.push_back(GetRef<Allocate>(op));
+
+    // Skip the allocate node itself
+    const auto* seq = op->body.as<SeqStmtNode>();
+    ICHECK(seq) << "Expected a sequence statement but got " << 
op->body->GetTypeKey() << ".";
+
+    // Traverse the allocate body recursively and flatten
+    Array<Stmt> new_stmts;
+    new_stmts.reserve(seq->seq.size());
+    for (const Stmt& old_stmt : seq->seq) {
+      new_stmts.push_back(VisitStmt(old_stmt));
+    }
+    return SeqStmt::Flatten(new_stmts);
+  }
+
+  /*! A stack to store allocates as they are visited. */
+  std::vector<Allocate> allocates_;
+};
+
+/*!
+ * \brief A pass to hoist allocate nodes to the top of the body of the main 
function.
+ *
+ * \return tvm::transform::Pass
+ */
+tvm::transform::Pass HoistAllocates() {
+  auto pass_func = [=](IRModule mod, tvm::transform::PassContext ctx) {
+    return HoistAllocatesMutator()(mod);
+  };
+  return tvm::transform::CreateModulePass(pass_func, 0, 
"tir.contrib.ethos-u.HoistAllocates", {});

Review comment:
       Is there any reason we can't make this a PrimFunc pass? We might need to 
think of a way to skip non-NPU functions.

##########
File path: tests/python/contrib/test_ethosu/test_hoist_allocates.py
##########
@@ -0,0 +1,204 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""
+Testing the pass that moves allocate nodes to the body of the function.
+"""
+# pylint: disable=wrong-import-position
+
+import pytest
+
+pytest.importorskip("ethosu.vela")
+
+import tvm
+from tvm.script import tir as T
+from tvm.relay.backend.contrib.ethosu.tir.passes import HoistAllocates
+
+
+class ExtractAllocateInfo:
+    """
+    Extracts information from allocate nodes which we will use as sanity to 
check the allocate
+    after mutation.
+    """
+
+    def __init__(self):
+        self.allocates_info = []
+
+    def __call__(self, mod):
+        tvm.tir.stmt_functor.ir_transform(mod["main"].body, self._pre_visit, 
None, ["tir.Allocate"])
+        return self.allocates_info
+
+    def _pre_visit(self, stmt):
+        self.allocates_info.append(
+            {"extents": stmt.extents, "dtype": stmt.dtype, "condition": 
stmt.condition}
+        )
+
+
+def CheckAllocates(allocate_info):  # pylint: disable=invalid-name
+    """
+    Checks that all allocates have been visited before an external call has 
been visited.
+    Additionally, checks that the information for each allocate is what is 
expected.
+    """
+
+    allocate_idx = 0
+    expected_num_allocates = len(allocate_info)
+
+    def _pre_visit(stmt):
+        nonlocal allocate_idx, expected_num_allocates
+
+        if isinstance(stmt, tvm.tir.Allocate):
+            expected = allocate_info[allocate_idx]
+            assert (
+                stmt.extents == expected["extents"]
+            ), f"Allocate extents {stmt.extents} did not match expected 
{expected['extents']}"
+            assert (
+                stmt.dtype == expected["dtype"]
+            ), f"Allocate dtype {stmt.dtype} did not match expected 
{expected['dtype']}"
+            assert (
+                stmt.condition == expected["condition"]
+            ), f"Allocate condition {stmt.condition} did not match expected 
{expected['condition']}"
+
+            allocate_idx += 1
+        else:
+            assert (
+                allocate_idx == expected_num_allocates
+            ), "A call node was visited before all allocates"
+
+    def _ftransform(f, mod, ctx):
+        f.with_body(
+            tvm.tir.stmt_functor.ir_transform(
+                f.body, _pre_visit, None, ["tir.Allocate", "tir.Call"]
+            )
+        )
+
+    return tvm.tir.transform.prim_func_pass(_ftransform, opt_level=0)
+
+
+def test_double_convolution():
+    """
+    Test to check the HoistAllocates pass works on a function with two 
convolutions.
+    """
+
+    # fmt: off
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def main(placeholder: T.Buffer[(3402,), "int8"], placeholder_encoded: 
T.Buffer[(128,), "uint8"], placeholder_encoded_1: T.Buffer[(32,), "uint8"], 
placeholder_encoded_2: T.Buffer[(128,), "uint8"], placeholder_encoded_3: 
T.Buffer[(32,), "uint8"], ethosu_write: T.Buffer[(3402,), "int8"]) -> None:
+            # function attr dict
+            T.func_attr({"from_legacy_te_schedule": True, "global_symbol": 
"main", "tir.noalias": True})
+            T.preflattened_buffer(placeholder, [1, 27, 42, 3], dtype="int8", 
data=placeholder.data)
+            T.preflattened_buffer(placeholder_encoded, [3, 3, 2, 3], 
dtype="int8")
+            T.preflattened_buffer(placeholder_encoded_1, [3, 10], 
dtype="uint8")
+            T.preflattened_buffer(placeholder_encoded_2, [3, 3, 2, 3], 
dtype="int8")
+            T.preflattened_buffer(placeholder_encoded_3, [3, 10], 
dtype="uint8")
+            T.preflattened_buffer(ethosu_write, [1, 27, 42, 3], dtype="int8", 
data=ethosu_write.data)
+            # body
+            placeholder_global = T.allocate([128], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded[0], 
128, placeholder_global[0], dtype="handle"))
+            placeholder_d_global = T.allocate([32], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_1[0], 
32, placeholder_d_global[0], dtype="handle"))
+            ethosu_write_2 = T.allocate([18144], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 
0, 42, placeholder[0], 0, 0, 0, T.float32(0.0039215646684169769), -128, "NHWC", 
126, 3, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write_2[0], 0, 0, 0, 
T.float32(0.031308155506849289), -128, "NHCWB16", 672, 16, 1, 2, 3, 1, 1, 1, 2, 
placeholder_global[0], 128, 0, placeholder_d_global[0], 32, 2, 0, 2, 1, "NONE", 
0, 0, "TFL", "NONE", dtype="handle"))
+            placeholder_d_global_1 = T.allocate([128], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_2[0], 
128, placeholder_d_global_1[0], dtype="handle"))
+            placeholder_d_global_2 = T.allocate([32], "uint8", "global")
+            T.evaluate(T.call_extern("ethosu_copy", placeholder_encoded_3[0], 
32, placeholder_d_global_2[0], dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_conv2d", "int8", 27, 42, 3, 27, 
0, 42, ethosu_write_2[0], 0, 0, 0, T.float32(0.031308155506849289), -128, 
"NHCWB16", 672, 16, 1, "int8", 27, 42, 3, 27, 0, 42, ethosu_write[0], 0, 0, 0, 
T.float32(0.23604340851306915), -128, "NHWC", 126, 3, 1, 2, 3, 1, 1, 1, 2, 
placeholder_d_global_1[0], 128, 0, placeholder_d_global_2[0], 32, 2, 0, 2, 1, 
"CLIP", -128, 127, "TFL", "NONE", dtype="handle"))
+    # fmt: on
+
+    mod = Module
+    allocate_info = ExtractAllocateInfo()(mod)
+    mod = HoistAllocates()(mod)
+    CheckAllocates(allocate_info)(mod)
+
+
+def test_identities():
+    """
+    Test to check the HoistAllocates pass works on a function with multiple 
identity
+    operations, with no copy operations.
+    """
+
+    # fmt: off
+    @tvm.script.ir_module
+    class Module:
+        @T.prim_func
+        def main(placeholder: T.Buffer[(24,), "int8"], T_concat: 
T.Buffer[(24,), "int8"]) -> None:
+            # function attr dict
+            T.func_attr({"from_legacy_te_schedule": True, "global_symbol": 
"main", "tir.noalias": True})
+            T.preflattened_buffer(placeholder, [1, 2, 3, 4], dtype="int8", 
data=placeholder.data)
+            T.preflattened_buffer(T_concat, [24], dtype="int8", 
data=T_concat.data)
+            # body
+            ethosu_write = T.allocate([12], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 
3, placeholder[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 
1, 0, 3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 1, 
1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+            ethosu_write_1 = T.allocate([12], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 
3, ethosu_write[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 
1, 0, 3, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 
0, 1, ethosu_write_1[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 
1, 1, 12, 0, 1, T_concat[12], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+            ethosu_write_2 = T.allocate([12], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 
3, placeholder[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 4, 
1, 0, 3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "AVG", 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+            ethosu_write_3 = T.allocate([12], "int8", "global")
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 1, 3, 4, 1, 0, 
3, ethosu_write_2[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, "int8", 1, 3, 
4, 1, 0, 3, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 4, 1, 
"AVG", 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", 
dtype="handle"))
+            T.evaluate(T.call_extern("ethosu_identity", "int8", 12, 1, 1, 12, 
0, 1, ethosu_write_3[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "int8", 12, 
1, 1, 12, 0, 1, T_concat[0], 0, 0, 0, T.float32(1), 0, "NHWC", 1, 1, 1, "AVG", 
1, 1, 1, 1, 1, 1, 0, 0, 0, 0, "NONE", 0, 0, "TFL", "NONE", dtype="handle"))
+    # fmt: on
+
+    mod = Module
+    allocate_info = ExtractAllocateInfo()(mod)
+    mod = HoistAllocates()(mod)
+    CheckAllocates(allocate_info)(mod)
+
+
+def test_outer_seq_stmt():

Review comment:
       We need test cases that contain non-NPU functions as well as more than 
one NPU function.

##########
File path: src/tir/contrib/ethosu/passes.cc
##########
@@ -0,0 +1,121 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file tir/contrib/ethosu/passes.cc
+ *
+ * \brief Passes used in TIR lowering for the microNPU compiler.
+ */
+#include <tvm/tir/builtin.h>
+#include <tvm/tir/function.h>
+#include <tvm/tir/stmt_functor.h>
+#include <tvm/tir/transform.h>
+
+namespace tvm {
+namespace tir {
+namespace contrib {
+namespace ethosu {
+
+/*!
+ * \brief This mutator moves allocates to the top of the body of the main
+ * function.
+ *
+ * For example,
+ *               allocate {
+ *                   extern_call(...) {
+ *                       allocate {
+ *     Before:               extern_call(...)
+ *                       }
+ *                   }
+ *               }
+ *
+ *               allocate {
+ *                   allocate {
+ *                      extern_call(...)
+ *     After:           extern_call(...)
+ *                   }
+ *               }
+ */
+class HoistAllocatesMutator : public StmtExprMutator {
+ public:
+  HoistAllocatesMutator() {}
+
+  IRModule operator()(IRModule mod) {
+    GlobalVar gv = mod->GetGlobalVar("main");
+    PrimFunc main_func = Downcast<PrimFunc>(mod->Lookup(gv));

Review comment:
       This will break if the mod has a global Relay function. I admit that 
will not be the case in the current integration. At the same time, I think we 
don't need this restriction (i.e. looking at NPU PrimFuncs should be sufficient 
for this pass). Also, take a look at the comment below about the possibility of 
making this a PrimFunc pass.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to