This is an automated email from the ASF dual-hosted git repository.
manupa pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 2cb7695 [microNPU] Remove identity operations between non-compute
operations (#10411)
2cb7695 is described below
commit 2cb769560bfb1a67cc37c18895023b76af61d68e
Author: Luke Hutton <[email protected]>
AuthorDate: Fri Mar 25 07:05:45 2022 +0000
[microNPU] Remove identity operations between non-compute operations
(#10411)
Builds upon the work in #10254 to remove identity operations sandwiched
between two non-compute operations (reshape/strided slice - concatenate
is handled differently), under certain conditions. Specifically, an
identity operation is not removed when the dimensionality between the
two non-compute operations is reduced, due to non-congruent values
being accessed incorrectly. For example,
```
strided_slice(dims=4) -> identity -> reshape(dims=4)
```
becomes...
```
strided_slice -> reshape
```
but,
```
strided_slice(dims=4) -> identity -> reshape(dims=2)
```
remains as...
```
strided_slice -> identity -> reshape
```
Change-Id: Ie28ba384fcb3230d6f4651c0c19e2b9526ebcc42
---
python/tvm/relay/backend/contrib/ethosu/codegen.py | 1 +
src/relay/backend/contrib/ethosu/codegen.cc | 61 +++++++++++++++++++---
.../contrib/test_ethosu/test_identity_optimizer.py | 47 +++++++++++++++--
3 files changed, 97 insertions(+), 12 deletions(-)
diff --git a/python/tvm/relay/backend/contrib/ethosu/codegen.py b/python/tvm/relay/backend/contrib/ethosu/codegen.py
index e8b5cc2..d06622e 100644
--- a/python/tvm/relay/backend/contrib/ethosu/codegen.py
+++ b/python/tvm/relay/backend/contrib/ethosu/codegen.py
@@ -347,6 +347,7 @@ def relay_to_tir(mod: tvm.ir.IRModule) -> tvm.ir.IRModule:
mod = OutlineCompilerFunctions("ethos-u")(mod)
mod = LegalizeEthosU()(mod)
mod = LUTsOptimizer()(mod)
+ mod = relay.transform.InferType()(mod)
mod = IdentityOptimizer()(mod)
mod = LayoutOptimizer()(mod)
mod = relay.transform.InferType()(mod)
diff --git a/src/relay/backend/contrib/ethosu/codegen.cc b/src/relay/backend/contrib/ethosu/codegen.cc
index 7044669..dfcf54f 100644
--- a/src/relay/backend/contrib/ethosu/codegen.cc
+++ b/src/relay/backend/contrib/ethosu/codegen.cc
@@ -115,13 +115,13 @@ class RemoveRedundantIdentities : public MixedModeMutator {
Expr Rewrite_(const CallNode* pre, const Expr& post) override {
Call call = Downcast<Call>(post);
- // only consider rewrite if current op is an NPU compute op.
+ // don't consider rewrite if current op is an identity or concatenate.
if (!call->op->IsInstance<OpNode>()) {
return post;
}
const auto* op = call->op.as<OpNode>();
std::string op_name = op->name;
-    if (op_name.substr(0, 15) != "contrib.ethosu." || op_name == "contrib.ethosu.identity") {
+ if (op_name == "contrib.ethosu.identity" || op_name == "concatenate") {
return post;
}
@@ -129,10 +129,19 @@ class RemoveRedundantIdentities : public MixedModeMutator {
bool needs_rewrite = false;
Array<Expr> new_args;
for (const auto& arg : call->args) {
- if (const auto* parent_callnode = arg.as<CallNode>()) {
+ Expr current_arg = arg;
+
+      // expand tuple to get parent op if we run into one - nested tuples are not supported.
+ if (const auto* tuple_get_item = arg.as<TupleGetItemNode>()) {
+ const auto* tuple = tuple_get_item->tuple.as<TupleNode>();
+ current_arg = tuple->fields[tuple_get_item->index];
+ }
+
+ if (const auto* parent_callnode = current_arg.as<CallNode>()) {
if (const auto* parent_op = parent_callnode->op.as<OpNode>()) {
Call parent_call = GetRef<Call>(parent_callnode);
-          if (parent_op->name == "contrib.ethosu.identity" && IdentityDoesNothing(parent_call)) {
+          if (parent_op->name == "contrib.ethosu.identity" && IdentityDoesNothing(parent_call) &&
+              CheckIdentityBetweenTransformOperations(call, parent_call)) {
needs_rewrite = true;
new_args.push_back(parent_call->args[0]);
continue;
@@ -143,7 +152,10 @@ class RemoveRedundantIdentities : public MixedModeMutator {
}
if (needs_rewrite) {
- return Call(call->op, new_args, call->attrs, call->type_args);
+ Call new_call = Call(call->op, new_args, call->attrs, call->type_args);
+      // since we are only removing an identity, we know the type information has not changed
+ new_call->checked_type_ = call->checked_type_;
+ return new_call;
}
return post;
}
@@ -156,6 +168,41 @@ class RemoveRedundantIdentities : public MixedModeMutator {
bool has_no_activation = attrs->activation == "NONE";
return does_not_requantize && has_no_activation;
}
+
+  bool CheckIdentityBetweenTransformOperations(const Call& call, const Call& identity_call) {
+ const auto* op = call->op.as<OpNode>();
+ std::vector<std::string> nc_ops = {"reshape", "strided_slice"};
+
+    if (op && (std::find(nc_ops.begin(), nc_ops.end(), op->name) != nc_ops.end())) {
+      // check if the parent to identity operation is also a non-compute operation,
+      // if it isn't we can safely remove the identity in question by returning true.
+ const auto* identity_arg = identity_call->args[0].as<CallNode>();
+ if (!identity_arg) {
+ return true;
+ }
+ const auto* identity_arg_op = identity_arg->op.as<OpNode>();
+      if (!identity_arg_op ||
+          !(std::find(nc_ops.begin(), nc_ops.end(), identity_arg_op->name) != nc_ops.end())) {
+ return true;
+ }
+
+ const auto* call_tt = call->checked_type_.as<TensorTypeNode>();
+      const auto* identity_arg_tt = identity_arg->checked_type_.as<TensorTypeNode>();
+      CHECK(call_tt && identity_arg_tt) << "InferType should be run before RemoveRedundantIdentities";
+
+      // we can only remove the identity operation if the second non-compute operation
+      // in the sequence does not reduce the dimensionality of the output to the first
+      // non-compute operation. Doing so could lead to data being accessed incorrectly
+      // by the subsequent compute operation due to the reduction in dimensionality.
+ size_t first_transform_op_dims = identity_arg_tt->shape.size();
+ size_t second_transform_op_dims = call_tt->shape.size();
+ if (second_transform_op_dims < first_transform_op_dims) {
+ return false;
+ }
+ }
+ return true;
+ }
};
/*!
@@ -177,8 +224,8 @@ tvm::transform::Pass IdentityOptimizer() {
}
return mod;
};
-  return tvm::transform::CreateModulePass(pass_func, 0,
-                                          "relay.backend.contrib.ethos-u.IdentityOptimizer", {});
+  return tvm::transform::CreateModulePass(
+      pass_func, 0, "relay.backend.contrib.ethos-u.IdentityOptimizer", {"InferType"});
}
TVM_REGISTER_GLOBAL("relay.ext.ethos-u.IdentityOptimizer").set_body_typed(IdentityOptimizer);
diff --git a/tests/python/contrib/test_ethosu/test_identity_optimizer.py b/tests/python/contrib/test_ethosu/test_identity_optimizer.py
index a2bb4f4..8a42fe8 100644
--- a/tests/python/contrib/test_ethosu/test_identity_optimizer.py
+++ b/tests/python/contrib/test_ethosu/test_identity_optimizer.py
@@ -179,12 +179,14 @@ def test_many_output_identity():
def get_graph(get_expected=False):
x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
x = relay.reshape(x, newshape=(1, 1, 4, 4))
- identity = infra.make_ethosu_identity(x)
+ if not get_expected:
+ x = infra.make_ethosu_identity(x)
outputs = []
for _ in range(4):
- ifm = x if get_expected else identity
- outputs.append(infra.make_ethosu_unary_elementwise(ifm, 4, "ABS"))
-        outputs.append(relay.strided_slice(identity, begin=(0, 0, 0, 0), end=(1, 1, 4, 4)))
+ outputs.append(infra.make_ethosu_unary_elementwise(x, 4, "ABS"))
+ ss = relay.strided_slice(x, begin=(0, 0, 0, 0), end=(1, 1, 4, 4))
+ identity_2 = infra.make_ethosu_identity(ss)
+ outputs.append(identity_2)
out = relay.concatenate(outputs, axis=0)
return relay.Function(relay.analysis.free_vars(out), out)
@@ -220,7 +222,8 @@ def test_identity_removal_with_multiple_transform_ops():
def get_graph(get_expected=False):
x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
x = relay.strided_slice(x, begin=[0, 0, 0, 0], end=[1, 2, 2, 2])
- x = infra.make_ethosu_identity(x)
+ if not get_expected:
+ x = infra.make_ethosu_identity(x)
x = relay.reshape(x, newshape=(1, 1, 1, 8))
if not get_expected:
x = infra.make_ethosu_identity(x)
@@ -267,6 +270,25 @@ def test_identity_single_removal_on_binary_elementwise():
_assert_structural_equal(actual, expected)
+def test_multiple_transform_ops_with_reduction_in_dimensionality():
+    """Removal of an identity operation between two transform operations is usually okay.
+    However, if the dimensionality of the input is reduced by the second transformation
+    operation, it can lead to an output mismatch. Checking that the pass doesn't remove
+    an identity given this case."""
+
+ def get_graph():
+ x = relay.var("x", shape=(1, 2, 2, 4), dtype="int8")
+ x = relay.strided_slice(x, begin=(0, 0, 0, 0), end=(1, 2, 2, 2))
+ x = infra.make_ethosu_identity(x)
+ x = relay.reshape(x, newshape=(1, 2, 4))
+ x = infra.make_ethosu_identity(x)
+ return relay.Function(relay.analysis.free_vars(x), x)
+
+ actual = _optimize(get_graph())
+ expected = _optimize(get_graph(), optimize=False)
+ _assert_structural_equal(actual, expected)
+
+
def test_identity_optimizer_runs_in_compilation_pipeline():
    """Checks that the identity optimization pass is run as part of the NPU compilation pipeline."""
@@ -320,3 +342,18 @@ def test_multi_output_identity_has_same_output():
return y
_compare_tvm_with_tflite(model, [ifm_shape], "ethos-u55-256")
+
+
+def test_multiple_transform_ops_same_output():
+ """Check case of identity removal between transform ops and
+ then without, making sure they have the same output."""
+ ifm_shape = (1, 2, 2, 4)
+
+ @tf.function
+ def model(x):
+ x = tf.reshape(x, (1, 1, 4, 4))
+ x = tf.slice(x, (0, 0, 0, 0), (1, 1, 4, 3))
+ x = tf.reshape(x, (12,))
+ return x
+
+ _compare_tvm_with_tflite(model, [ifm_shape], "ethos-u55-256")