This is an automated email from the ASF dual-hosted git repository.
syfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 1234f88b60 [BugFix][Relay][GraphExecutor] Fix set_input_zero_copy()
precision bug (#15291)
1234f88b60 is described below
commit 1234f88b609cd4118f3318c17dccef5d77760302
Author: zhuwenxi <[email protected]>
AuthorDate: Wed Jul 12 09:57:31 2023 +0800
[BugFix][Relay][GraphExecutor] Fix set_input_zero_copy() precision bug
(#15291)
---
src/runtime/graph_executor/graph_executor.cc | 25 +++-
src/runtime/graph_executor/graph_executor.h | 6 +-
tests/python/unittest/test_set_input_zero_copy.py | 137 ++++++++++++++++++++++
3 files changed, 161 insertions(+), 7 deletions(-)
diff --git a/src/runtime/graph_executor/graph_executor.cc
b/src/runtime/graph_executor/graph_executor.cc
index f4b3647830..777a5a442a 100644
--- a/src/runtime/graph_executor/graph_executor.cc
+++ b/src/runtime/graph_executor/graph_executor.cc
@@ -475,8 +475,13 @@ void GraphExecutor::SetupStorage() {
// is mapped to this pool.
data_entry_.resize(num_node_entries());
data_alignment_.resize(num_node_entries());
+ // sid_to_eid_ has one entry per storage id, i.e. the size of storage_pool_.
+ sid_to_eid_.resize(storage_pool_.size());
for (size_t i = 0; i < data_entry_.size(); ++i) {
int storage_id = attrs_.storage_id[i];
+ // Update "storage_id -> entry_id" pair.
+ sid_to_eid_[storage_id].push_back(i);
+
ICHECK_LT(static_cast<size_t>(storage_id), storage_pool_.size());
data_entry_[i] = storage_pool_[storage_id].CreateView(attrs_.shape[i],
vtype[i]);
@@ -504,14 +509,14 @@ void GraphExecutor::SetupOpExecs() {
for (uint32_t nid = 0; nid < this->GetNumOfNodes(); ++nid) {
const auto& inode = nodes_[nid];
if (inode.op_type == "null") continue;
- std::vector<DLTensor> args;
+ std::vector<DLTensor*> args;
for (const auto& e : inode.inputs) {
uint32_t eid = this->entry_id(e);
- args.push_back(*(data_entry_[eid].operator->()));
+ args.push_back(const_cast<DLTensor*>(data_entry_[eid].operator->()));
}
for (uint32_t index = 0; index < inode.param.num_outputs; ++index) {
uint32_t eid = this->entry_id(nid, index);
- args.push_back(*(data_entry_[eid].operator->()));
+ args.push_back(const_cast<DLTensor*>(data_entry_[eid].operator->()));
}
ICHECK(inode.op_type == "tvm_op") << "Can only take tvm_op as op";
@@ -524,6 +529,16 @@ void GraphExecutor::SetupOpExecs() {
if (input_node_eids.count(input_eid) > 0) {
input_dltensors_[input_eid].push_back(
static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
+
+ // Data entries that share the same storage_id should also be pushed into
+ // "input_dltensors", so that they can be updated by "SetInputZeroCopy()".
+ // This handles the case where a "relay.reshape" immediately follows an
+ // input: the input dltensor and the reshape's output dltensor then point
+ // to the same data_entry.
+ auto storage_id = attrs_.storage_id[input_eid];
+ for (auto eid : sid_to_eid_[storage_id]) {
+ input_dltensors_[input_eid].push_back(
+ const_cast<DLTensor*>(data_entry_[eid].operator->()));
+ }
}
// check if any model output is the input of the op
if (output_node_eids.count(input_eid) > 0) {
@@ -544,7 +559,7 @@ void GraphExecutor::SetupOpExecs() {
}
std::pair<std::function<void()>, std::shared_ptr<GraphExecutor::OpArgs>>
GraphExecutor::CreateTVMOp(
- const TVMOpParam& param, const std::vector<DLTensor>& args) {
+ const TVMOpParam& param, const std::vector<DLTensor*>& args) {
std::shared_ptr<GraphExecutor::OpArgs> arg_ptr =
std::make_shared<GraphExecutor::OpArgs>();
// setup address.
arg_ptr->args = args;
@@ -553,7 +568,7 @@ std::pair<std::function<void()>,
std::shared_ptr<GraphExecutor::OpArgs>> GraphEx
}
for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
TVMValue v;
- DLTensor* t = &arg_ptr->args[i];
+ DLTensor* t = arg_ptr->args[i];
v.v_handle = t;
arg_ptr->arg_values.push_back(v);
arg_ptr->arg_tcodes.push_back(kTVMDLTensorHandle);
diff --git a/src/runtime/graph_executor/graph_executor.h
b/src/runtime/graph_executor/graph_executor.h
index fb2dded4cf..2f6b8b8147 100644
--- a/src/runtime/graph_executor/graph_executor.h
+++ b/src/runtime/graph_executor/graph_executor.h
@@ -66,7 +66,7 @@ struct TVMOpParam {
*/
class TVM_DLL GraphExecutor : public ModuleNode {
struct OpArgs {
- std::vector<DLTensor> args;
+ std::vector<DLTensor*> args;
std::vector<TVMValue> arg_values;
std::vector<int> arg_tcodes;
std::vector<int64_t> shape_data;
@@ -437,7 +437,7 @@ class TVM_DLL GraphExecutor : public ModuleNode {
* \return The created executor.
*/
std::pair<std::function<void()>, std::shared_ptr<OpArgs>> CreateTVMOp(
- const TVMOpParam& attrs, const std::vector<DLTensor>& args);
+ const TVMOpParam& attrs, const std::vector<DLTensor*>& args);
// Get node entry index.
uint32_t entry_id(uint32_t nid, uint32_t index) const { return
node_row_ptr_[nid] + index; }
// Get node entry index.
@@ -460,6 +460,8 @@ class TVM_DLL GraphExecutor : public ModuleNode {
std::vector<std::vector<DLTensor*>> output_dltensors_;
/*! \brief Used for quick node(both model output and op input) DLTensor*
lookup given an eid. */
std::vector<std::vector<DLTensor*>> both_output_opinput_dltensors_;
+ /*! \brief Used for quick entry_id lookup given a storage_id. */
+ std::vector<std::vector<uint32_t>> sid_to_eid_;
/*! \brief Used for quick entry indexing. */
std::vector<uint32_t> node_row_ptr_;
/*! \brief Output entries. */
diff --git a/tests/python/unittest/test_set_input_zero_copy.py
b/tests/python/unittest/test_set_input_zero_copy.py
new file mode 100644
index 0000000000..3effbaed15
--- /dev/null
+++ b/tests/python/unittest/test_set_input_zero_copy.py
@@ -0,0 +1,137 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=missing-function-docstring,missing-module-docstring
+import tvm
+from tvm import relay
+import numpy as np
+from tvm.contrib import graph_executor
+from tvm import testing
+import numpy as np
+import pytest
+
+
+dev = tvm.cpu(0)
+target = tvm.target.Target("llvm")
+
+
+def build_relay_module(func):
+ mod = tvm.IRModule()
+ mod["main"] = func
+ lib = relay.build(mod, target=target)
+
+ return graph_executor.GraphModule(lib["default"](dev))
+
+
[email protected]_llvm
+def test_simple_graph():
+ # Simple relay func:
+ # 1. y = x + 1
+ # 2. return y
+ shape = (2, 2)
+ x = relay.var("x", shape=shape, dtype="float32")
+ y = relay.add(x, relay.ones(shape, dtype="float32"))
+ func = relay.Function([x], y)
+
+ # Build 2 exactly same relay modules.
+ mod = build_relay_module(func)
+ mod_zero_copy = build_relay_module(func)
+ x_np = np.random.uniform(size=shape).astype(np.float32)
+
+ # Use set_input()
+ x_nd = tvm.nd.array(x_np, device=dev)
+ mod.set_input("x", x_nd)
+ mod.run()
+
+ # Use set_input_zero_copy()
+ x_nd_zero_copy = tvm.nd.array(x_np, device=dev)
+ index = mod_zero_copy.get_input_index("x")
+ mod_zero_copy.module["set_input_zero_copy"](index, x_nd_zero_copy)
+ mod_zero_copy.run()
+
+ # Expect the same output from both modules.
+ testing.assert_allclose(mod.get_output(0).numpy(),
mod_zero_copy.get_output(0).numpy())
+
+
[email protected]_llvm
+def test_input_in_output():
+ # Relay func whose input is also returned as an output:
+ # 1. y = x + 1
+ # 2. return [x, y]
+ shape = (3, 4)
+ x = relay.var("x", shape=shape, dtype="float32")
+ y = relay.add(x, relay.ones(shape, dtype="float32"))
+ func = relay.Function([x], relay.expr.Tuple([x, y]))
+
+ # Build 2 exactly same relay modules.
+ mod = build_relay_module(func)
+ mod_zero_copy = build_relay_module(func)
+
+ x_np = np.random.uniform(size=shape).astype(np.float32)
+
+ # Use set_input()
+ x_nd = tvm.nd.array(x_np, device=dev)
+ mod.set_input("x", x_nd)
+ mod.run()
+
+ # Use set_input_zero_copy()
+ x_nd_zero_copy = tvm.nd.array(x_np, device=dev)
+ index = mod_zero_copy.get_input_index("x")
+ mod_zero_copy.module["set_input_zero_copy"](index, x_nd_zero_copy)
+ mod_zero_copy.run()
+
+ # Expect the same output "x".
+ testing.assert_allclose(mod.get_output(0).numpy(),
mod_zero_copy.get_output(0).numpy())
+
+
[email protected]_llvm
+def test_reshape_after_input():
+ # Relay func where a reshape op follows immediately after the input:
+ # 1. y = reshape(x, (1, 12))
+ # 2. z = y + 1
+ # 3. return [x, y, z]
+ shape = (3, 4)
+ x = relay.var("x", shape=shape, dtype="float32")
+ y = relay.reshape(x, (1, 12))
+ z = relay.add(y, relay.ones((1, 12), dtype="float32"))
+ func = relay.Function([x], relay.expr.Tuple([x, y, z]))
+
+ # Build 2 exactly same relay modules.
+ mod = build_relay_module(func)
+ mod_zero_copy = build_relay_module(func)
+
+ x_np = np.random.uniform(size=shape).astype(np.float32)
+
+ # Use set_input()
+ x_nd = tvm.nd.array(x_np, device=dev)
+ mod.set_input("x", x_nd)
+ mod.run()
+
+ # Use set_input_zero_copy()
+ x_nd_zero_copy = tvm.nd.array(x_np, device=dev)
+ index = mod_zero_copy.get_input_index("x")
+ mod_zero_copy.module["set_input_zero_copy"](index, x_nd_zero_copy)
+ mod_zero_copy.run()
+
+ # Expect the same output "x".
+ testing.assert_allclose(mod.get_output(0).numpy(),
mod_zero_copy.get_output(0).numpy())
+ # Expect the same output "y".
+ testing.assert_allclose(mod.get_output(1).numpy(),
mod_zero_copy.get_output(1).numpy())
+
+
+if __name__ == "__main__":
+ test_simple_graph()
+ test_input_in_output()
+ test_reshape_after_input()