sunjiweiswift commented on a change in pull request #8497:
URL: https://github.com/apache/tvm/pull/8497#discussion_r672849571
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -463,6 +515,15 @@ PackedFunc GraphExecutor::GetFunction(const std::string&
name,
this->SetInputZeroCopy(args[0], args[1]);
}
});
+ } else if (name == "set_output_zero_copy") {
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+ if (String::CanConvertFrom(args[0])) {
+ int in_idx = this->GetOutputIndex(args[0].operator String());
+ if (in_idx >= 0) this->SetOutputZeroCopy(in_idx, args[1]);
Review comment:
What does "dido" mean?
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -463,6 +515,15 @@ PackedFunc GraphExecutor::GetFunction(const std::string&
name,
this->SetInputZeroCopy(args[0], args[1]);
}
});
+ } else if (name == "set_output_zero_copy") {
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+ if (String::CanConvertFrom(args[0])) {
+ int in_idx = this->GetOutputIndex(args[0].operator String());
Review comment:
I will modify this.
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -139,6 +156,30 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor*
data_ref) {
t->data = data_ref->data;
}
}
+/*!
+ * \brief set index-th output to the graph without copying the data.
+ * \param index The output index.
+ * \param data_ref The output data that is referred.
+ */
+void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) {
+ ICHECK_LT(static_cast<size_t>(index), outputs_.size());
+ uint32_t eid = this->entry_id(outputs_[index]);
+ const DLTensor* old_t = data_entry_[eid].operator->();
+
+ // check the consistency of output
+ ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref));
+ ICHECK_EQ(reinterpret_cast<size_t>(data_ref->data) % kAllocAlignment, 0);
+ ICHECK_EQ(old_t->ndim, static_cast<size_t>(data_ref->ndim));
+ ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type);
+ ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id);
+ for (auto i = 0; i < data_ref->ndim; ++i) {
+ ICHECK_EQ(old_t->shape[i], data_ref->shape[i]);
+ }
Review comment:
I will modify this.
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -139,6 +156,30 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor*
data_ref) {
t->data = data_ref->data;
}
}
+/*!
+ * \brief set index-th output to the graph without copying the data.
+ * \param index The output index.
+ * \param data_ref The output data that is referred.
+ */
+void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) {
+ ICHECK_LT(static_cast<size_t>(index), outputs_.size());
+ uint32_t eid = this->entry_id(outputs_[index]);
+ const DLTensor* old_t = data_entry_[eid].operator->();
Review comment:
No need to modify DLtensor
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -139,6 +156,30 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor*
data_ref) {
t->data = data_ref->data;
}
}
+/*!
+ * \brief set index-th output to the graph without copying the data.
+ * \param index The output index.
+ * \param data_ref The output data that is referred.
+ */
+void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) {
+ ICHECK_LT(static_cast<size_t>(index), outputs_.size());
+ uint32_t eid = this->entry_id(outputs_[index]);
+ const DLTensor* old_t = data_entry_[eid].operator->();
+
+ // check the consistency of output
+ ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref));
+ ICHECK_EQ(reinterpret_cast<size_t>(data_ref->data) % kAllocAlignment, 0);
+ ICHECK_EQ(old_t->ndim, static_cast<size_t>(data_ref->ndim));
+ ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type);
+ ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id);
+ for (auto i = 0; i < data_ref->ndim; ++i) {
+ ICHECK_EQ(old_t->shape[i], data_ref->shape[i]);
+ }
+
+ // Update the data pointer for output op
+ ICHECK_LT(static_cast<size_t>(index), output_dltensors_.size());
+ output_dltensors_[index]->data = data_ref->data;
Review comment:
// Update the input of the op connected to the output
for (auto node : nodes_) {
auto it = std::find(node.inputs.begin(), node.inputs.end(), output_node);
if (it != node.inputs.end()) {
int input_nid = GetInputIndex(node.name);
int index = it - node.inputs.begin();
uint32_t eid = this->entry_id(input_nodes_[input_nid], index);
for (DLTensor* t : input_dltensors_[eid]) {
t->data = data_ref->data;
}
}
}
Can it be solved this way? Sorry, I don't have a test case.
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -389,6 +434,13 @@ void GraphExecutor::SetupOpExecs() {
input_dltensors_[eid].push_back(static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
}
}
+ // check if op output is model output
+ if (output_node_id.count(nid) > 0) {
+ for (uint32_t index = inode.inputs.size();
+ index < inode.param.num_outputs + inode.param.num_inputs; ++index) {
+
output_dltensors_.push_back(static_cast<DLTensor*>(op_args->arg_values[index].v_handle));
Review comment:
Is it possible to sort outputs_?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]