huajsj commented on a change in pull request #8497:
URL: https://github.com/apache/tvm/pull/8497#discussion_r672767569
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -463,6 +515,15 @@ PackedFunc GraphExecutor::GetFunction(const std::string&
name,
this->SetInputZeroCopy(args[0], args[1]);
}
});
+ } else if (name == "set_output_zero_copy") {
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+ if (String::CanConvertFrom(args[0])) {
+ int in_idx = this->GetOutputIndex(args[0].operator String());
Review comment:
The variable should be named `out_idx` rather than `in_idx`, since it holds an output index.
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -463,6 +515,15 @@ PackedFunc GraphExecutor::GetFunction(const std::string&
name,
this->SetInputZeroCopy(args[0], args[1]);
}
});
+ } else if (name == "set_output_zero_copy") {
+ return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+ if (String::CanConvertFrom(args[0])) {
+ int in_idx = this->GetOutputIndex(args[0].operator String());
+ if (in_idx >= 0) this->SetOutputZeroCopy(in_idx, args[1]);
Review comment:
Ditto — rename `in_idx` to `out_idx` here as well.
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -139,6 +156,30 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor*
data_ref) {
t->data = data_ref->data;
}
}
+/*!
+ * \brief set index-th output to the graph without copying the data.
+ * \param index The output index.
+ * \param data_ref The output data that is referred.
+ */
+void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) {
+ ICHECK_LT(static_cast<size_t>(index), outputs_.size());
+ uint32_t eid = this->entry_id(outputs_[index]);
+ const DLTensor* old_t = data_entry_[eid].operator->();
+
+ // check the consistency of output
+ ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref));
+ ICHECK_EQ(reinterpret_cast<size_t>(data_ref->data) % kAllocAlignment, 0);
+ ICHECK_EQ(old_t->ndim, static_cast<size_t>(data_ref->ndim));
+ ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type);
+ ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id);
+ for (auto i = 0; i < data_ref->ndim; ++i) {
+ ICHECK_EQ(old_t->shape[i], data_ref->shape[i]);
+ }
Review comment:
SetInputZeroCopy uses exactly the same consistency-check code; factoring it
out into a common helper function would avoid the duplication.
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -389,6 +434,13 @@ void GraphExecutor::SetupOpExecs() {
input_dltensors_[eid].push_back(static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
}
}
+ // check if op output is model output
+ if (output_node_id.count(nid) > 0) {
+ for (uint32_t index = inode.inputs.size();
+ index < inode.param.num_outputs + inode.param.num_inputs; ++index) {
+
output_dltensors_.push_back(static_cast<DLTensor*>(op_args->arg_values[index].v_handle));
Review comment:
For a case such as `"heads": [[48, 0, 0], [36, 0, 0]]`, this logic
would be wrong.
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -139,6 +156,30 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor*
data_ref) {
t->data = data_ref->data;
}
}
+/*!
+ * \brief set index-th output to the graph without copying the data.
+ * \param index The output index.
+ * \param data_ref The output data that is referred.
+ */
+void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) {
+ ICHECK_LT(static_cast<size_t>(index), outputs_.size());
+ uint32_t eid = this->entry_id(outputs_[index]);
+ const DLTensor* old_t = data_entry_[eid].operator->();
+
+ // check the consistency of output
+ ICHECK_EQ(data_alignment_[eid], details::GetDataAlignment(*data_ref));
+ ICHECK_EQ(reinterpret_cast<size_t>(data_ref->data) % kAllocAlignment, 0);
+ ICHECK_EQ(old_t->ndim, static_cast<size_t>(data_ref->ndim));
+ ICHECK_EQ(old_t->device.device_type, data_ref->device.device_type);
+ ICHECK_EQ(old_t->device.device_id, data_ref->device.device_id);
+ for (auto i = 0; i < data_ref->ndim; ++i) {
+ ICHECK_EQ(old_t->shape[i], data_ref->shape[i]);
+ }
+
+ // Update the data pointer for output op
+ ICHECK_LT(static_cast<size_t>(index), output_dltensors_.size());
+ output_dltensors_[index]->data = data_ref->data;
Review comment:
If an output is at the same time an input of multiple nodes, this logic
would be wrong.
##########
File path: src/runtime/graph_executor/graph_executor.cc
##########
@@ -139,6 +156,30 @@ void GraphExecutor::SetInputZeroCopy(int index, DLTensor*
data_ref) {
t->data = data_ref->data;
}
}
+/*!
+ * \brief set index-th output to the graph without copying the data.
+ * \param index The output index.
+ * \param data_ref The output data that is referred.
+ */
+void GraphExecutor::SetOutputZeroCopy(int index, DLTensor* data_ref) {
+ ICHECK_LT(static_cast<size_t>(index), outputs_.size());
+ uint32_t eid = this->entry_id(outputs_[index]);
+ const DLTensor* old_t = data_entry_[eid].operator->();
Review comment:
const_cast<DLTensor*>(data_entry_[eid].operator->());
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]