slyubomirsky commented on code in PR #11286:
URL: https://github.com/apache/tvm/pull/11286#discussion_r881196061


##########
src/runtime/graph_executor/graph_executor.cc:
##########
@@ -448,74 +570,65 @@ void GraphExecutor::SetupOpExecs() {
   for (uint32_t nid = 0; nid < this->GetNumOfNodes(); ++nid) {
     const auto& inode = nodes_[nid];
     if (inode.op_type == "null") continue;
-    std::vector<DLTensor> args;
-    for (const auto& e : inode.inputs) {
-      uint32_t eid = this->entry_id(e);
-      args.push_back(*(data_entry_[eid].operator->()));
-    }
-    for (uint32_t index = 0; index < inode.param.num_outputs; ++index) {
-      uint32_t eid = this->entry_id(nid, index);
-      args.push_back(*(data_entry_[eid].operator->()));
-    }
-    ICHECK(inode.op_type == "tvm_op") << "Can only take tvm_op as op";
+    std::vector<DLTensor> args = {};
 
     std::shared_ptr<OpArgs> op_args = nullptr;
-    std::tie(op_execs_[nid], op_args) = CreateTVMOp(inode.param, args);
-
-    for (size_t i = 0; i < inode.inputs.size(); i++) {
-      uint32_t input_eid = this->entry_id(inode.inputs[i]);
-      // check if op input is model input
-      if (input_node_eids.count(input_eid) > 0) {
-        input_dltensors_[input_eid].push_back(
-            static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
+    std::tie(op_execs_[nid], op_args) = CreateTVMOp(nid, inode.param, args);
+  }
+}
+
+void GraphExecutor::UpdateInputOutputTensors(const 
std::unordered_set<uint32_t>& input_node_eids,
+                                             const 
std::unordered_set<uint32_t>& output_node_eids,
+                                             uint32_t nid,
+                                             
std::shared_ptr<GraphExecutor::OpArgs> op_args) {
+  const auto& inode = nodes_[nid];
+  for (size_t i = 0; i < inode.inputs.size(); i++) {
+    uint32_t input_eid = this->entry_id(inode.inputs[i]);
+    // check if op input is model input
+    if (input_node_eids.count(input_eid) > 0) {
+      auto& eid_input = input_dltensors_[input_eid];
+      auto input_tensor = 
static_cast<DLTensor*>(op_args->arg_values[i].v_handle);
+      if (std::find(eid_input.begin(), eid_input.end(), input_tensor) == 
eid_input.end()) {
+        eid_input.push_back(input_tensor);
       }
-      // check if any model output is the input of the op
-      if (output_node_eids.count(input_eid) > 0) {
-        both_output_opinput_dltensors_[input_eid].push_back(
-            static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
+    }
+    // check if any model output is the input of the op
+    if (output_node_eids.count(input_eid) > 0) {
+      auto& eid_both = both_output_opinput_dltensors_[input_eid];
+      auto both_tensor = 
static_cast<DLTensor*>(op_args->arg_values[i].v_handle);
+      if(std::find(eid_both.begin(), eid_both.end(), both_tensor) == 
eid_both.end()){
+        eid_both.push_back(both_tensor);
       }
     }
+  }
 
-    for (uint32_t i = inode.inputs.size(); i < inode.inputs.size() + 
inode.param.num_outputs; ++i) {
-      uint32_t output_eid = this->entry_id(nid, i - inode.inputs.size());
-      // check if op output is model output
-      if (output_node_eids.count(output_eid) > 0) {
-        output_dltensors_[output_eid].push_back(
-            static_cast<DLTensor*>(op_args->arg_values[i].v_handle));
+  for (uint32_t i = inode.inputs.size(); i < inode.inputs.size() + 
inode.param.num_outputs; ++i) {
+    uint32_t output_eid = this->entry_id(nid, i - inode.inputs.size());
+    // check if op output is model output
+    if (output_node_eids.count(output_eid) > 0) {
+      auto& eid_out = output_dltensors_[output_eid];
+      auto out_tensor = 
static_cast<DLTensor*>(op_args->arg_values[i].v_handle);
+      if(std::find(eid_out.begin(), eid_out.end(), out_tensor) == 
eid_out.end()){
+        eid_out.push_back(out_tensor);
       }
     }
   }
 }
-
 std::pair<std::function<void()>, std::shared_ptr<GraphExecutor::OpArgs> >
-GraphExecutor::CreateTVMOp(const TVMOpParam& param, const 
std::vector<DLTensor>& args) {
+GraphExecutor::CreateTVMOp(uint32_t nid, const TVMOpParam& param,
+                           const std::vector<DLTensor>& args) {
   std::shared_ptr<GraphExecutor::OpArgs> arg_ptr = 
std::make_shared<GraphExecutor::OpArgs>();
-  // setup address.
-  arg_ptr->args = args;
-  if (param.flatten_data) {
-    arg_ptr->shape_data.resize(arg_ptr->args.size());
-  }
-  for (size_t i = 0; i < arg_ptr->args.size(); ++i) {
-    TVMValue v;
-    DLTensor* t = &arg_ptr->args[i];
-    v.v_handle = t;
-    arg_ptr->arg_values.push_back(v);
-    arg_ptr->arg_tcodes.push_back(kTVMDLTensorHandle);
-    if (param.flatten_data) {
-      arg_ptr->shape_data[i] =
-          std::accumulate(t->shape, t->shape + t->ndim, 1, 
std::multiplies<int64_t>());
-      t->ndim = 1;
-      t->shape = &(arg_ptr->shape_data[i]);
-    }
-  }
+  // set up the memory location map
+  op_mem_location_map_[nid] = arg_ptr;
 
   if (param.func_name == "__nop") {
     return {[]() {}, arg_ptr};
   } else if (param.func_name == "__copy") {
     // Perform cross device data copy.
     // Directly copy data from the input to the output.
     // TODO(mbs): device_copy cleanup.
-    auto fexec = [arg_ptr]() {
+    auto fexec = [nid, this]() {
+      auto arg_ptr = this->op_mem_location_map_[nid];

Review Comment:
   It might be more economical to define `arg_ptr` outside the lambda 
and capture only that. (The same goes for the other lambdas that capture `this`.) I 
doubt it's a serious issue, but it may be worth checking.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to