anirudh2290 commented on a change in pull request #16654: Multithreaded Inference Support
URL: https://github.com/apache/incubator-mxnet/pull/16654#discussion_r365089768
 
 

 ##########
 File path: src/imperative/cached_op.h
 ##########
 @@ -26,8 +26,180 @@
 #include <utility>
 #include <string>
 #include <unordered_map>
+#include <map>
+#include "../operator/operator_common.h"
+#include "../operator/subgraph/common.h"
+#include "./imperative_utils.h"
 
 namespace mxnet {
+namespace {
+
+static const char FULL[] = "full";
+static const char FORWARD[] = "forward";
+static const char BACKWARD[] = "backward";
+static const char REF_COUNT[] = "ref_count";
+static const char MEM_PLAN[] = "mem_plan";
+static const char STORAGE_PLAN[] = "storage_plan";
+
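+// Joins a stage prefix with an attribute key, e.g.
+// AddPrefix(FORWARD, REF_COUNT) -> "forward_ref_count".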
+std::string AddPrefix(const std::string& prefix,
+                      const std::string& s) {
+  return prefix + "_" + s;
+}
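+
+// Builds the gradient graph for sym and stitches the forward and backward
+// outputs together into full_graph; fwd_input_to_grad_output maps each
+// non-auxiliary forward input to the index of its gradient output.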
+void CreateFullGraph(const nnvm::Symbol& sym,
+                     nnvm::Graph* fwd_graph,
+                     nnvm::Graph* grad_graph,
+                     nnvm::Graph* full_graph,
+                     std::vector<nnvm::NodeEntry>* ograd_entries,
+                     std::unordered_map<uint32_t, uint32_t>* fwd_input_to_grad_output) {
+  using namespace nnvm;
+  static const std::vector<const Op*> zero_ops{Op::Get("zeros_like"), Op::Get("_zeros")};
+  static const auto _copy_op = Op::Get("_copy");
+  {
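+    // Deduplicate repeated outputs: each duplicate is routed through an
+    // explicit _copy node so every graph output is a distinct entry.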
+    NodeEntryMap<size_t> dedup_out;
+    for (const NodeEntry& nodeEntry : sym.outputs) {
+      if (dedup_out.find(nodeEntry) != dedup_out.end()) {
+        NodePtr copy_node = Node::Create();
+        copy_node->attrs.op = _copy_op;
+        copy_node->attrs.name =
+            nodeEntry.node->attrs.name + "_copy" + std::to_string(dedup_out[nodeEntry]++);
+        copy_node->inputs.emplace_back(nodeEntry);
+        if (_copy_op->attr_parser != nullptr) {
+          _copy_op->attr_parser(&(copy_node->attrs));
+        }
+        fwd_graph->outputs.emplace_back(std::move(copy_node));
+      } else {
+        dedup_out.emplace(nodeEntry, 0);
+        fwd_graph->outputs.push_back(nodeEntry);
+      }
+    }
+  }
+
+  bool do_elim_common_expr = dmlc::GetEnv("MXNET_ELIMINATE_COMMON_EXPR", true);
+  if (do_elim_common_expr)
+    *fwd_graph = exec::EliminateCommonExpr(std::move(*fwd_graph));
+
+  // construct backward graph
+  {
+    ograd_entries->reserve(fwd_graph->outputs.size());
+    for (size_t i = 0; i < fwd_graph->outputs.size(); ++i) {
+      nnvm::NodePtr np = Node::Create();
+      np->attrs.name = "_head_grad_" + std::to_string(i);
+      ograd_entries->emplace_back(np);
+    }
+
+    std::vector<NodeEntry> xs;
+    const IndexedGraph& indexed_graph = fwd_graph->indexed_graph();
+    for (size_t i = 0; i < indexed_graph.input_nodes().size(); ++i) {
+      const uint32_t node_id = indexed_graph.input_nodes()[i];
+      if (indexed_graph.mutable_input_nodes().count(node_id))
+        continue;
+      (*fwd_input_to_grad_output)[i] = xs.size();
+      xs.emplace_back(indexed_graph[node_id].weak_ref.lock());
+    }
+
+    CHECK(!xs.empty())
+        << "There are no inputs in computation graph that require gradients.";
+
+    *grad_graph = pass::MXGradient(
+        *fwd_graph, fwd_graph->outputs, xs, *ograd_entries,
+        exec::AggregateGradient, nullptr, nullptr,
+        zero_ops, "_copy");
+  }
+
+  // construct full graph
+  {
+    full_graph->outputs = fwd_graph->outputs;
+    for (const auto& i : grad_graph->outputs) full_graph->outputs.emplace_back(i);
+  }
+}
+
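+// Counts how many consumers each forward entry has, both for a forward-only
+// pass ("forward_ref_count") and for the full forward+backward graph
+// ("full_ref_count"); the memory planner uses these counts to reuse storage.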
+void SetRefCounts(nnvm::Graph* fwd_graph, const nnvm::Graph& full_graph) {
+  const auto& idx = fwd_graph->indexed_graph();
+  CHECK_GE(idx.input_nodes().size(), 1) << "CachedOp requires at least 1 input";
+
+  std::vector<uint32_t> ref_count(idx.num_node_entries(), 0);
+  for (const auto& i : idx.input_nodes()) ++ref_count[idx.entry_id(i, 0)];
+  for (const auto& i : idx.outputs()) ++ref_count[idx.entry_id(i)];
+  for (size_t i = 0; i < idx.num_nodes(); ++i) {
+    for (const auto& j : idx[i].inputs) ++ref_count[idx.entry_id(j)];
+  }
+
+  fwd_graph->attrs[AddPrefix(FORWARD, REF_COUNT)] =
+      std::make_shared<dmlc::any>(std::move(ref_count));
+
+  size_t num_forward_nodes = idx.num_nodes();
+  size_t num_forward_entries = idx.num_node_entries();
+
+  const auto& full_idx = full_graph.indexed_graph();
+
+  std::vector<uint32_t> temp_ref_count(full_idx.num_node_entries(), 0);
+  for (size_t i = num_forward_nodes; i < full_idx.num_nodes(); ++i) {
+    for (const auto& j : full_idx[i].inputs) {
+       ++temp_ref_count[full_idx.entry_id(j)];
+    }
+  }
+
+  auto full_ref_count =
+      fwd_graph->GetAttr<std::vector<uint32_t> >(AddPrefix(FORWARD, REF_COUNT));
+  for (size_t i = 0; i < num_forward_entries; ++i)
+    full_ref_count.at(i) += temp_ref_count[i];
+  fwd_graph->attrs[AddPrefix(FULL, REF_COUNT)] =
+      std::make_shared<dmlc::any>(std::move(full_ref_count));
+}
+
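+// Applies GPU-only graph rewrites on CUDA builds (not on Windows).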
+void OptimizeGraph(nnvm::Graph* full_graph, nnvm::Graph* fwd_graph, nnvm::Graph* grad_graph,
+                   const Context& context, size_t num_forward_outputs, const bool inlining) {
+#if MXNET_USE_CUDA && !defined(_WIN32)
+  if (context.dev_mask() == kGPU &&
 
 Review comment:
   This function was carried over from before; refactoring it is outside the scope of this PR.
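
   For context, the ref-count bookkeeping in SetRefCounts boils down to
   counting the consumers of each tensor entry so the memory planner knows
   when a buffer can be recycled. Below is a minimal standalone sketch of that
   idea; the toy types and names are hypothetical, not nnvm's IndexedGraph API.

   #include <cstdint>
   #include <cstdio>
   #include <vector>

   // One toy node: the entry ids it reads.
   struct ToyNode {
     std::vector<uint32_t> inputs;
   };

   int main() {
     // Entries 0 and 1 are graph inputs; node 0 produces entry 2,
     // node 1 produces entry 3.
     std::vector<ToyNode> fwd_nodes = {{{0, 1}}, {{2, 0}}};
     std::vector<uint32_t> graph_inputs = {0, 1};
     std::vector<uint32_t> graph_outputs = {3};
     std::vector<uint32_t> ref_count(4, 0);

     // Mirrors the three loops in SetRefCounts: graph inputs, graph
     // outputs, and per-node consumers each add one use.
     for (uint32_t e : graph_inputs) ++ref_count[e];
     for (uint32_t e : graph_outputs) ++ref_count[e];
     for (const ToyNode& n : fwd_nodes)
       for (uint32_t e : n.inputs) ++ref_count[e];

     // The "full" count then adds uses of forward entries by backward
     // nodes (cf. full_ref_count in the diff).
     std::vector<ToyNode> bwd_nodes = {{{2, 3}}};
     for (const ToyNode& n : bwd_nodes)
       for (uint32_t e : n.inputs) ++ref_count[e];

     for (size_t i = 0; i < ref_count.size(); ++i)
       std::printf("entry %zu: %u uses\n", i, ref_count[i]);
     return 0;
   }

   This also suggests why the diff keeps separate "forward_ref_count" and
   "full_ref_count" attributes: a forward-only run can free buffers earlier
   than a run that must keep activations alive for backward.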

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services
