anirudh2290 commented on a change in pull request #16654: Multithreaded Inference Support
URL: https://github.com/apache/incubator-mxnet/pull/16654#discussion_r366074691
 
 

 ##########
 File path: src/imperative/cached_op.h
 ##########
 @@ -26,8 +26,195 @@
 #include <utility>
 #include <string>
 #include <unordered_map>
+#include <map>
+#include "../operator/operator_common.h"
+#include "../operator/subgraph/common.h"
+#include "./imperative_utils.h"
 
 namespace mxnet {
+namespace {
+
+  static const char FULL[] = "full";
+  static const char FORWARD[] = "forward";
+  static const char BACKWARD[] = "backward";
+  static const char REF_COUNT[] = "ref_count";
+  static const char MEM_PLAN[] = "mem_plan";
+  static const char STORAGE_PLAN[] = "storage_plan";
+
+std::string AddPrefix(const std::string& prefix,
+                      const std::string& s) {
+  return prefix + "_" + s;
+}
+
+/* \brief Create a forward graph from the Symbol */
+void CreateForwardGraph(const nnvm::Symbol &sym, nnvm::Graph *fwd_graph) {
+  using namespace nnvm;
+  static const auto _copy_op = Op::Get("_copy");
+  {
+    NodeEntryMap<size_t> dedup_out;
+    for (const NodeEntry& nodeEntry : sym.outputs) {
+      if (dedup_out.find(nodeEntry) != dedup_out.end()) {
+        NodePtr copy_node = Node::Create();
+        copy_node->attrs.op = _copy_op;
+        copy_node->attrs.name =
+            nodeEntry.node->attrs.name + "_copy" + std::to_string(dedup_out[nodeEntry]++);
+        copy_node->inputs.emplace_back(nodeEntry);
+        if (_copy_op->attr_parser != nullptr) {
+          _copy_op->attr_parser(&(copy_node->attrs));
+        }
+        fwd_graph->outputs.emplace_back(std::move(copy_node));
+      } else {
+        dedup_out.emplace(nodeEntry, 0);
+        fwd_graph->outputs.push_back(nodeEntry);
+      }
+    }
+  }
+}
+
+/* \brief Construct fwd_graph, grad_graph and full_graph from symbol */
+void CreateFullGraph(const nnvm::Symbol& sym,
+                     nnvm::Graph* fwd_graph,
+                     nnvm::Graph* grad_graph,
+                     nnvm::Graph* full_graph,
+                     std::vector<nnvm::NodeEntry>* ograd_entries,
+                     std::unordered_map<uint32_t, uint32_t>* fwd_input_to_grad_output) {
+  using namespace nnvm;
+  static const std::vector<const Op*> zero_ops{Op::Get("zeros_like"), Op::Get("_zeros")};
+  CreateForwardGraph(sym, fwd_graph);
+
+  bool do_elim_common_expr = dmlc::GetEnv("MXNET_ELIMINATE_COMMON_EXPR", true);
+  if (do_elim_common_expr)
+    *fwd_graph = exec::EliminateCommonExpr(std::move(*fwd_graph));
+
+  // construct backward graph
+  {
+    ograd_entries->reserve(fwd_graph->outputs.size());
+    for (size_t i = 0; i < fwd_graph->outputs.size(); ++i) {
+      nnvm::NodePtr np = Node::Create();
+      np->attrs.name = "_head_grad_" + std::to_string(i);
+      ograd_entries->emplace_back(np);
+    }
+
+    std::vector<NodeEntry> xs;
+    const IndexedGraph& indexed_graph = fwd_graph->indexed_graph();
+    for (size_t i = 0; i < indexed_graph.input_nodes().size(); ++i) {
+      const uint32_t node_id = indexed_graph.input_nodes()[i];
+      if (indexed_graph.mutable_input_nodes().count(node_id))
+        continue;
+      (*fwd_input_to_grad_output)[i] = xs.size();
+      xs.emplace_back(indexed_graph[node_id].weak_ref.lock());
+    }
+
+    CHECK(!xs.empty())
+        << "There are no inputs in computation graph that require gradients.";
+
+    *grad_graph = pass::MXGradient(
+        *fwd_graph, fwd_graph->outputs, xs, *ograd_entries,
+        exec::AggregateGradient, nullptr, nullptr,
+        zero_ops, "_copy");
+  }
+
+  // construct full graph
+  {
+    full_graph->outputs = fwd_graph->outputs;
+    for (const auto& i : grad_graph->outputs) full_graph->outputs.emplace_back(i);
+  }
+}
+
+/* \brief Set Ref counts for node entries for forward graph */
+void SetForwardRefCounts(nnvm::Graph *fwd_graph) {
+  const auto& idx = fwd_graph->indexed_graph();
+  CHECK_GE(idx.input_nodes().size(), 1) << "CachedOp requires at least 1 input";
+
+  std::vector<uint32_t> ref_count(idx.num_node_entries(), 0);
+  for (const auto& i : idx.input_nodes()) ++ref_count[idx.entry_id(i, 0)];
+  for (const auto& i : idx.outputs()) ++ref_count[idx.entry_id(i)];
+  for (size_t i = 0; i < idx.num_nodes(); ++i) {
+    for (const auto& j : idx[i].inputs) ++ref_count[idx.entry_id(j)];
+  }
+
+  fwd_graph->attrs[AddPrefix(FORWARD, REF_COUNT)] =
+      std::make_shared<dmlc::any>(std::move(ref_count));
+}
+
+/* \brief Set Ref counts for node entries for forward graph and full graph */
+void SetRefCounts(nnvm::Graph* fwd_graph, const nnvm::Graph& full_graph) {
+  const auto& idx = fwd_graph->indexed_graph();
+  SetForwardRefCounts(fwd_graph);
+
+  size_t num_forward_nodes = idx.num_nodes();
+  size_t num_forward_entries = idx.num_node_entries();
+
+  const auto& full_idx = full_graph.indexed_graph();
+
+  std::vector<uint32_t> temp_ref_count(full_idx.num_node_entries(), 0);
+  for (size_t i = num_forward_nodes; i < full_idx.num_nodes(); ++i) {
+    for (const auto& j : full_idx[i].inputs) {
+       ++temp_ref_count[full_idx.entry_id(j)];
+    }
+  }
+
+  auto full_ref_count = fwd_graph->GetAttr<std::vector<uint32_t> >(AddPrefix(FORWARD,
+                                                                             REF_COUNT));
+  for (size_t i = 0; i < num_forward_entries; ++i) full_ref_count.at(i) += temp_ref_count[i];
+  fwd_graph->attrs[AddPrefix(FULL, REF_COUNT)] =
+      std::make_shared<dmlc::any>(std::move(full_ref_count));
+}
+
+void OptimizeGraph(nnvm::Graph * full_graph, nnvm::Graph * fwd_graph, nnvm::Graph * grad_graph,
+                   const Context& context, size_t num_forward_outputs, const bool inlining) {
+#if MXNET_USE_CUDA && !defined(_WIN32)
+  if (context.dev_mask() == kGPU &&
+      !inlining &&
+      dmlc::GetEnv("MXNET_USE_FUSION", true)) {
+    nnvm::Graph unoptimized_graph;
+    common::CopyGraph(&unoptimized_graph, *full_graph, false);
+
+    if (common::CheckForInputNameDuplicates(unoptimized_graph.indexed_graph())) {
+      full_graph->attrs["num_forward_outputs"] = std::make_shared<nnvm::any>(num_forward_outputs);
+      *full_graph = exec::FusePointwiseForward(std::move(*full_graph));
+      full_graph->attrs["num_forward_outputs"] = std::make_shared<nnvm::any>(num_forward_outputs);
+      *full_graph = exec::FusePointwiseBackward(std::move(*full_graph));
+      // Check the topological order of inputs
+      const auto &original_inputs = unoptimized_graph.indexed_graph().input_nodes();
+      const auto &new_inputs = full_graph->indexed_graph().input_nodes();
+      if (original_inputs.size() != new_inputs.size()) {
+        LOG(WARNING)
+          << "Number of inputs after fusion does not match original number of inputs. "
+          << "This is most probably a bug. Disabling fusion for this run.";
+        *full_graph = unoptimized_graph;
+      } else {
+        for (size_t i = 0; i < new_inputs.size(); ++i) {
+          if (unoptimized_graph.indexed_graph()[original_inputs[i]].source->attrs.name !=
+              full_graph->indexed_graph()[new_inputs[i]].source->attrs.name) {
+            LOG(WARNING) << "Disabling fusion due to altered topological order of inputs.";
+            *full_graph = unoptimized_graph;
+            break;
+          }
+        }
+      }
+    } else {
+      LOG(WARNING)
+        << "Graph contains duplicate names for some of its inputs - fusion is NOT enabled!";
+     }
+  }
+#endif  // MXNET_USE_CUDA && !defined(_WIN32)
+
+  *fwd_graph = nnvm::Graph();
+  fwd_graph->outputs = std::vector<nnvm::NodeEntry>(full_graph->outputs.begin(),
+                                                    full_graph->outputs.begin() +
+                                                    num_forward_outputs);
+  *grad_graph = nnvm::Graph();
+  grad_graph->outputs = std::vector<nnvm::NodeEntry>(full_graph->outputs.begin() +
+                                                     num_forward_outputs,
+                                                     full_graph->outputs.end());
+  SetRefCounts(fwd_graph, *full_graph);
+}
+
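
 For orientation, here is a minimal sketch of how these free helpers might be chained when building the forward, gradient and full graphs. The function name BuildCachedOpGraphs and the inputs sym and ctx are placeholders for illustration, not necessarily how the PR's CachedOp wires the helpers together:

   // Illustrative sketch only; sym and ctx are assumed to be supplied by the caller.
   void BuildCachedOpGraphs(const nnvm::Symbol& sym, const Context& ctx) {
     nnvm::Graph fwd_graph, grad_graph, full_graph;
     std::vector<nnvm::NodeEntry> ograd_entries;
     std::unordered_map<uint32_t, uint32_t> fwd_input_to_grad_output;
     // Build the forward graph, derive the gradient graph and concatenate both.
     CreateFullGraph(sym, &fwd_graph, &grad_graph, &full_graph,
                     &ograd_entries, &fwd_input_to_grad_output);
     // Forward output count must be taken before OptimizeGraph resets fwd_graph.
     const size_t num_forward_outputs = fwd_graph.outputs.size();
     // On GPU this may apply pointwise fusion; it ends by calling SetRefCounts
     // to record per-entry reference counts on the forward graph.
     OptimizeGraph(&full_graph, &fwd_graph, &grad_graph,
                   ctx, num_forward_outputs, /*inlining=*/false);
   }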
 
 Review comment:
   removed.
