apeforest commented on a change in pull request #14836: Refactor AGInfo and Imperative
URL: https://github.com/apache/incubator-mxnet/pull/14836#discussion_r305027116
 
 

 ##########
 File path: src/imperative/imperative.cc
 ##########
 @@ -316,181 +312,223 @@ std::vector<NDArray*> Imperative::Backward(
       info.outputs.back() = static_cast<real_t>(1.0);
     }
   }
+  return ograd_entries;
+}
 
-  // Get gradient graph
-  Symbol sym;
-  sym.outputs = graph.outputs;
-  std::vector<NodeEntry> xs;
-  std::vector<NDArray*> x_grads;
-  std::vector<OpReqType> x_reqs;
-  if (variables.size()) {
-    xs.reserve(variables.size());
-    x_grads.reserve(variables.size());
-    x_reqs.reserve(variables.size());
+struct Imperative::GradientVariableNodes {
+  std::vector<nnvm::NodeEntry> variable_nodes;
+  std::vector<NDArray*> gradients;
+  std::vector<OpReqType> op_req_types;
+};
+
+Imperative::GradientVariableNodes Imperative::CreateGradientVariableNodes(
+    const std::vector<NDArray *> &variables,
+    const std::vector<nnvm::NodeEntry> &outputs) {
+  GradientVariableNodes var_nodes;
+  if (!variables.empty()) {
+    var_nodes.variable_nodes.reserve(variables.size());
+    var_nodes.gradients.reserve(variables.size());
+    var_nodes.op_req_types.reserve(variables.size());
     for (size_t i = 0; i < variables.size(); ++i) {
       CHECK(!AGInfo::IsNone(*variables[i]) &&
             AGInfo::IsVariable(variables[i]->entry_.node))
           << "Cannot differentiate with respect to the " << i+1 << "-th 
variable"
           << " because it does not require gradient.";
-      xs.emplace_back(variables[i]->entry_);
-      x_grads.push_back(new NDArray());
-      x_reqs.push_back(kWriteTo);
+      var_nodes.variable_nodes.emplace_back(variables[i]->entry_);
+      var_nodes.gradients.push_back(new NDArray());
+      var_nodes.op_req_types.push_back(kWriteTo);
     }
   } else {
-    std::vector<NodePtr> args = sym.ListInputs(Symbol::kReadOnlyArgs);
-    xs.reserve(args.size());
-    x_grads.reserve(args.size());
-    x_reqs.reserve(args.size());
-    for (const auto& i : args) {
-      AGInfo& info = AGInfo::Get(i);
-      if (info.grad_req == kNullOp) continue;
-      xs.emplace_back(NodeEntry{i, 0, 0});
-      x_grads.push_back(&info.out_grads[0]);
-      x_reqs.push_back(info.grad_req);
-      info.fresh_out_grad = true;
+    nnvm::Symbol s;
+    s.outputs = outputs;
+    std::vector<nnvm::NodePtr> input_ro_nodes = 
s.ListInputs(Symbol::kReadOnlyArgs);
+    var_nodes.variable_nodes.reserve(input_ro_nodes.size());
+    var_nodes.gradients.reserve(input_ro_nodes.size());
+    var_nodes.op_req_types.reserve(input_ro_nodes.size());
+    for (const auto& node : input_ro_nodes) {
+      AGInfo& info = AGInfo::Get(node);
+      if (info.grad_req != kNullOp) {
+        var_nodes.variable_nodes.emplace_back(node);
+        var_nodes.gradients.push_back(&info.out_grads[0]);
+        var_nodes.op_req_types.push_back(info.grad_req);
+        info.fresh_out_grad = true;
+      }
     }
-    CHECK_GT(xs.size(), 0)
+    CHECK_GT(var_nodes.variable_nodes.size(), 0)
         << "There are no inputs in computation graph that require gradients.";
   }
+  return var_nodes;
+}
+
+
 
-  Graph g_graph = pass::MXGradient(
-      graph, graph.outputs, xs, ograd_entries,
+std::vector<NDArray*> Imperative::Backward(
+    const std::vector<NDArray*>& outputs,
+    const std::vector<NDArray*>& ograds,
+    const std::vector<NDArray*>& variables,
+    bool is_train, bool retain_graph,
+    bool create_graph) {
+  using namespace nnvm;
+  using namespace imperative;
+  static const std::vector<const Op*> zero_ops{Op::Get("zeros_like"), 
Op::Get("_zeros")};
+  static const Op* copy_op = Op::Get("_copy");
+
+  Graph graph;
+  graph.outputs = CreateForwardGraph(outputs);
+
+
+  // Prepare head gradient nodes
+  std::vector<NodeEntry> ograd_entries = CreateHeadGradientNodes(outputs, 
ograds);
+
+  // Get variable nodes
+  GradientVariableNodes gvars = CreateGradientVariableNodes(variables, 
graph.outputs);
+
+  // Run backward on the graph
+  Graph gradient_graph = pass::MXGradient(
+      graph, graph.outputs, gvars.variable_nodes, ograd_entries,
       exec::AggregateGradient, nullptr, nullptr,
       zero_ops, "_copy");
-  CHECK_EQ(g_graph.outputs.size(), xs.size());
-  for (const auto& e : g_graph.outputs) {
-    if (e.node->op() == nullptr) {
+
+  CHECK_EQ(gradient_graph.outputs.size(), gvars.variable_nodes.size());
+  std::vector<nnvm::NodeEntry> forward_outputs = graph.outputs;
+  const size_t num_forward_outputs = graph.outputs.size();
+
+  // TODO(larroy): move inside pass::MXGradient
+  for (const auto& backward_node : gradient_graph.outputs) {
+    if (backward_node.node->is_variable()) {
       auto node = Node::Create();
       node->attrs.op = copy_op;
-      node->inputs.push_back(e);
+      node->inputs.push_back(backward_node);
       graph.outputs.emplace_back(std::move(node));
     } else {
-      graph.outputs.push_back(e);
+      graph.outputs.push_back(backward_node);
     }
   }
-  const auto& idx = graph.indexed_graph();
+
+  const auto& indexed_graph = graph.indexed_graph();
   // get number of nodes used in forward pass
   size_t num_forward_nodes = 0;
   size_t num_forward_entries = 0;
   for (size_t i = 0; i < num_forward_outputs; ++i) {
     num_forward_nodes = std::max(
-        num_forward_nodes, static_cast<size_t>(idx.outputs()[i].node_id + 1));
+        num_forward_nodes, 
static_cast<size_t>(indexed_graph.outputs()[i].node_id + 1));
     num_forward_entries = std::max(
-        num_forward_entries, 
static_cast<size_t>(idx.entry_id(idx.outputs()[i])) + 1);
+        num_forward_entries, static_cast<size_t>(indexed_graph.entry_id(
+            indexed_graph.outputs()[i])) + 1);
   }
 
   // Allocate buffer
-  std::vector<NDArray> buff(idx.num_node_entries());
+  std::vector<NDArray> buff(indexed_graph.num_node_entries());
   std::vector<uint32_t> ref_count(buff.size(), 0);
   std::vector<OpStatePtr> states;
   std::vector<NDArray*> arrays;
   arrays.reserve(buff.size());
-  for (auto& buffered_array : buff) {
+  for (auto& buffered_array : buff)
     arrays.push_back(&buffered_array);
-  }
+
   if (create_graph) {
     states.resize(num_forward_nodes);
-    nnvm::DFSVisit(sym.outputs, [&](const nnvm::NodePtr& n) {
+    nnvm::DFSVisit(forward_outputs, [&](const nnvm::NodePtr& n) {
       AGInfo& info = AGInfo::Get(n);
-      states[idx.node_id(n.get())] = info.state;
+      states.at(indexed_graph.node_id(n.get())) = info.state;
       for (uint32_t i = 0; i < info.outputs.size(); ++i) {
 
 Review comment:
   Change `i` to `size_t` to be consistent with the rest of your refactoring?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to