reminisce commented on a change in pull request #15938: Tvm broadcast backward
URL: https://github.com/apache/incubator-mxnet/pull/15938#discussion_r315995425
 
 

 ##########
 File path: src/operator/contrib/tvmop/ufunc.cc
 ##########
 @@ -37,29 +38,88 @@ namespace op {
 
 static constexpr char func_vadd_cpu[] = "vadd";
 static constexpr char func_vadd_gpu[] = "cuda_vadd";
+static constexpr char func_backward_vadd_cpu[] = "backward_vadd";
+static constexpr char func_backward_vadd_gpu[] = "cuda_backward_vadd";
 
 template<const char* func>
-void TVMBroadcastCompute(const nnvm::NodeAttrs& attrs,
-                         const mxnet::OpContext& ctx,
-                         const std::vector<TBlob>& inputs,
-                         const std::vector<OpReqType>& req,
-                         const std::vector<TBlob>& outputs) {
+void TVMBinaryCompute(const nnvm::NodeAttrs& attrs,
+                      const mxnet::OpContext& ctx,
+                      const std::vector<TBlob>& inputs,
+                      const std::vector<OpReqType>& req,
+                      const std::vector<TBlob>& outputs) {
   CHECK_EQ(inputs.size(), 2U);
   CHECK_EQ(outputs.size(), 1U);
   tvm::runtime::TVMOpModule::Get()->Call(func, ctx, {inputs[0], inputs[1], outputs[0]});
 }
 
+template<const char* func>
+void TVMBinaryBackwardComputeUseNone(const nnvm::NodeAttrs& attrs,
+                                     const mxnet::OpContext& ctx,
+                                     const std::vector<TBlob>& inputs,
+                                     const std::vector<OpReqType>& req,
+                                     const std::vector<TBlob>& outputs) {
+  CHECK_EQ(inputs.size(), 1U);
+  CHECK_EQ(outputs.size(), 2U);
+  int ndim = inputs[0].shape_.ndim();
+  for (int k = 0; k < 2; ++k) {
+    // dispatch by backward
+    std::vector<int> ov, iv;
+    const TBlob& ograd = inputs[0], igrad = outputs[k];
+    bool flag = ograd.size(0) != igrad.size(0);
+    for (int i = 0; i < ndim; ++i) {
 
 Review comment:
   Please correct me if my understanding is wrong, but don't you still need 
kernels generated for `ndims < 5` since you will collapse consecutive 
dimensions where reduction is performed? For example, given a 5d shape `(2, 3, 
4, 5, 6)`, and perform reduction on `axis=(1, 2)`, the `tblob` will be first 
reshaped into `(2, 12, 30)`, and then reduce on `axis=1`.  In this case, do you 
need a kernel generated for 3D shapes?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


With regards,
Apache Git Services

Reply via email to