eric-haibin-lin commented on a change in pull request #10371: [MXNET-263] Support for dot(dns, csr) = dns and dot(dns, csr.T) = dns on GPU
URL: https://github.com/apache/incubator-mxnet/pull/10371#discussion_r183262040
 
 

 ##########
 File path: src/operator/tensor/dot-inl.h
 ##########
 @@ -217,35 +227,57 @@ inline bool DotForwardInferStorageType(const nnvm::NodeAttrs& attrs,
   bool only_lhs_transpose = param.transpose_a && !param.transpose_b;
   bool rhs_rsp_or_dns =
       rhs_stype == kRowSparseStorage || rhs_stype == kDefaultStorage;
+  bool hint_has_value = param.forward_stype.has_value();
+  NDArrayStorageType target_stype = hint_has_value ?
+                                    static_cast<NDArrayStorageType>(param.forward_stype.value()) :
+                                    kUndefinedStorage;
   if (!dispatched && lhs_stype == kDefaultStorage &&
       rhs_stype == kDefaultStorage) {
     // dns, dns -> dns
-    dispatched = storage_type_assign(&out_stype, kDefaultStorage, dispatch_mode,
-                                     DispatchMode::kFCompute);
+    target_stype = hint_has_value ? target_stype : kDefaultStorage;
+    if (target_stype == kDefaultStorage) {
+      dispatched = storage_type_assign(&out_stype, kDefaultStorage, dispatch_mode,
+                                       DispatchMode::kFCompute);
+    }
   }
-  if (!dispatched && lhs_stype == kCSRStorage && only_lhs_transpose &&
-      (rhs_stype == kRowSparseStorage || rhs_stype == kDefaultStorage)) {
+  if (!dispatched && lhs_stype == kCSRStorage && only_lhs_transpose && rhs_rsp_or_dns) {
     // csr.T, rsp/dns -> rsp
-    dispatched = storage_type_assign(&out_stype, kRowSparseStorage,
-                                     dispatch_mode, DispatchMode::kFComputeEx);
+    target_stype = hint_has_value ? target_stype : kRowSparseStorage;
+    if (target_stype == kRowSparseStorage) {
+      dispatched = storage_type_assign(&out_stype, kRowSparseStorage,
+                                       dispatch_mode, DispatchMode::kFComputeEx);
+    }
   }
   if (!dispatched && lhs_stype == kCSRStorage && rhs_rsp_or_dns &&
       !param.transpose_a && !param.transpose_b) {
     // csr, rsp/dns -> dns
-    dispatched = storage_type_assign(&out_stype, kDefaultStorage, dispatch_mode,
-                                     DispatchMode::kFComputeEx);
+    target_stype = hint_has_value ? target_stype : kDefaultStorage;
+    if (target_stype == kDefaultStorage) {
+      dispatched = storage_type_assign(&out_stype, kDefaultStorage, dispatch_mode,
+                                       DispatchMode::kFComputeEx);
+    }
   }
   if (!dispatched && lhs_stype == kDefaultStorage && rhs_stype == kCSRStorage &&
-      !param.transpose_a && !param.transpose_b) {
-    // dns, csr -> csr
-    const bool invalid_ctx = dev_mask != mshadow::cpu::kDevMask;
-    const auto dispatch_ex = invalid_ctx ? DispatchMode::kFComputeFallback
-                                         : DispatchMode::kFComputeEx;
-    dispatched = storage_type_assign(&out_stype, kCSRStorage, dispatch_mode,
-                                     dispatch_ex);
+      !param.transpose_a) {
+    target_stype = hint_has_value ? target_stype : kCSRStorage;
+    // dns, csr -> csr on CPU
+    if (dev_mask == mshadow::cpu::kDevMask && !param.transpose_b) {
+      if (target_stype == kCSRStorage) {
+        dispatched = storage_type_assign(&out_stype, kCSRStorage, dispatch_mode,
+                                         DispatchMode::kFComputeEx);
+      }
+    // dns, csr/csr.T -> dns on GPU
+    } else if (dev_mask == mshadow::gpu::kDevMask) {
+      if (target_stype == kDefaultStorage) {
+        dispatched = storage_type_assign(&out_stype, kDefaultStorage, dispatch_mode,
+                                         DispatchMode::kFComputeEx);
+      }
+    }
   }
   if (!dispatched) {
-    dispatched = dispatch_fallback(out_attrs, dispatch_mode);
+    target_stype = (target_stype == kUndefinedStorage)? kDefaultStorage : target_stype;
+    dispatched = storage_type_assign(&out_stype, target_stype, dispatch_mode,
+                                     DispatchMode::kFComputeFallback);
   }
 
 Review comment:
   I think you should also update the InferStorageType function for backward dot.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to