solin319 commented on a change in pull request #8373: distribute training in 
fp16
URL: https://github.com/apache/incubator-mxnet/pull/8373#discussion_r168089512
 
 

 ##########
 File path: src/kvstore/kvstore_dist.h
 ##########
 @@ -272,16 +282,35 @@ class KVStoreDist : public KVStoreLocal {
         // This shouldn't affect training of networks though because training 
involves
         // a sequence of push, pull, then push. This imposes ordering that the
         // second push happens after the first pull, and the pull happens 
after first push.
-        send_buf = merged;  // avoid memory copy
+        if (send_buf.is_none()) {
+          if (storage_type == kDefaultStorage) {
+            send_buf = NDArray(merged.shape(), pinned_ctx_, true, 
mshadow::DataType<DType>::kFlag);
+          } else {
+            send_buf = NDArray(storage_type, merged.shape(), pinned_ctx_,
+                               true, mshadow::DataType<DType>::kFlag);
+          }
+        }
+        if (merged.dtype() == mshadow::DataType<DType>::kFlag) {
+          send_buf = merged;
+        } else {
+          CopyFromTo(merged, &send_buf);
+        }
       } else {
         if (send_buf.is_none()) {
           if (storage_type == kDefaultStorage) {
-            send_buf = NDArray(merged.shape(), pinned_ctx_, true, 
merged.dtype());
+            send_buf = NDArray(merged.shape(), pinned_ctx_, true, 
mshadow::DataType<DType>::kFlag);
           } else {
-            send_buf = NDArray(storage_type, merged.shape(), pinned_ctx_, 
true, merged.dtype());
+            send_buf = NDArray(storage_type, merged.shape(), pinned_ctx_,
+                               true, mshadow::DataType<DType>::kFlag);
           }
         }
-        CopyFromTo(merged, &send_buf);
+        if (merged.dtype() == mshadow::DataType<DType>::kFlag) {
+          CopyFromTo(merged, &send_buf);
+        } else {
+          NDArray tmp = NDArray(merged.shape(), pinned_ctx_, true, 
merged.dtype());
 
 Review comment:
   1. Copy two times, because it doesn't supports to copy from gpu with fp32 to 
cpu with fp16.  
   2. Kvstore_dist just merge grads on all gpus in one machine.
   Kvstore_dist_server merge the grads on all the machines.
   

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to