fix some bugs and reduce training memory

- a bug in checking the cuDNN version;
- a bug in create_cuda_gpu_on();
- reduce the memory cost of BP.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f4fae37e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f4fae37e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f4fae37e

Branch: refs/heads/master
Commit: f4fae37eb874c6dda4e530d623b17c798deda378
Parents: 26d9cd4
Author: Wei Wang <[email protected]>
Authored: Tue Sep 13 20:25:14 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Thu Sep 15 17:55:18 2016 +0800

----------------------------------------------------------------------
 python/singa/layer.py                | 12 +++++++-----
 python/singa/net.py                  | 15 ++++++++++++++-
 src/api/core_device.i                |  1 +
 src/model/layer/cudnn_activation.cc  |  8 ++++----
 src/model/layer/cudnn_convolution.cc |  4 ++--
 src/model/layer/cudnn_pooling.cc     |  4 ++--
 6 files changed, 30 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index f22b3d1..51b46ea 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -560,11 +560,13 @@ class Dropout(Layer):
         conf = self.conf.dropout_conf
         conf.dropout_ratio = p
         # 'cudnn' works for v>=5.0
-        # if engine.lower() == 'cudnn':
-        #     engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
-                               'singacl'])
-        self.layer = _create_layer(engine, 'Dropout')
+        if engine.lower() == 'cudnn':
+            myengine = 'singacuda'
+        else:
+            myengine = engine
+        _check_engine(myengine, ['cudnn', 'singa', 'singacpp', 'singacuda',
+                                 'singacl'])
+        self.layer = _create_layer(myengine, 'Dropout')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
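
The user-visible effect of the layer.py change: constructing a Dropout layer while the module-level engine is 'cudnn' no longer tries to build a cuDNN dropout layer (cuDNN only provides dropout from v5.0 on); it is routed to the generic 'singacuda' implementation instead. A minimal sketch of the fixed behaviour, assuming a CUDA build of the v1.0 Python API; the snippet itself is not part of the commit:

----------------------------------------------------------------------
from singa import layer

layer.engine = 'cudnn'   # module-level engine preference

# Internally remapped to 'singacuda', so construction no longer depends
# on a cuDNN dropout implementation being available.
drop = layer.Dropout('drop1', p=0.5, input_sample_shape=(64,))
print drop.get_output_sample_shape()   # dropout preserves the sample shape
----------------------------------------------------------------------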

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/python/singa/net.py
----------------------------------------------------------------------
diff --git a/python/singa/net.py b/python/singa/net.py
index 0026953..61603c6 100644
--- a/python/singa/net.py
+++ b/python/singa/net.py
@@ -25,6 +25,8 @@ import tensor
 import layer
 import cPickle as pickle
 
+'''For display training information, e.g L1 value of layer data'''
+verbose = False
 
 class FeedForwardNet(object):
 
@@ -146,15 +148,22 @@ class FeedForwardNet(object):
             for src in srcs:
                 outs = output_of_layer[src.name]
                 if type(outs) == list:
+                    assert len(outs) > 0, \
+                        'the output from layer %s is empty' % src.name
                     inputs.append(outs[0])
+                    outs.pop(0)
                 else:
                     inputs.append(outs)
+                    output_of_layer[cur.name] = []
                 disp_src += '+' + src.name
                 # del output_of_layer[src.name]
             # print disp_src
             if len(inputs) == 1:
                 inputs = inputs[0]
-            output_of_layer[cur.name] = cur.forward(flag, inputs)
+            out = cur.forward(flag, inputs)
+            if verbose:
+                print '%s: %f' % (cur.name, out.l1())
+            output_of_layer[cur.name] = out
             inputs = []
             # print lyr.name, x.l1()
         # print output_of_layer
@@ -180,9 +189,13 @@ class FeedForwardNet(object):
             for dst in self.dst_of_layer[cur.name]:
                 outputs = output_of_layer[dst.name]
                 if type(outputs) == list:
+                    assert len(outputs) > 0, \
+                        'the gradient from layer %s is empty' % dst.name
                     grads.append(outputs[0])
+                    outputs.pop(0)
                 else:
                     grads.append(outputs)
+                    output_of_layer[dst.name] = []
                 # del output_of_layer[dst.name]
             if len(grads) == 1:
                 grads = grads[0]
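
The net.py change does two things. First, each consumed output is now popped from output_of_layer, so a layer's output tensor can be released as soon as its last consumer has read it instead of being held for the whole pass; this is the BP memory reduction from the commit title. Second, a module-level verbose flag prints the L1 norm of every layer's output during forward, which helps when debugging divergence. A usage sketch, assuming v1.0 Python API names (FeedForwardNet, Dense, loss, metric) that are not shown in this diff:

----------------------------------------------------------------------
from singa import net as ffnet
from singa import layer, loss, metric, tensor

layer.engine = 'singacpp'   # CPU engine so the sketch runs without a GPU
ffnet.verbose = True        # print '<layer name>: <L1 of output>' per layer

net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
net.add(layer.Dense('fc1', 8, input_sample_shape=(4,)))
net.add(layer.Dense('fc2', 2))

x = tensor.Tensor((3, 4))
x.uniform(-1, 1)
y = net.forward(False, x)   # prints 'fc1: ...' then 'fc2: ...'
----------------------------------------------------------------------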

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/src/api/core_device.i
----------------------------------------------------------------------
diff --git a/src/api/core_device.i b/src/api/core_device.i
index b3521be..f3381ae 100644
--- a/src/api/core_device.i
+++ b/src/api/core_device.i
@@ -38,6 +38,7 @@ namespace std{
   %template(sizePair) std::pair<size_t, size_t>;
   %template(vectorPair) std::vector<std::pair<size_t, size_t>>;
   %template(vectorSharedPtr) std::vector<std::shared_ptr<singa::Device>>;
+  %template(deviceVec) std::vector<int>;
 }
 
 namespace singa{


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/src/model/layer/cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_activation.cc b/src/model/layer/cudnn_activation.cc
index 4ecb375..756b625 100644
--- a/src/model/layer/cudnn_activation.cc
+++ b/src/model/layer/cudnn_activation.cc
@@ -68,11 +68,11 @@ const Tensor CudnnActivation::Forward(int flag, const Tensor& input) {
   output.device()->Exec([input, output, this](Context* ctx) {
     Block* inblock = input.block(), * outblock = output.block();
     float alpha = 1.0f, beta = 0.0f;
-#if CUDNN_VERSION_MAJOR == 5
+#if CUDNN_MAJOR == 5
     CUDNN_CHECK(cudnnActivationForward(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_,
         inblock->data(), &beta, this->desc_, outblock->mutable_data()));
-#elif CUDNN_VERSION_MAJOR == 4
+#elif CUDNN_MAJOR == 4
     CUDNN_CHECK(cudnnActivationForward_v4(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_,
         inblock->data(), &beta, this->desc_, outblock->mutable_data()));
@@ -103,12 +103,12 @@ const std::pair<Tensor, vector<Tensor>> CudnnActivation::Backward(
     Block* dyblock = grad.block(), * dxblock = dx.block(),
          * yblock = inout.block(), * xblock = inout.block();
     float alpha = 1.0f, beta = 0.0f;
-#if CUDNN_VERSION_MAJOR == 5
+#if CUDNN_MAJOR == 5
     CUDNN_CHECK(cudnnActivationBackward(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblock->data(),
         this->desc_, dyblock->data(), this->desc_, xblock->data(), &beta,
         this->desc_, dxblock->mutable_data()));
-#elif CUDNN_VERSION_MAJOR == 4
+#elif CUDNN_MAJOR == 4
     CUDNN_CHECK(cudnnActivationBackward_v4(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblock->data(),
         this->desc_, dyblock->data(), this->desc_, xblock->data(), &beta,


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/src/model/layer/cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_convolution.cc b/src/model/layer/cudnn_convolution.cc
index ffd2ab7..60ac526 100644
--- a/src/model/layer/cudnn_convolution.cc
+++ b/src/model/layer/cudnn_convolution.cc
@@ -77,11 +77,11 @@ void CudnnConvolution::InitCudnn(const Tensor &input) {
   CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc_, pad_h_, pad_w_,
                                               stride_h_, stride_w_, 1, 1,
                                               CUDNN_CROSS_CORRELATION));
-#if CUDNN_VERSION_MAJOR == 5
+#if CUDNN_MAJOR == 5
   CUDNN_CHECK(cudnnSetFilter4dDescriptor(filter_desc_, GetCudnnDataType(dtype),
                                          CUDNN_TENSOR_NCHW, num_filters_,
                                          channels_, kernel_h_, kernel_w_));
-#elif CUDNN_VERSION_MAJOR == 4
+#elif CUDNN_MAJOR == 4
   CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(
       filter_desc_, GetCudnnDataType(dtype), CUDNN_TENSOR_NCHW, num_filters_,
       channels_, kernel_h_, kernel_w_));


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/src/model/layer/cudnn_pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_pooling.cc b/src/model/layer/cudnn_pooling.cc
index 895ce3c..7c1a465 100644
--- a/src/model/layer/cudnn_pooling.cc
+++ b/src/model/layer/cudnn_pooling.cc
@@ -64,11 +64,11 @@ void CudnnPooling::InitCudnn(const Tensor &input) {
   else
     LOG(FATAL) << "Not implemented!";
 
-#if CUDNN_VERSION_MAJOR == 5
+#if CUDNN_MAJOR == 5
   CUDNN_CHECK(cudnnSetPooling2dDescriptor(pool_desc_, pool_method, nan_prop_,
                                           kernel_h_, kernel_w_, pad_h_, pad_w_,
                                           stride_h_, stride_w_));
-#elif CUDNN_VERSION_MAJOR == 4
+#elif CUDNN_MAJOR == 4
   CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(pool_desc_, pool_method, nan_prop_,
                                              kernel_h_, kernel_w_, pad_h_,
                                              pad_w_, stride_h_, stride_w_));
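
Two notes on the C++-side fixes. cudnn.h defines CUDNN_MAJOR (along with CUDNN_MINOR and CUDNN_PATCHLEVEL); CUDNN_VERSION_MAJOR is not a cuDNN macro, so both branches of the old #if/#elif guards evaluated to false and the guarded cuDNN calls were compiled out. That is the version-check bug named in the commit title. The new deviceVec template instantiation lets SWIG convert a Python list of device ids into the std::vector<int> expected on the C++ side, which is what create_cuda_gpu_on() relies on. An end-to-end sketch of the repaired path, assuming a CUDA build of the v1.0 Python API (Conv2D, Activation, MaxPooling2D and their defaults are assumptions, not part of this diff):

----------------------------------------------------------------------
from singa import device, layer, tensor

# Wrapping the device-id list requires the new std::vector<int> template.
cuda = device.create_cuda_gpu_on(0)

layer.engine = 'cudnn'   # the corrected guards now select the cuDNN calls
conv = layer.Conv2D('conv1', 8, kernel=3, stride=1,
                    input_sample_shape=(1, 28, 28))
relu = layer.Activation('relu1',
                        input_sample_shape=conv.get_output_sample_shape())
pool = layer.MaxPooling2D('pool1', kernel=2, stride=2,
                          input_sample_shape=relu.get_output_sample_shape())

x = tensor.Tensor((4, 1, 28, 28), cuda)
x.gaussian(0.0, 1.0)
y = conv.forward(False, x)   # cudnnSetFilter4dDescriptor (CUDNN_MAJOR == 5)
y = relu.forward(False, y)   # cudnnActivationForward
y = pool.forward(False, y)   # cudnnSetPooling2dDescriptor
print y.shape                # output shape depends on the padding defaults
----------------------------------------------------------------------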
