fix some bugs and reduce training memory

- a bug in checking the cuDNN version;
- a bug in create_cuda_gpu_on();
- reduce the memory cost of BP.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/f4fae37e
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/f4fae37e
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/f4fae37e

Branch: refs/heads/master
Commit: f4fae37eb874c6dda4e530d623b17c798deda378
Parents: 26d9cd4
Author: Wei Wang <[email protected]>
Authored: Tue Sep 13 20:25:14 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Thu Sep 15 17:55:18 2016 +0800

----------------------------------------------------------------------
 python/singa/layer.py                | 12 +++++++-----
 python/singa/net.py                  | 15 ++++++++++++++-
 src/api/core_device.i                |  1 +
 src/model/layer/cudnn_activation.cc  |  8 ++++----
 src/model/layer/cudnn_convolution.cc |  4 ++--
 src/model/layer/cudnn_pooling.cc     |  4 ++--
 6 files changed, 30 insertions(+), 14 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index f22b3d1..51b46ea 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -560,11 +560,13 @@ class Dropout(Layer):
         conf = self.conf.dropout_conf
         conf.dropout_ratio = p
         # 'cudnn' works for v>=5.0
-        # if engine.lower() == 'cudnn':
-        #     engine = 'cuda'
-        _check_engine(engine, ['cudnn', 'singa', 'singacpp', 'singacuda',
-                               'singacl'])
-        self.layer = _create_layer(engine, 'Dropout')
+        if engine.lower() == 'cudnn':
+            myengine = 'singacuda'
+        else:
+            myengine = engine
+        _check_engine(myengine, ['cudnn', 'singa', 'singacpp', 'singacuda',
+                                 'singacl'])
+        self.layer = _create_layer(myengine, 'Dropout')
         if input_sample_shape is not None:
             self.setup(input_sample_shape)
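
The user-visible effect of the layer.py change: constructing a Dropout layer while the module-level engine is 'cudnn' no longer tries to build a cuDNN dropout layer (cuDNN only provides dropout from v5.0 on); it is routed to the generic 'singacuda' implementation instead. A minimal sketch of the fixed behaviour, assuming a CUDA build of the v1.0 Python API; the snippet itself is not part of the commit:

----------------------------------------------------------------------
from singa import layer

layer.engine = 'cudnn'   # module-level engine preference

# Internally remapped to 'singacuda', so construction no longer depends
# on a cuDNN dropout implementation being available.
drop = layer.Dropout('drop1', p=0.5, input_sample_shape=(64,))
print drop.get_output_sample_shape()   # dropout preserves the sample shape
----------------------------------------------------------------------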

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/python/singa/net.py
----------------------------------------------------------------------
diff --git a/python/singa/net.py b/python/singa/net.py
index 0026953..61603c6 100644
--- a/python/singa/net.py
+++ b/python/singa/net.py
@@ -25,6 +25,8 @@ import tensor
 import layer
 import cPickle as pickle
 
+'''For display training information, e.g L1 value of layer data'''
+verbose = False
 
 class FeedForwardNet(object):
 
@@ -146,15 +148,22 @@ class FeedForwardNet(object):
             for src in srcs:
                 outs = output_of_layer[src.name]
                 if type(outs) == list:
+                    assert len(outs) > 0, \
+                        'the output from layer %s is empty' % src.name
                     inputs.append(outs[0])
+                    outs.pop(0)
                 else:
                     inputs.append(outs)
+                    output_of_layer[cur.name] = []
                 disp_src += '+' + src.name
                 # del output_of_layer[src.name]
             # print disp_src
             if len(inputs) == 1:
                 inputs = inputs[0]
-            output_of_layer[cur.name] = cur.forward(flag, inputs)
+            out = cur.forward(flag, inputs)
+            if verbose:
+                print '%s: %f' % (cur.name, out.l1())
+            output_of_layer[cur.name] = out
             inputs = []
             # print lyr.name, x.l1()
         # print output_of_layer
@@ -180,9 +189,13 @@ class FeedForwardNet(object):
             for dst in self.dst_of_layer[cur.name]:
                 outputs = output_of_layer[dst.name]
                 if type(outputs) == list:
+                    assert len(outputs) > 0, \
+                        'the gradient from layer %s is empty' % dst.name
                     grads.append(outputs[0])
+                    outputs.pop(0)
                 else:
                     grads.append(outputs)
+                    output_of_layer[dst.name] = []
                 # del output_of_layer[dst.name]
             if len(grads) == 1:
                 grads = grads[0]
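
The net.py change does two things. First, each consumed output is now popped from output_of_layer, so a layer's output tensor can be released as soon as its last consumer has read it instead of being held for the whole pass; this is the BP memory reduction from the commit title. Second, a module-level verbose flag prints the L1 norm of every layer's output during forward, which helps when debugging divergence. A usage sketch, assuming v1.0 Python API names (FeedForwardNet, Dense, loss, metric) that are not shown in this diff:

----------------------------------------------------------------------
from singa import net as ffnet
from singa import layer, loss, metric, tensor

layer.engine = 'singacpp'   # CPU engine so the sketch runs without a GPU
ffnet.verbose = True        # print '<layer name>: <L1 of output>' per layer

net = ffnet.FeedForwardNet(loss.SoftmaxCrossEntropy(), metric.Accuracy())
net.add(layer.Dense('fc1', 8, input_sample_shape=(4,)))
net.add(layer.Dense('fc2', 2))

x = tensor.Tensor((3, 4))
x.uniform(-1, 1)
y = net.forward(False, x)   # prints 'fc1: ...' then 'fc2: ...'
----------------------------------------------------------------------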

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/src/api/core_device.i
----------------------------------------------------------------------
diff --git a/src/api/core_device.i b/src/api/core_device.i
index b3521be..f3381ae 100644
--- a/src/api/core_device.i
+++ b/src/api/core_device.i
@@ -38,6 +38,7 @@ namespace std{
   %template(sizePair) std::pair<size_t, size_t>;
   %template(vectorPair) std::vector<std::pair<size_t, size_t>>;
   %template(vectorSharedPtr) std::vector<std::shared_ptr<singa::Device>>;
+  %template(deviceVec) std::vector<int>;
 }
 
 namespace singa{


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/src/model/layer/cudnn_activation.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_activation.cc b/src/model/layer/cudnn_activation.cc
index 4ecb375..756b625 100644
--- a/src/model/layer/cudnn_activation.cc
+++ b/src/model/layer/cudnn_activation.cc
@@ -68,11 +68,11 @@ const Tensor CudnnActivation::Forward(int flag, const Tensor& input) {
   output.device()->Exec([input, output, this](Context* ctx) {
     Block* inblock = input.block(), * outblock = output.block();
     float alpha = 1.0f, beta = 0.0f;
-#if CUDNN_VERSION_MAJOR == 5
+#if CUDNN_MAJOR == 5
     CUDNN_CHECK(cudnnActivationForward(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_,
         inblock->data(), &beta, this->desc_, outblock->mutable_data()));
-#elif CUDNN_VERSION_MAJOR == 4
+#elif CUDNN_MAJOR == 4
     CUDNN_CHECK(cudnnActivationForward_v4(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_,
         inblock->data(), &beta, this->desc_, outblock->mutable_data()));
@@ -103,12 +103,12 @@ const std::pair<Tensor, vector<Tensor>> CudnnActivation::Backward(
     Block* dyblock = grad.block(), * dxblock = dx.block(),
          * yblock = inout.block(), * xblock = inout.block();
     float alpha = 1.0f, beta = 0.0f;
-#if CUDNN_VERSION_MAJOR == 5
+#if CUDNN_MAJOR == 5
     CUDNN_CHECK(cudnnActivationBackward(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblock->data(),
         this->desc_, dyblock->data(), this->desc_, xblock->data(), &beta,
         this->desc_, dxblock->mutable_data()));
-#elif CUDNN_VERSION_MAJOR == 4
+#elif CUDNN_MAJOR == 4
     CUDNN_CHECK(cudnnActivationBackward_v4(
         ctx->cudnn_handle, this->acti_desc_, &alpha, this->desc_, yblock->data(),
         this->desc_, dyblock->data(), this->desc_, xblock->data(), &beta,


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/src/model/layer/cudnn_convolution.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_convolution.cc b/src/model/layer/cudnn_convolution.cc
index ffd2ab7..60ac526 100644
--- a/src/model/layer/cudnn_convolution.cc
+++ b/src/model/layer/cudnn_convolution.cc
@@ -77,11 +77,11 @@ void CudnnConvolution::InitCudnn(const Tensor &input) {
   CUDNN_CHECK(cudnnSetConvolution2dDescriptor(conv_desc_, pad_h_, pad_w_,
                                               stride_h_, stride_w_, 1, 1,
                                               CUDNN_CROSS_CORRELATION));
-#if CUDNN_VERSION_MAJOR == 5
+#if CUDNN_MAJOR == 5
   CUDNN_CHECK(cudnnSetFilter4dDescriptor(filter_desc_, GetCudnnDataType(dtype),
                                          CUDNN_TENSOR_NCHW, num_filters_,
                                          channels_, kernel_h_, kernel_w_));
-#elif CUDNN_VERSION_MAJOR == 4
+#elif CUDNN_MAJOR == 4
   CUDNN_CHECK(cudnnSetFilter4dDescriptor_v4(
       filter_desc_, GetCudnnDataType(dtype), CUDNN_TENSOR_NCHW, num_filters_,
       channels_, kernel_h_, kernel_w_));


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/f4fae37e/src/model/layer/cudnn_pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/cudnn_pooling.cc b/src/model/layer/cudnn_pooling.cc
index 895ce3c..7c1a465 100644
--- a/src/model/layer/cudnn_pooling.cc
+++ b/src/model/layer/cudnn_pooling.cc
@@ -64,11 +64,11 @@ void CudnnPooling::InitCudnn(const Tensor &input) {
   else
     LOG(FATAL) << "Not implemented!";
 
-#if CUDNN_VERSION_MAJOR == 5
+#if CUDNN_MAJOR == 5
   CUDNN_CHECK(cudnnSetPooling2dDescriptor(pool_desc_, pool_method, nan_prop_,
                                           kernel_h_, kernel_w_, pad_h_, pad_w_,
                                           stride_h_, stride_w_));
-#elif CUDNN_VERSION_MAJOR == 4
+#elif CUDNN_MAJOR == 4
   CUDNN_CHECK(cudnnSetPooling2dDescriptor_v4(pool_desc_, pool_method, nan_prop_,
                                              kernel_h_, kernel_w_, pad_h_,
                                              pad_w_, stride_h_, stride_w_));
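
Two notes on the C++-side fixes. cudnn.h defines CUDNN_MAJOR (along with CUDNN_MINOR and CUDNN_PATCHLEVEL); CUDNN_VERSION_MAJOR is not a cuDNN macro, so both branches of the old #if/#elif guards evaluated to false and the guarded cuDNN calls were compiled out. That is the version-check bug named in the commit title. The new deviceVec template instantiation lets SWIG convert a Python list of device ids into the std::vector<int> expected on the C++ side, which is what create_cuda_gpu_on() relies on. An end-to-end sketch of the repaired path, assuming a CUDA build of the v1.0 Python API (Conv2D, Activation, MaxPooling2D and their defaults are assumptions, not part of this diff):

----------------------------------------------------------------------
from singa import device, layer, tensor

# Wrapping the device-id list requires the new std::vector<int> template.
cuda = device.create_cuda_gpu_on(0)

layer.engine = 'cudnn'   # the corrected guards now select the cuDNN calls
conv = layer.Conv2D('conv1', 8, kernel=3, stride=1,
                    input_sample_shape=(1, 28, 28))
relu = layer.Activation('relu1',
                        input_sample_shape=conv.get_output_sample_shape())
pool = layer.MaxPooling2D('pool1', kernel=2, stride=2,
                          input_sample_shape=relu.get_output_sample_shape())

x = tensor.Tensor((4, 1, 28, 28), cuda)
x.gaussian(0.0, 1.0)
y = conv.forward(False, x)   # cudnnSetFilter4dDescriptor (CUDNN_MAJOR == 5)
y = relu.forward(False, y)   # cudnnActivationForward
y = pool.forward(False, y)   # cudnnSetPooling2dDescriptor
print y.shape                # output shape depends on the padding defaults
----------------------------------------------------------------------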
