Repository: incubator-singa

Updated Branches:
  refs/heads/master 33cf5461b -> 82c12c2b9
SINGA-271 Add Concat and Slice layers

Fixed some bugs in the Concat and Slice layers:
1. The input shape (list) passed to a layer's setup function should be the
   shape of a single sample, i.e., without the batch dimension.
2. The order of the Concat layer's source layers should match the order of
   the gradient tensors returned by the net::backward() function.
3. The Concat layer's forward maps a list of tensors to a single tensor,
   and the Slice layer's forward maps a single tensor to a list of tensors;
   for backward, the data types are reversed with respect to forward.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/82c12c2b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/82c12c2b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/82c12c2b

Branch: refs/heads/master
Commit: 82c12c2b9e81926cbd46f621fa7f8da98f2217e8
Parents: 33cf546
Author: Wei Wang <wang...@comp.nus.edu.sg>
Authored: Fri Dec 2 16:27:20 2016 +0800
Committer: Wei Wang <wangwei...@gmail.com>
Committed: Wed Dec 14 14:46:23 2016 +0800

----------------------------------------------------------------------
 python/singa/layer.py     | 126 +++++++++++++++++++++++++++++++++--------
 python/singa/net.py       |   5 +-
 src/model/layer/concat.cc |  34 ++++++-----
 src/model/layer/concat.h  |   5 +-
 src/model/layer/slice.cc  |  31 +++++-----
 src/model/layer/slice.h   |   2 +
 test/python/test_layer.py |  18 +++++-
 test/singa/test_concat.cc |  23 ++++++--
 test/singa/test_slice.cc  |  19 +++----
 9 files changed, 191 insertions(+), 72 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/python/singa/layer.py
----------------------------------------------------------------------
diff --git a/python/singa/layer.py b/python/singa/layer.py
index a7a14c5..f82d2df 100644
--- a/python/singa/layer.py
+++ b/python/singa/layer.py
@@ -116,26 +116,6 @@ class Layer(object):
         self.has_setup = False
 
-    def caffe_layer(self):
-        '''
-        Create a singa layer based on caffe layer configuration.
-        '''
-        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda'])
-        if self.conf.type == 'InnerProduct' or self.conf.type == 14:
-            self.layer = _create_layer(engine, 'Dense')
-        else:
-            self.layer = _create_layer(engine, str(self.conf.type))
-
-    def param_names(self):
-        '''
-        Returns:
-            a list of strings, one for the name of one parameter Tensor
-        '''
-        names = []
-        for x in self.param_specs:
-            names.append(x['name'])
-        return names
-
     def setup(self, in_shapes):
         '''Call the C++ setup function to create params and set some meta
         data.
@@ -154,6 +134,17 @@ class Layer(object):
         self.layer.Setup(list(in_shapes), self.conf.SerializeToString())
         self.has_setup = True
 
+    def caffe_layer(self):
+        '''
+        Create a singa layer based on caffe layer configuration.
+        '''
+        _check_engine(engine, ['cudnn', 'singacpp', 'singacuda'])
+        if self.conf.type == 'InnerProduct' or self.conf.type == 14:
+            self.layer = _create_layer(engine, 'Dense')
+        else:
+            self.layer = _create_layer(engine, str(self.conf.type))
+
+
     def get_output_sample_shape(self):
         '''Called after setup to get the shape of the output sample(s).
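For illustration of fix 1 (per-sample shapes), a minimal sketch of the new
setup() contract; it assumes the singa Python package built from this commit
is importable, and mirrors the shapes used in the updated test_layer.py below:

    from singa import layer, tensor

    # setup() now takes the shape of one sample; the batch dimension is
    # excluded, so two (batch, 3) inputs are described by two (3,) shapes.
    lyr = layer.Concat('concat', 0, [(3,), (3,)])

    # The batch dimension appears only on the actual input tensors.
    x1 = tensor.Tensor((2, 3))  # 2 samples with 3 features each
    x2 = tensor.Tensor((1, 3))  # 1 sample with 3 features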
@@ -165,6 +156,16 @@ class Layer(object):
             'Must call setup() before get_output_sample_shape()'
         return self.layer.GetOutputSampleShape()
 
+    def param_names(self):
+        '''
+        Returns:
+            a list of strings, one for the name of one parameter Tensor
+        '''
+        names = []
+        for x in self.param_specs:
+            names.append(x['name'])
+        return names
+
     def param_values(self):
         '''Return param value tensors.
@@ -278,10 +279,12 @@ class Dummy(Layer):
         self.has_setup = True
 
     def forward(self, flag, x):
+        '''Return the input x'''
         return x
 
     def backward(self, flag, dy):
-        return dy
+        '''Return dy, []'''
+        return dy, []
 
 
 class Conv2D(Layer):
@@ -732,6 +735,7 @@ class Merge(Layer):
     def forward(self, flag, inputs):
         '''Merge all input tensors by summation.
+        TODO(wangwei) do element-wise merge operations, e.g., avg, count
 
         Args:
             flag: not used.
             inputs (list): a list of tensors
@@ -749,6 +753,14 @@
         return output
 
     def backward(self, flag, grad):
+        '''Replicate the grad for each input source layer.
+
+        Args:
+            grad(Tensor), the gradient tensor of the merged result from forward
+
+        Returns:
+            A list of replicated grad, one per source layer
+        '''
         assert isinstance(grad, tensor.Tensor), 'The input must be Tensor'
         return [grad] * self.num_input, []  # * self.num_input
@@ -789,6 +801,14 @@
         return outputs
 
     def backward(self, flag, grads):
+        '''Sum all grad tensors to generate a single output tensor.
+
+        Args:
+            grads(list of Tensor), one per dest layer
+
+        Returns:
+            a single tensor as the sum of all grads
+        '''
         assert len(grads) > 1, 'There must be multiple gradients'
         dx = tensor.Tensor()
         dx.reset_like(grads[0])
@@ -805,7 +825,7 @@ class Concat(Layer):
 
     Args:
         axis(int): 0 for concat rows; 1 for concat columns;
-        input_sample_shapes: a list of shape tuples, one per input tensor
+        input_sample_shapes: a list of sample shape tuples, one per input tensor
     '''
 
     def __init__(self, name, axis, input_sample_shapes=None):
@@ -820,6 +840,36 @@
         if input_sample_shapes is not None:
             self.setup(input_sample_shapes)
 
+    def forward(self, flag, inputs):
+        '''Concatenate all input tensors.
+
+        Args:
+            flag: same as Layer::forward()
+            inputs: a list of tensors
+
+        Returns:
+            a single concatenated tensor
+        '''
+        assert type(inputs) is list, 'Must be a list of Tensors'
+        ys = super(Concat, self).forward(flag, inputs)
+        return ys[0]
+
+
+    def backward(self, flag, dy):
+        '''Backward propagate gradients through this layer.
+
+        Args:
+            flag: same as Layer::backward()
+            dy(Tensor): the gradient tensor of y w.r.t the objective loss
+        Returns:
+            <dx, []>, dx is a list of tensors for the gradient of the inputs;
+            [] is an empty list.
+        '''
+        if type(dy) is tensor.Tensor:
+            dy = [dy]
+        assert type(dy) is list, 'Must be a list(Tensor)'
+        return super(Concat, self).backward(flag, dy)
+
 
 class Slice(Layer):
     '''Slice the input tensor into multiple sub-tensors vertically (axis=0) or
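A minimal usage sketch of the Concat layer's list-in, tensor-out contract
above (fix 3); it assumes model_pb2 is importable from singa.proto, as in the
updated test_layer.py further down:

    from singa import layer, tensor
    from singa.proto import model_pb2

    lyr = layer.Concat('concat', 0, [(3,), (3,)])
    x1 = tensor.Tensor((2, 3))
    x2 = tensor.Tensor((1, 3))
    x1.set_value(1)
    x2.set_value(2)
    y = lyr.forward(model_pb2.kTrain, [x1, x2])  # list in -> one Tensor out

    dy = tensor.Tensor((3, 3))
    dy.set_value(0.5)
    dxs, _ = lyr.backward(model_pb2.kTrain, dy)  # one Tensor in -> list out
    assert len(dxs) == 2  # one gradient per source layer, in source order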
@@ -829,7 +879,7 @@
         axis (int): 0 for slice rows; 1 for slice columns;
         slice_point(list): positions along the axis to do slice; there
             are n-1 points for n sub-tensors;
-        input_sample_shape: input tensor shape
+        input_sample_shape: input tensor sample shape
     '''
 
     def __init__(self, name, axis, slice_point, input_sample_shape=None):
@@ -850,6 +900,36 @@
         for i in range(len(self.conf.slice_conf.slice_point) + 1):
             out.append(self.layer.GetOutputSampleShape(i))
 
+    def forward(self, flag, x):
+        '''Slice the input tensor on the given axis.
+
+        Args:
+            flag: same as Layer::forward()
+            x: a single input tensor
+
+        Returns:
+            a list of output tensors
+        '''
+        if type(x) is tensor.Tensor:
+            x = [x]
+        assert type(x) is list, 'Must be a list of Tensor'
+        return super(Slice, self).forward(flag, x)
+
+    def backward(self, flag, grads):
+        '''Concatenate all grad tensors to generate a single output tensor.
+
+        Args:
+            flag: same as Layer::backward()
+            grads: a list of tensors, one for the gradient of one sliced tensor
+
+        Returns:
+            a single tensor for the gradient of the original input, and an
+            empty list.
+        '''
+        assert len(grads) > 1, 'There must be multiple gradients'
+        dxs, _ = super(Slice, self).backward(flag, grads)
+        return dxs[0], []
+
 
 class RNN(Layer):
     '''Recurrent layer with 4 types of units, namely lstm, gru, tanh and relu.


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/python/singa/net.py
----------------------------------------------------------------------
diff --git a/python/singa/net.py b/python/singa/net.py
index d34afbc..b6f973d 100644
--- a/python/singa/net.py
+++ b/python/singa/net.py
@@ -301,7 +301,10 @@ class FeedForwardNet(object):
                 grads = grads[0]
             outs, _pgrads = cur.backward(kTrain, grads)
             pgrads.append(_pgrads)
-            output_of_layer[cur.name] = outs
+            if type(outs) is list:
+                output_of_layer[cur.name] = reversed(outs)
+            else:
+                output_of_layer[cur.name] = outs
             grads = []
 
         ret = []
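Why reversed(outs) in the net.py hunk above (fix 2): FeedForwardNet.backward()
visits layers in the reverse of the forward order, so a gradient list that a
multi-source layer returns in source order is consumed last-source-first. A
schematic sketch in plain Python; the names are hypothetical and this is not
the SINGA API:

    # src_a feeds the concat layer first, then src_b.
    forward_order = ['src_a', 'src_b', 'concat']
    dxs = ['dx_for_src_a', 'dx_for_src_b']  # Backward() output, source order

    # The backward pass reaches src_b before src_a, so the gradients must be
    # handed out in reverse source order.
    for name, dx in zip(reversed(forward_order[:-1]), reversed(dxs)):
        print(name, 'receives', dx)  # src_b then src_a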

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/src/model/layer/concat.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/concat.cc b/src/model/layer/concat.cc
index b1c0b11..88c2409 100644
--- a/src/model/layer/concat.cc
+++ b/src/model/layer/concat.cc
@@ -27,19 +27,28 @@ RegisterLayerClass(singacl_concat, Concat);
 void Concat::Setup(const vector<Shape>& in_shapes, const LayerConf& conf) {
   Layer::Setup(in_shapes, conf);
-  dim_size_.clear();
+  out_sample_shape_.clear();
+  slice_point_.clear();
   axis_ = conf.concat_conf().axis();
-  out_sample_shape_ = {0, 0};
-  out_sample_shape_[1 - axis_] = in_shapes[0][1 - axis_];
-  for (auto& s: in_shapes) {
-    out_sample_shape_[axis_] += s[axis_];
-    dim_size_.push_back(s[axis_]);
-    // LOG(ERROR) << s[axis_];
+  if (axis_ == 0)
+    out_sample_shape_.push_back(in_shapes[0][0]);
+  else {
+    size_t l = 0;
+    for (auto& s: in_shapes) {
+      l += s[0];
+    }
+    out_sample_shape_.push_back(l);
   }
 }
 
 const vector<Tensor> Concat::Forward(int flag, const vector<Tensor>& inputs) {
   vector<Tensor> outputs;
+  slice_point_.clear();
+  size_t offset = 0;
+  for (auto& x : inputs) {
+    offset += x.shape(axis_);
+    slice_point_.push_back(offset);
+  }
   if (inputs.size() == 1u) {
     outputs = inputs;
   } else {
@@ -55,14 +64,13 @@ const std::pair<vector<Tensor>, vector<Tensor>> Concat::Backward(
     int flag, const vector<Tensor>& grads) {
   vector<Tensor> input_grad, param_grad;
   CHECK_EQ(grads.size(), 1u) << "Concat layer only has one output tensor.";
-  for (size_t i = 0, offset = 0; i < dim_size_.size(); i++) {
+  size_t last_offset = 0u;
+  for (auto p : slice_point_) {
     if (axis_ == 0)
-      input_grad.push_back(SliceRows(grads.at(0), offset,
-                                     offset + dim_size_[i]));
+      input_grad.push_back(SliceRows(grads.at(0), last_offset, p));
     else
-      input_grad.push_back(SliceColumns(grads.at(0), offset,
-                                        offset + dim_size_[i]));
-    offset += dim_size_[i];
+      input_grad.push_back(SliceColumns(grads.at(0), last_offset, p));
+    last_offset = p;
   }
   return std::make_pair(input_grad, param_grad);
 }


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/src/model/layer/concat.h
----------------------------------------------------------------------
diff --git a/src/model/layer/concat.h b/src/model/layer/concat.h
index 59293d7..4e9a967 100644
--- a/src/model/layer/concat.h
+++ b/src/model/layer/concat.h
@@ -45,8 +45,9 @@ class Concat : public Layer {
  protected:
   /// 0 for concat rows; 1 for concat cols
   int axis_ = 0;
-  /// dim_size_[i] the size of the i-th source tensor on the concat dim
-  vector<int> dim_size_;
+  /// slice_point_[i] is the end offset of the i-th source tensor on the
+  /// concat axis of the result tensor
+  vector<int> slice_point_;
   Shape out_sample_shape_;
 };


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/src/model/layer/slice.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/slice.cc b/src/model/layer/slice.cc
index 690a03e..66c05ee 100644
--- a/src/model/layer/slice.cc
+++ b/src/model/layer/slice.cc
@@ -28,32 +28,37 @@ RegisterLayerClass(singacl_slice, Slice);
 void Slice::Setup(const Shape& in_sample, const LayerConf& conf) {
   Layer::Setup(in_sample, conf);
   out_sample_shapes_.clear();
+  slice_point_.clear();
   axis_ = conf.slice_conf().axis();
   int offset = 0;
   // #slice point = # out tensors - 1
   for (size_t p : conf.slice_conf().slice_point()) {
-    Shape s{0, 0};
-    s[1 - axis_] = in_sample[1 - axis_];
-    s[axis_] = p - offset;
-    offset = p;
-    out_sample_shapes_.push_back(s);
+    slice_point_.push_back(p);
+    if (axis_ == 1) {
+      out_sample_shapes_.push_back({p - offset});
+      offset = p;
+    } else {
+      out_sample_shapes_.push_back(in_sample);
+    }
+  }
+  slice_point_.push_back(in_sample[0]);
+  if (axis_ == 1) {
+    out_sample_shapes_.push_back({in_sample[0] - offset});
+  } else {
+    out_sample_shapes_.push_back(in_sample);
   }
-  Shape s{0, 0};
-  s[1 - axis_] = in_sample[1 - axis_];
-  s[axis_] = in_sample[axis_] - offset;
-  out_sample_shapes_.push_back(s);
 }
 
 const vector<Tensor> Slice::Forward(int flag, const vector<Tensor>& inputs) {
   vector<Tensor> outputs;
   CHECK_EQ(inputs.size(), 1u) << "Slice layer only has one input tensor.";
   size_t offset = 0;
-  for (auto& s : out_sample_shapes_) {
+  for (auto& s : slice_point_) {
     if (axis_ == 0)
-      outputs.push_back(SliceRows(inputs.at(0), offset, offset + s[axis_]));
+      outputs.push_back(SliceRows(inputs.at(0), offset, s));
     else
-      outputs.push_back(SliceColumns(inputs.at(0), offset, offset + s[axis_]));
+      outputs.push_back(SliceColumns(inputs.at(0), offset, s));
-    offset += s[axis_];
+    offset = s;
   }
   return outputs;
 }


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/src/model/layer/slice.h
----------------------------------------------------------------------
diff --git a/src/model/layer/slice.h b/src/model/layer/slice.h
index 99ce468..7ed61fc 100644
--- a/src/model/layer/slice.h
+++ b/src/model/layer/slice.h
@@ -48,6 +48,8 @@ class Slice : public Layer {
   int axis_ = 0;
   /// out_sample_shapes_[i] is the shape of the i-th output tensor
   vector<Shape> out_sample_shapes_;
+  /// slice point, end offset of each output
+  vector<size_t> slice_point_;
 };
 
 }  // namespace singa
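A minimal usage sketch of the Slice contract (fix 3, the mirror image of
Concat), following the shapes used in the updated test_layer.py below; the
tuple() call is only defensive in case the shape is exposed as a list:

    import numpy as np
    from singa import layer, tensor
    from singa.proto import model_pb2

    lyr = layer.Slice('slice', 1, [2], (3,))  # sample shape (3,), cut at column 2
    x = tensor.from_numpy(np.ones((3, 3), dtype=np.float32))
    y1, y2 = lyr.forward(model_pb2.kTrain, x)  # one Tensor in -> list out

    dy1 = tensor.Tensor((3, 2))
    dy2 = tensor.Tensor((3, 1))
    dy1.set_value(1)
    dy2.set_value(2)
    dx, _ = lyr.backward(model_pb2.kTrain, [dy1, dy2])  # list in -> one Tensor out
    assert tuple(dx.shape) == (3, 3)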

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/test/python/test_layer.py
----------------------------------------------------------------------
diff --git a/test/python/test_layer.py b/test/python/test_layer.py
index d22207f..c0f19f3 100644
--- a/test/python/test_layer.py
+++ b/test/python/test_layer.py
@@ -209,21 +209,33 @@ class TestPythonLayer(unittest.TestCase):
         t2 = tensor.Tensor((1, 3))
         t1.set_value(1)
         t2.set_value(2)
-        lyr = layer.Concat('concat', 0, [t1.shape, t2.shape])
+        lyr = layer.Concat('concat', 0, [(3,), (3,)])
         t = lyr.forward(model_pb2.kTrain, [t1, t2])
-        tnp = tensor.to_numpy(t[0])
+        tnp = tensor.to_numpy(t)
         self.assertEquals(np.sum(tnp), 12)
+        t3 = tensor.Tensor((3, 3))
+        t3.set_value(1.5)
+        grads, _ = lyr.backward(model_pb2.kTrain, [t3])
+        gnp = tensor.to_numpy(grads[0])
+        self.assertEquals(np.sum(gnp), 6 * 1.5)
 
     def test_slice(self):
         t = np.zeros((3, 3))
         t[:, :2] = float(2)
         t[:, 2] = float(1)
-        lyr = layer.Slice('slice', 1, [2], t.shape)
+        lyr = layer.Slice('slice', 1, [2], (3,))
         out = lyr.forward(model_pb2.kTrain, [tensor.from_numpy(t)])
         t1 = tensor.to_numpy(out[0])
         t2 = tensor.to_numpy(out[1])
         self.assertEquals(np.average(t1), 2)
         self.assertEquals(np.average(t2), 1)
+        t1 = tensor.Tensor((3, 2))
+        t2 = tensor.Tensor((3, 1))
+        t1.set_value(1)
+        t2.set_value(2)
+        grad, _ = lyr.backward(model_pb2.kTrain, [t1, t2])
+        gnp = tensor.to_numpy(grad)
+        self.assertEquals(np.sum(gnp), 12)
 
 
 if __name__ == '__main__':


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/test/singa/test_concat.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_concat.cc b/test/singa/test_concat.cc
index 80183a7..d7f1060 100644
--- a/test/singa/test_concat.cc
+++ b/test/singa/test_concat.cc
@@ -31,10 +31,9 @@ TEST(Concat, Setup) {
   conf.set_type("singa_concat");
   conf.mutable_concat_conf()->set_axis(0);
   singa::Concat layer;
-  layer.Setup({s1, s2}, conf);
+  layer.Setup({{3u}, {3u}}, conf);
   auto s = layer.GetOutputSampleShape();
   EXPECT_EQ(s[0], 3u);
-  EXPECT_EQ(s[1], 3u);
 }
 
 void ForwardConcatRowTest(std::shared_ptr<singa::Device> dev) {
@@ -45,7 +44,7 @@
   conf.set_type("singa_concat");
   conf.mutable_concat_conf()->set_axis(0);
   singa::Concat layer;
-  layer.Setup({t1.shape(), t2.shape()}, conf);
+  layer.Setup({{c}, {c}}, conf);
   layer.ToDevice(dev);
 
   t1.SetValue(1.0f);
@@ -74,7 +73,7 @@ void ForwardConcatColumnTest(std::shared_ptr<singa::Device> dev) {
   conf.set_type("singa_concat");
   conf.mutable_concat_conf()->set_axis(1);
   singa::Concat layer;
-  layer.Setup({t1.shape(), t2.shape()}, conf);
+  layer.Setup({{a}, {b}}, conf);
   layer.ToDevice(dev);
 
   t1.SetValue(1.0f);
@@ -119,9 +118,15 @@ void BackwardConcatRowTest(std::shared_ptr<singa::Device> dev) {
   conf.set_type("singa_concat");
   conf.mutable_concat_conf()->set_axis(0);
   singa::Concat layer;
-  layer.Setup({{a, c}, {b, c}}, conf);
+  layer.Setup({{c}, {c}}, conf);
   layer.ToDevice(dev);
 
+  singa::Tensor t1({a, c}, dev);
+  singa::Tensor t2({b, c}, dev);
+  t1.SetValue(1.0f);
+  t2.SetValue(2.0f);
+  layer.Forward(singa::kTrain, {t1, t2});
+
   singa::Tensor t({a + b, c}, dev);
   singa::Uniform(-1.f, 1.f, &t);
   auto out = layer.Backward(singa::kTrain, {t});
@@ -149,9 +154,15 @@ void BackwardConcatColumnTest(std::shared_ptr<singa::Device> dev) {
   conf.set_type("singa_concat");
   conf.mutable_concat_conf()->set_axis(1);
   singa::Concat layer;
-  layer.Setup({{c, a}, {c, b}}, conf);
+  layer.Setup({{a}, {b}}, conf);
   layer.ToDevice(dev);
 
+  singa::Tensor t1({c, a}, dev);
+  singa::Tensor t2({c, b}, dev);
+  t1.SetValue(1.0f);
+  t2.SetValue(2.0f);
+  layer.Forward(singa::kTrain, {t1, t2});
+
   singa::Tensor t({c, a + b}, dev);
   singa::Uniform(-1.f, 1.f, &t);
   auto out = layer.Backward(singa::kTrain, {t});
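The unit tests above and below boil down to the usual numpy identities:
Concat::Forward behaves like np.concatenate on the concat axis, and
Slice::Forward like np.split at the slice points. A pure-numpy sketch of
that mental model, for intuition only and not the SINGA API:

    import numpy as np

    a = np.ones((2, 3), dtype=np.float32)
    b = np.full((1, 3), 2, dtype=np.float32)
    y = np.concatenate([a, b], axis=0)  # what Concat::Forward computes (axis 0)
    parts = np.split(y, [2], axis=0)    # what Slice::Forward computes
    assert (parts[0] == a).all() and (parts[1] == b).all()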

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/82c12c2b/test/singa/test_slice.cc
----------------------------------------------------------------------
diff --git a/test/singa/test_slice.cc b/test/singa/test_slice.cc
index 6039c47..f6b8997 100644
--- a/test/singa/test_slice.cc
+++ b/test/singa/test_slice.cc
@@ -24,20 +24,17 @@ using singa::Shape;
 TEST(Slice, Setup) {
-  Shape s{2u, 3u};
   singa::LayerConf conf;
   conf.set_type("singa_slice");
   auto slice_conf = conf.mutable_slice_conf();
   slice_conf->set_axis(1);
   slice_conf->add_slice_point(2);
   singa::Slice layer;
-  layer.Setup(s, conf);
+  layer.Setup({3u}, conf);
   auto s1 = layer.GetOutputSampleShape(0);
   EXPECT_EQ(s1[0], 2u);
-  EXPECT_EQ(s1[1], 2u);
   auto s2 = layer.GetOutputSampleShape(1);
-  EXPECT_EQ(s2[0], 2u);
-  EXPECT_EQ(s2[1], 1u);
+  EXPECT_EQ(s2[0], 1u);
 }
 
 void ForwardSliceRowTest(std::shared_ptr<singa::Device> dev) {
@@ -46,9 +43,9 @@
   conf.set_type("singa_slice");
   auto slice_conf = conf.mutable_slice_conf();
   slice_conf->set_axis(0);
-  slice_conf->add_slice_point(2);
+  slice_conf->add_slice_point(a);
   singa::Slice layer;
-  layer.Setup({a + b ,c}, conf);
+  layer.Setup({c}, conf);
   layer.ToDevice(dev);
 
   singa::Tensor t({a + b, c}, dev);
@@ -77,9 +74,9 @@ void ForwardSliceColumnTest(std::shared_ptr<singa::Device> dev) {
   conf.set_type("singa_slice");
   auto slice_conf = conf.mutable_slice_conf();
   slice_conf->set_axis(1);
-  slice_conf->add_slice_point(2);
+  slice_conf->add_slice_point(a);
   singa::Slice layer;
-  layer.Setup({c, a + b}, conf);
+  layer.Setup({a + b}, conf);
   layer.ToDevice(dev);
 
   singa::Tensor t({c, a + b}, dev);
@@ -132,7 +129,7 @@ void BackwardSliceRowTest(std::shared_ptr<singa::Device> dev) {
   slice_conf->set_axis(0);
   slice_conf->add_slice_point(2);
   singa::Slice layer;
-  layer.Setup({a + b ,c}, conf);
+  layer.Setup({c}, conf);
   layer.ToDevice(dev);
 
   singa::Tensor t1({a, c}, dev);
@@ -162,7 +159,7 @@ void BackwardSliceColumnTest(std::shared_ptr<singa::Device> dev) {
   slice_conf->set_axis(1);
   slice_conf->add_slice_point(2);
   singa::Slice layer;
-  layer.Setup({c , a + b}, conf);
+  layer.Setup({a + b}, conf);
   layer.ToDevice(dev);
 
   singa::Tensor t1({c, a}, dev);