SINGA-190 - Add prelu layer and flatten layer. Format code and fix compilation warnings.
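
For context on the diff below: the PReLU layer computes y = max(0, x) + a * min(0, x), with the slope a either shared across all channels or stored per channel (see the reference computation in test_prelu.cc), and the Flatten layer reshapes its input to 1D when axis_ is 0, or to 2D by collapsing all axes before axis_ into the first dimension (see flatten.cc). The following is a minimal, self-contained sketch of that element-wise math in plain C++, independent of the SINGA Tensor API and intended only as an illustration of the semantics:

    #include <algorithm>
    #include <cstddef>
    #include <vector>

    // Element-wise PReLU over an NHWC buffer: y = max(0, x) + a[c] * min(0, x).
    // 'slope' holds one value per channel, or a single shared value.
    std::vector<float> PReLUForward(const std::vector<float> &x,
                                    const std::vector<float> &slope,
                                    size_t channels) {
      std::vector<float> y(x.size());
      for (size_t i = 0; i < x.size(); ++i) {
        const float a = slope.size() == 1 ? slope[0] : slope[i % channels];
        y[i] = std::max(x[i], 0.f) + a * std::min(x[i], 0.f);
      }
      return y;
    }

    // Flatten with axis k: dims [d0, ..., d(n-1)] become
    // [d0 * ... * d(k-1), dk * ... * d(n-1)]; axis 0 yields a single 1D dim.
    std::vector<size_t> FlattenShape(const std::vector<size_t> &dims, int axis) {
      size_t total = 1;
      for (size_t d : dims) total *= d;
      if (axis == 0) return {total};
      size_t dim1 = 1;
      for (int i = 0; i < axis; ++i) dim1 *= dims[i];
      return {dim1, total / dim1};
    }
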
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/58be3f80 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/58be3f80 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/58be3f80 Branch: refs/heads/dev Commit: 58be3f8079e8d00c9fee4e1ce319786cc4e9f225 Parents: 5afd81b Author: Wei Wang <[email protected]> Authored: Sun Jun 12 22:31:46 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Sun Jun 12 22:31:46 2016 +0800 ---------------------------------------------------------------------- src/model/layer/flatten.cc | 19 +-- src/model/layer/flatten.h | 13 +- src/model/layer/prelu.cc | 62 ++------ src/model/layer/prelu.h | 11 +- src/proto/model.proto | 326 ++++++++++++++++++++++------------------ test/singa/test_flatten.cc | 68 ++++----- test/singa/test_prelu.cc | 46 +++--- 7 files changed, 261 insertions(+), 284 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/58be3f80/src/model/layer/flatten.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/flatten.cc b/src/model/layer/flatten.cc index 3ed37fe..7341394 100644 --- a/src/model/layer/flatten.cc +++ b/src/model/layer/flatten.cc @@ -31,22 +31,15 @@ const Tensor Flatten::Forward(int flag, const Tensor &input) { if (!Axis()) { // reshape to 1D size_t dim = output.Size(); - output.Reshape(Shape { - dim - }); - output_shape_ = Shape { dim } - ; + output.Reshape(Shape{dim}); + output_shape_ = Shape{dim}; } else { // reshape to 2D size_t dim1 = 1, dim2; - for (int i = 0; i < Axis(); i++) - dim1 *= output.shape(i); + for (int i = 0; i < Axis(); i++) dim1 *= output.shape(i); dim2 = output.Size() / dim1; - output.Reshape(Shape { - dim1, dim2 - }); - output_shape_ = Shape { dim1, dim2 } - ; + output.Reshape(Shape{dim1, dim2}); + output_shape_ = Shape{dim1, dim2}; } return output; } @@ -55,7 +48,7 @@ const std::pair<Tensor, vector<Tensor> > Flatten::Backward(int flag, const Tensor &grad) { vector<Tensor> param_grad; Tensor input_grad = grad; - input_grad.Reshape(Input_shape()); + input_grad.Reshape(input_shape_); return std::make_pair(input_grad, param_grad); } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/58be3f80/src/model/layer/flatten.h ---------------------------------------------------------------------- diff --git a/src/model/layer/flatten.h b/src/model/layer/flatten.h index cb36542..580b2ba 100644 --- a/src/model/layer/flatten.h +++ b/src/model/layer/flatten.h @@ -24,7 +24,7 @@ namespace singa { class Flatten : public Layer { -public: + public: /// \copydoc Layer::layer_type(); const std::string layer_type() const override { return "Flatten"; } @@ -35,15 +35,14 @@ public: const Tensor Forward(int flag, const Tensor &input) override; /// \copydoc Layer::Backward(int, const Tensor&, const Tensor&); - const std::pair<Tensor, vector<Tensor> > Backward(int flag, - const Tensor &grad) - override; + const std::pair<Tensor, vector<Tensor> > Backward( + int flag, const Tensor &grad) override; const int Axis() const { return axis_; } - const Shape Input_shape() const { return input_shape_; } - const Shape Output_shape() const { return output_shape_; } + const Shape input_shape() const { return input_shape_; } + const Shape output_shape() const { return output_shape_; } -protected: + protected: /// flatten layer reshape the input to 2D, one from 0 to axis_-1, one from /// axis_ 
to end. /// if axis_ is 0, reshape the input to 1D. http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/58be3f80/src/model/layer/prelu.cc ---------------------------------------------------------------------- diff --git a/src/model/layer/prelu.cc b/src/model/layer/prelu.cc index 1d6a2e7..b916bed 100644 --- a/src/model/layer/prelu.cc +++ b/src/model/layer/prelu.cc @@ -25,8 +25,7 @@ void PReLU::Setup(const LayerConf &conf) { channel_shared_ = conf.prelu_conf().channel_shared(); format_ = conf.prelu_conf().format(); // Push back params into param_values_ - for (const auto &spec : conf.param()) - param_specs_.push_back(spec); + for (const auto &spec : conf.param()) param_specs_.push_back(spec); param_values_.push_back(&a_); } @@ -41,26 +40,18 @@ const Tensor PReLU::Forward(int flag, const Tensor &input) { c = temp.shape(1); h = temp.shape(2); w = temp.shape(3); - temp.Reshape(Shape { - n *c, h *w - }); - Tensor temp_a(Shape { - n, c - }); + temp.Reshape(Shape{n * c, h * w}); + Tensor temp_a(Shape{n, c}); Uniform(1.f, 1.f, &temp_a); MultRow(a_, &temp_a); - temp_a.Reshape(Shape { - n *c - }); + temp_a.Reshape(Shape{n * c}); MultColumn(temp_a, &temp); } else if (format_ == "NHWC") { n = temp.shape(0); h = temp.shape(1); w = temp.shape(2); c = temp.shape(3); - temp.Reshape(Shape { - n *h *w, c - }); + temp.Reshape(Shape{n * h * w, c}); MultRow(a_, &temp); } else { LOG(FATAL) << "Incorrect input format for prelu layer."; @@ -74,8 +65,7 @@ const Tensor PReLU::Forward(int flag, const Tensor &input) { const float a = a_.data<const float *>()[0]; output = input * ((input > 0.f) + (input <= 0.f) * a); } - if (flag & kTrain) - buf_.push(input); + if (flag & kTrain) buf_.push(input); return output; } @@ -96,33 +86,21 @@ const std::pair<Tensor, vector<Tensor> > PReLU::Backward(int flag, c = temp1.shape(1); h = temp1.shape(2); w = temp1.shape(3); - temp1.Reshape(Shape { - n *c, h *w - }); - Tensor temp_a(Shape { - n, c - }); + temp1.Reshape(Shape{n * c, h * w}); + Tensor temp_a(Shape{n, c}); Uniform(1.f, 1.f, &temp_a); MultRow(a_, &temp_a); - temp_a.Reshape(Shape { - n *c - }); + temp_a.Reshape(Shape{n * c}); MultColumn(temp_a, &temp1); - temp1.Reshape(Shape { - n, c, h, w - }); + temp1.Reshape(Shape{n, c, h, w}); } else if (format_ == "NHWC") { n = temp1.shape(0); h = temp1.shape(1); w = temp1.shape(2); c = temp1.shape(3); - temp1.Reshape(Shape { - n *h *w, c - }); + temp1.Reshape(Shape{n * h * w, c}); MultRow(a_, &temp1); - temp1.Reshape(Shape { - n, h, w, c - }); + temp1.Reshape(Shape{n, h, w, c}); } else { LOG(FATAL) << "Incorrect input format for prelu layer."; } @@ -130,22 +108,14 @@ const std::pair<Tensor, vector<Tensor> > PReLU::Backward(int flag, LOG(FATAL) << "Incorrect input format for prelu layer."; } input_grad = grad * input * ((input > 0.f) + temp1); - Tensor temp2 = grad * input * (input <= 0.f), temp3(Shape { - n *c - }); + Tensor temp2 = grad * input * (input <= 0.f), temp3(Shape{n * c}); if (format_ == "NCHW") { - temp2.Reshape(Shape { - n *c, h *w - }); + temp2.Reshape(Shape{n * c, h * w}); SumColumns(temp2, &temp3); - temp3.Reshape(Shape { - n, c - }); + temp3.Reshape(Shape{n, c}); SumRows(temp3, &da); } else if (format_ == "NHWC") { - temp2.Reshape(Shape { - n *h *w, c - }); + temp2.Reshape(Shape{n * h * w, c}); SumRows(temp2, &da); } } else { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/58be3f80/src/model/layer/prelu.h ---------------------------------------------------------------------- diff --git a/src/model/layer/prelu.h b/src/model/layer/prelu.h index 
1a01d98..d165fe2 100644 --- a/src/model/layer/prelu.h +++ b/src/model/layer/prelu.h @@ -26,7 +26,7 @@ namespace singa { class PReLU : public Layer { public: /// \copydoc Layer::layer_type() - const std::string layer_type() const override { return "PReLU"; } + const std::string layer_type() const override { return "PReLU"; } /// \copydoc Layer::Setup(const LayerConf&); void Setup(const LayerConf &conf) override; @@ -35,9 +35,8 @@ class PReLU : public Layer { const Tensor Forward(int flag, const Tensor &input) override; /// \copydoc Layer::Backward(int, const Tensor&, const Tensor&); - const std::pair<Tensor, vector<Tensor> > Backward(int flag, - const Tensor &grad) - override; + const std::pair<Tensor, vector<Tensor> > Backward( + int flag, const Tensor &grad) override; void ToDevice(Device *device); @@ -52,8 +51,8 @@ class PReLU : public Layer { protected: bool channel_shared_; - std::string format_; // format_ has two valid value, i.e. NCHW, NHWC - Tensor a_; // shape of a_ is 2D, i.e. (channels, 1) + std::string format_; // format_ has two valid value, i.e. NCHW, NHWC + Tensor a_; // shape of a_ is 2D, i.e. (channels, 1) std::stack<Tensor> buf_; }; } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/58be3f80/src/proto/model.proto ---------------------------------------------------------------------- diff --git a/src/proto/model.proto b/src/proto/model.proto index 1d1f3cf..590fdd6 100644 --- a/src/proto/model.proto +++ b/src/proto/model.proto @@ -33,59 +33,67 @@ package singa; /// using Python (or C++/Java). // Specifies the shape (dimensions) of a Blob. -message BlobShape { repeated int64 dim = 1[packed = true]; } +message BlobShape { + repeated int64 dim = 1 [packed = true]; +} message BlobProto { optional BlobShape shape = 7; - repeated float data = 5[packed = true]; - repeated float diff = 6[packed = true]; - repeated double double_data = 8[packed = true]; - repeated double double_diff = 9[packed = true]; + repeated float data = 5 [packed = true]; + repeated float diff = 6 [packed = true]; + repeated double double_data = 8 [packed = true]; + repeated double double_diff = 9 [packed = true]; // 4D dimensions -- deprecated. Use "shape" instead. 
- optional int32 num = 1[default = 0]; - optional int32 channels = 2[default = 0]; - optional int32 height = 3[default = 0]; - optional int32 width = 4[default = 0]; + optional int32 num = 1 [default = 0]; + optional int32 channels = 2 [default = 0]; + optional int32 height = 3 [default = 0]; + optional int32 width = 4 [default = 0]; } message FillerConf { // The filler type, case insensitive - optional string type = 1[default = 'constant']; - optional float value = 2[default = 0]; // the value in constant filler - optional float min = 3[default = 0]; // the min value in uniform filler - optional float max = 4[default = 1]; // the max value in uniform filler - optional float mean = 5[default = 0]; // the mean value in Gaussian filler - optional float std = 6[default = 1]; // the std value in Gaussian filler + optional string type = 1 [default = 'constant']; + optional float value = 2 [default = 0]; // the value in constant filler + optional float min = 3 [default = 0]; // the min value in uniform filler + optional float max = 4 [default = 1]; // the max value in uniform filler + optional float mean = 5 [default = 0]; // the mean value in Gaussian filler + optional float std = 6 [default = 1]; // the std value in Gaussian filler // The expected number of non-zero output weights for a given input in // Gaussian filler -- the default -1 means don't perform sparsification. /* optional int32 sparse = 7 [default = -1]; */ // Normalize the filler variance by fan_in, fan_out, or their average. // Applies to 'xavier' and 'msra' fillers. enum VarianceNorm { - FAN_IN = 0; FAN_OUT = 1; AVERAGE = 2; - } optional VarianceNorm variance_norm = 8[default = FAN_IN]; + FAN_IN = 0; + FAN_OUT = 1; + AVERAGE = 2; + } + optional VarianceNorm variance_norm = 8 [default = FAN_IN]; } /// SINGA message message OptimizerConf { // case insensitive - optional string type = 1[default = "sgd"]; + optional string type = 1 [default = "sgd"]; // used by RMSprop and Adadelta - optional float rho = 2[default = 0.001]; + optional float rho = 2 [default = 0.001]; // used by Adam and AdamMax - optional float beta_1 = 3[default = 0.9]; - optional float beta_2 = 4[default = 0.999]; + optional float beta_1 = 3 [default = 0.9]; + optional float beta_2 = 4 [default = 0.999]; // used by vanilla sgd and nesterov - optional float momentum = 5[default = 0.9]; + optional float momentum = 5 [default = 0.9]; + + // delta is used to avoid dividing zero + optional float delta = 6 [default = 1e-8]; } message ConstraintConf { // case insensitive to limit the parameter value/gradient scale - optional string type = 1[default = "l2"]; + optional string type = 1 [default = "l2"]; // e.g., the threshold for limiting the parameter scale. optional float threshold = 2; } @@ -93,7 +101,7 @@ message ConstraintConf { /// SINGA message message RegularizerConf { // case insensitive to regularize the parameters, e.g., L2. - optional string type = 1[default = "l2"]; + optional string type = 1 [default = "l2"]; // e.g., the weight decay for L2 regularizer optional float coefficient = 2; } @@ -119,10 +127,10 @@ message ParamSpec { */ // The multiplier on the global learning rate for this parameter. - optional float lr_mult = 3[default = 1.0]; + optional float lr_mult = 3 [default = 1.0]; // The multiplier on the global weight decay for this parameter. - optional float decay_mult = 4[default = 1.0]; + optional float decay_mult = 4 [default = 1.0]; // SINGA uses this filed internally. 
Users just configure the fillers in // Layer specific conf message as caffe (style). @@ -132,13 +140,14 @@ message ParamSpec { } enum Phase { - kTrain = 4; kEval = 8; -} - // NOTE - // Update the next available ID when you add a new LayerConf field. - // - // LayerConf next available layer-specific ID: 139 (last added: tile_param) - message LayerConf { + kTrain = 4; + kEval = 8; +} +// NOTE +// Update the next available ID when you add a new LayerConf field. +// +// LayerConf next available layer-specific ID: 139 (last added: tile_param) +message LayerConf { optional string name = 1; // the layer name optional string type = 2; // the layer type /* repeated string bottom = 3; // the name of each bottom blob */ @@ -242,8 +251,7 @@ message TransformationConf { optional uint32 crop_size = 3 [default = 0]; // mean_file and mean_value cannot be specified at the same time optional string mean_file = 4; - // if specified can be repeated once (would substract it from all the -channels) + // if specified can be repeated once (would substract it from all the channels) // or can be repeated the same number of times as channels // (would subtract them from the corresponding channel) repeated float mean_value = 5; @@ -260,33 +268,34 @@ message LossConf { optional int32 ignore_label = 1; // If true, normalize each batch across all instances (including spatial // dimesions, but not ignored instances); else, divide by batch size only. - optional bool normalize = 2[default = true]; + optional bool normalize = 2 [default = true]; } message MetricConf { // When computing accuracy, count as correct by comparing the true label to // the top k scoring classes. By default, only compare to the top scoring // class (i.e. argmax). - optional uint32 top_k = 1[default = 1]; + optional uint32 top_k = 1 [default = 1]; // The "label" axis of the prediction blob, whose argmax corresponds to the // predicted label -- may be negative to index from the end (e.g., -1 for the // last axis). For example, if axis == 1 and the predictions are // (N x C x H x W), the label blob is expected to contain N*H*W ground truth // labels with integer values in {0, 1, ..., C-1}. - optional int32 axis = 2[default = 1]; + optional int32 axis = 2 [default = 1]; // If specified, ignore instances with the given label. optional int32 ignore_label = 3; } -// Messages that store hyper-parameters used by individual layer types follow, -// in +// Messages that store hyper-parameters used by individual layer types follow, in // alphabetical order. + + message ArgMaxConf { // If true produce pairs (argmax, maxval) - optional bool out_max_val = 1[default = false]; - optional uint32 top_k = 2[default = 1]; + optional bool out_max_val = 1 [default = false]; + optional uint32 top_k = 2 [default = 1]; // The axis along which to maximise -- may be negative to index from the // end (e.g., -1 for the last axis). // By default ArgMaxLayer maximizes over the flattened trailing dimensions @@ -299,51 +308,54 @@ message ConcatConf { // end (e.g., -1 for the last axis). Other axes must have the // same dimension for all the bottom blobs. // By default, ConcatLayer concatenates blobs along the "channels" axis (1). - optional int32 axis = 2[default = 1]; + optional int32 axis = 2 [default = 1]; // DEPRECATED: alias for "axis" -- does not support negative indexing. 
- optional uint32 concat_dim = 1[default = 1]; + optional uint32 concat_dim = 1 [default = 1]; } message ContrastiveLossConf { // margin for dissimilar pair - optional float margin = 1[default = 1.0]; + optional float margin = 1 [default = 1.0]; // The first implementation of this cost did not exactly match the cost of // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. // legacy_version = false (the default) uses (margin - d)^2 as proposed in the // Hadsell paper. New models should probably use this version. // legacy_version = true uses (margin - d^2). This is kept to support / // reproduce existing models and results - optional bool legacy_version = 2[default = false]; + optional bool legacy_version = 2 [default = false]; } message ConvolutionConf { optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2[default = true]; // whether to have bias terms + optional bool bias_term = 2 [default = true]; // whether to have bias terms // Pad, kernel size, and stride are all given as a single value for equal // dimensions in all spatial dimensions, or once per spatial dimension. - repeated uint32 pad = 3; // The padding size; defaults to 0 + repeated uint32 pad = 3; // The padding size; defaults to 0 repeated uint32 kernel_size = 4; // The kernel size - repeated uint32 stride = 6; // The stride; defaults to 1 + repeated uint32 stride = 6; // The stride; defaults to 1 // For 2D convolution only, the *_h and *_w versions may also be used to // specify both spatial dimensions. - optional uint32 pad_h = 9[default = 0]; // The padding height (2D only) - optional uint32 pad_w = 10[default = 0]; // The padding width (2D only) - optional uint32 kernel_h = 11; // The kernel height (2D only) - optional uint32 kernel_w = 12; // The kernel width (2D only) - optional uint32 stride_h = 13; // The stride height (2D only) - optional uint32 stride_w = 14; // The stride width (2D only) + optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) + optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) + optional uint32 kernel_h = 11; // The kernel height (2D only) + optional uint32 kernel_w = 12; // The kernel width (2D only) + optional uint32 stride_h = 13; // The stride height (2D only) + optional uint32 stride_w = 14; // The stride width (2D only) // SINGA: not supported. // optional uint32 group = 5 [default = 1]; // The group size for group conv optional FillerConf weight_filler = 7; // The filler for the weight - optional FillerConf bias_filler = 8; // The filler for the bias + optional FillerConf bias_filler = 8; // The filler for the bias enum Engine { - DEFAULT = 0; CAFFE = 1; CUDNN = 2; - } optional Engine engine = 15[default = DEFAULT]; + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 15 [default = DEFAULT]; // The axis to interpret as "channels" when performing convolution. 
// Preceding dimensions are treated as independent inputs; @@ -365,12 +377,13 @@ message ConvolutionConf { // SINGA: not supported; // optional bool force_nd_im2col = 17 [default = false]; + // SINGA: add by xiangrui // cudnn workspace size in MB - optional int32 workspace_byte_limit = 50[default = 512]; + optional int32 workspace_byte_limit = 50 [default = 512]; // cudnn algorithm preference // options: "fastest", "limited_workspace", "no_workspace" - optional string prefer = 51[default = "fastest"]; + optional string prefer = 51 [default = "fastest"]; // input shape optional int32 channels = 52; optional int32 height = 53; @@ -414,7 +427,7 @@ message DataConf { */ message DropoutConf { - optional float dropout_ratio = 1[default = 0.5]; // dropout ratio + optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio } // DummyDataLayer fills any number of arbitrarily shaped blobs with random @@ -438,13 +451,16 @@ message DummyDataConf { message EltwiseConf { enum EltwiseOp { - PROD = 0; SUM = 1; MAX = 2; - } optional EltwiseOp operation = 1[default = SUM]; // element-wise operation + PROD = 0; + SUM = 1; + MAX = 2; + } + optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation repeated float coeff = 2; // blob-wise coefficient for SUM operation // Whether to use an asymptotically slower (for >2 inputs) but stabler method // of computing the gradient for the PROD operation. (No effect for SUM op.) - optional bool stable_prod_grad = 3[default = true]; + optional bool stable_prod_grad = 3 [default = true]; } // Message that stores hyper-parameters used by EmbedLayer @@ -455,9 +471,9 @@ message EmbedConf { // 1 greater than the maximum possible input value. optional uint32 input_dim = 2; - optional bool bias_term = 3[default = true]; // Whether to use a bias term - optional FillerConf weight_filler = 4; // The filler for the weight - optional FillerConf bias_filler = 5; // The filler for the bias + optional bool bias_term = 3 [default = true]; // Whether to use a bias term + optional FillerConf weight_filler = 4; // The filler for the weight + optional FillerConf bias_filler = 5; // The filler for the bias } @@ -466,21 +482,21 @@ message ExpConf { // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. // Or if base is set to the default (-1), base is set to e, // so y = exp(shift + scale * x). - optional float base = 1[default = -1.0]; - optional float scale = 2[default = 1.0]; - optional float shift = 3[default = 0.0]; + optional float base = 1 [default = -1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; } /// Message that stores hyper-parameters used by FlattenLayer message FlattenConf { // The first axis to flatten: all preceding axes are retained in the output. // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 1[default = 1]; + optional int32 axis = 1 [default = 1]; // The last axis to flatten: all following axes are retained in the output. // May be negative to index from the end (e.g., the default -1 for the last // axis). 
- optional int32 end_axis = 2[default = -1]; + optional int32 end_axis = 2 [default = -1]; } /* @@ -506,10 +522,11 @@ message HDF5OutputConf { message HingeLossConf { enum Norm { - L1 = 1; L2 = 2; + L1 = 1; + L2 = 2; } - // Specify the Norm to use L1 or L2 - optional Norm norm = 1[default = L1]; + // Specify the Norm to use L1 or L2 + optional Norm norm = 1 [default = L1]; } /* @@ -552,29 +569,29 @@ message InfogainLossConf { message InnerProductConf { optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2[default = true]; // whether to have bias terms - optional FillerConf weight_filler = 3; // The filler for the weight - optional FillerConf bias_filler = 4; // The filler for the bias + optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional FillerConf weight_filler = 3; // The filler for the weight + optional FillerConf bias_filler = 4; // The filler for the bias // The first axis to be lumped into a single inner product computation; // all preceding axes are retained in the output. // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 5[default = 1]; + optional int32 axis = 5 [default = 1]; } message DenseConf { optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2[default = true]; // whether to have bias terms - optional FillerConf weight_filler = 3; // The filler for the weight - optional FillerConf bias_filler = 4; // The filler for the bias + optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional FillerConf weight_filler = 3; // The filler for the weight + optional FillerConf bias_filler = 4; // The filler for the bias // The first axis to be lumped into a single inner product computation; // all preceding axes are retained in the output. // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 5[default = 1]; + optional int32 axis = 5 [default = 1]; optional uint32 num_input = 20; // The number of inputs for the layer - optional bool transpose = 21[default = false]; // whether transpose or not + optional bool transpose = 21 [default = false]; // whether transpose or not } // Message that stores hyper-parameters used by LogLayer @@ -582,20 +599,22 @@ message LogConf { // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. 
// Or if base is set to the default (-1), base is set to e, // so y = ln(shift + scale * x) = log_e(shift + scale * x) - optional float base = 1[default = -1.0]; - optional float scale = 2[default = 1.0]; - optional float shift = 3[default = 0.0]; + optional float base = 1 [default = -1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; } // Message that stores hyper-parameters used by LRNLayer message LRNConf { - optional uint32 local_size = 1[default = 5]; - optional float alpha = 2[default = 1.]; - optional float beta = 3[default = 0.75]; + optional uint32 local_size = 1 [default = 5]; + optional float alpha = 2 [default = 1.]; + optional float beta = 3 [default = 0.75]; enum NormRegion { - ACROSS_CHANNELS = 0; WITHIN_CHANNEL = 1; - } optional NormRegion norm_region = 4[default = ACROSS_CHANNELS]; - optional float k = 5[default = 1.]; + ACROSS_CHANNELS = 0; + WITHIN_CHANNEL = 1; + } + optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; + optional float k = 5 [default = 1.]; } message MemoryDataConf { @@ -607,30 +626,33 @@ message MemoryDataConf { message MVNConf { // This parameter can be set to false to normalize mean only - optional bool normalize_variance = 1[default = true]; + optional bool normalize_variance = 1 [default = true]; // This parameter can be set to true to perform DNN-like MVN - optional bool across_channels = 2[default = false]; + optional bool across_channels = 2 [default = false]; // Epsilon for not dividing by zero while normalizing variance - optional float eps = 3[default = 1e-9]; + optional float eps = 3 [default = 1e-9]; } message PoolingConf { enum PoolMethod { - MAX = 0; AVE = 1; STOCHASTIC = 2; - } optional PoolMethod pool = 1[default = MAX]; // The pooling method + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional PoolMethod pool = 1 [default = MAX]; // The pooling method // Pad, kernel size, and stride are all given as a single value for equal // dimensions in height and width or as Y, X pairs. 
- optional uint32 pad = 4[default = 0]; // The padding size (equal in Y, X) - optional uint32 pad_h = 9[default = 0]; // The padding height - optional uint32 pad_w = 10[default = 0]; // The padding width - optional uint32 kernel_size = 2; // The kernel size (square) - optional uint32 kernel_h = 5; // The kernel height - optional uint32 kernel_w = 6; // The kernel width - optional uint32 stride = 3[default = 1]; // The stride (equal in Y, X) - optional uint32 stride_h = 7; // The stride height - optional uint32 stride_w = 8; // The stride width + optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) + optional uint32 pad_h = 9 [default = 0]; // The padding height + optional uint32 pad_w = 10 [default = 0]; // The padding width + optional uint32 kernel_size = 2; // The kernel size (square) + optional uint32 kernel_h = 5; // The kernel height + optional uint32 kernel_w = 6; // The kernel width + optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) + optional uint32 stride_h = 7; // The stride height + optional uint32 stride_w = 8; // The stride width /* enum Engine { DEFAULT = 0; @@ -641,20 +663,20 @@ message PoolingConf { */ // If global_pooling then it will pool over the size of the bottom by doing // kernel_h = bottom->height and kernel_w = bottom->width - optional bool global_pooling = 12[default = false]; + optional bool global_pooling = 12 [default = false]; // Shape of source optional int32 channels = 50; optional int32 height = 51; optional int32 width = 52; // whether to propagate nan - optional bool nan_prop = 53[default = false]; + optional bool nan_prop = 53 [default = false]; } message PowerConf { // PowerLayer computes outputs y = (shift + scale * x) ^ power. - optional float power = 1[default = 1.0]; - optional float scale = 2[default = 1.0]; - optional float shift = 3[default = 0.0]; + optional float power = 1 [default = 1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; } /* message PythonConf { @@ -665,8 +687,7 @@ message PythonConf { // string, dictionary in Python dict format, JSON, etc. You may parse this // string in `setup` method and use it in `forward` and `backward`. optional string param_str = 3 [default = '']; - // Whether this PythonLayer is shared among worker solvers during data -parallelism. + // Whether this PythonLayer is shared among worker solvers during data parallelism. // If true, each worker solver sequentially run forward from this layer. // This value should be set true if you are using it as a data layer. optional bool share_in_parallel = 4 [default = false]; @@ -676,8 +697,13 @@ parallelism. // Message that stores hyper-parameters used by ReductionLayer message ReductionConf { enum ReductionOp { - SUM = 1; ASUM = 2; SUMSQ = 3; MEAN = 4; - } optional ReductionOp operation = 1[default = SUM]; // reduction operation + SUM = 1; + ASUM = 2; + SUMSQ = 3; + MEAN = 4; + } + + optional ReductionOp operation = 1 [default = SUM]; // reduction operation // The first axis to reduce to a scalar -- may be negative to index from the // end (e.g., -1 for the last axis). @@ -692,9 +718,9 @@ message ReductionConf { // If axis == 0 (the default), the output Blob always has the empty shape // (count 1), performing reduction across the entire input -- // often useful for creating new loss functions. 
- optional int32 axis = 2[default = 0]; + optional int32 axis = 2 [default = 0]; - optional float coeff = 3[default = 1.0]; // coefficient for output + optional float coeff = 3 [default = 1.0]; // coefficient for output } // Message that stores hyper-parameters used by ReLULayer @@ -704,7 +730,7 @@ message ReLUConf { // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities // improve neural network acoustic models. In ICML Workshop on Deep Learning // for Audio, Speech, and Language Processing. - optional float negative_slope = 1[default = 0]; + optional float negative_slope = 1 [default = 0]; /* enum Engine { DEFAULT = 0; @@ -775,50 +801,58 @@ message ReshapeConf { // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } // - optional int32 axis = 2[default = 0]; - optional int32 num_axes = 3[default = -1]; + optional int32 axis = 2 [default = 0]; + optional int32 num_axes = 3 [default = -1]; } message SigmoidConf { enum Engine { - DEFAULT = 0; CAFFE = 1; CUDNN = 2; - } optional Engine engine = 1[default = DEFAULT]; + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; } message SliceConf { // The axis along which to slice -- may be negative to index from the end // (e.g., -1 for the last axis). // By default, SliceLayer concatenates blobs along the "channels" axis (1). - optional int32 axis = 3[default = 1]; + optional int32 axis = 3 [default = 1]; repeated uint32 slice_point = 2; // DEPRECATED: alias for "axis" -- does not support negative indexing. - optional uint32 slice_dim = 1[default = 1]; + optional uint32 slice_dim = 1 [default = 1]; } -// Message that stores hyper-parameters used by SoftmaxLayer, -// SoftmaxWithLossLayer +// Message that stores hyper-parameters used by SoftmaxLayer, SoftmaxWithLossLayer message SoftmaxConf { enum Engine { - DEFAULT = 0; CAFFE = 1; CUDNN = 2; - } optional Engine engine = 1[default = DEFAULT]; + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; // The axis along which to perform the softmax -- may be negative to index // from the end (e.g., -1 for the last axis). // Any other axes will be evaluated as independent softmaxes. - optional int32 axis = 2[default = 1]; + optional int32 axis = 2 [default = 1]; } message TanHConf { enum Engine { - DEFAULT = 0; CAFFE = 1; CUDNN = 2; - } optional Engine engine = 1[default = DEFAULT]; + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; } // Message that stores hyper-parameters used by TileLayer message TileConf { // The index of the axis to tile. - optional int32 axis = 1[default = 1]; + optional int32 axis = 1 [default = 1]; // The number of copies (tiles) of the blob to output. 
optional int32 tiles = 2; @@ -826,7 +860,7 @@ message TileConf { // Message that stores hyper-parameters used by ThresholdLayer message ThresholdConf { - optional float threshold = 1[default = 0]; // Strictly positive values + optional float threshold = 1 [default = 0]; // Strictly positive values } /* @@ -866,12 +900,18 @@ message WindowDataConf { message SPPConf { enum PoolMethod { - MAX = 0; AVE = 1; STOCHASTIC = 2; - } optional uint32 pyramid_height = 1; - optional PoolMethod pool = 2[default = MAX]; // The pooling method + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional uint32 pyramid_height = 1; + optional PoolMethod pool = 2 [default = MAX]; // The pooling method enum Engine { - DEFAULT = 0; CAFFE = 1; CUDNN = 2; - } optional Engine engine = 6[default = DEFAULT]; + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 6 [default = DEFAULT]; } message PReLUConf { @@ -881,15 +921,15 @@ message PReLUConf { // Initial value of a_i. Default is a_i=0.25 for all i. optional FillerConf filler = 1; // Whether or not slope paramters are shared across channels. - optional bool channel_shared = 2[default = false]; - // format of the input. Default is NCHW. - optional string format = 50[default = "NCHW"]; + optional bool channel_shared = 2 [default = false]; + + optional string format = 20 [default = "NCHW"]; } message BatchNormConf { // Used in the moving average computation runningMean = // newMean*factor + runningMean*(1-factor). - optional double factor = 1[default = 0.9]; + optional double factor = 1 [default = 0.9]; // input shape optional int32 channels = 2; optional int32 height = 3; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/58be3f80/test/singa/test_flatten.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_flatten.cc b/test/singa/test_flatten.cc index 906e4b8..0ba8d3c 100644 --- a/test/singa/test_flatten.cc +++ b/test/singa/test_flatten.cc @@ -36,10 +36,10 @@ TEST(Flatten, Setup) { } TEST(Flatten, ForwardCPU) { - const float x[] = { 1.f, 2.f, 3.f, -2.f, -3.f, -4.f, 1.5f, -1.5f, 0.f, -0.5f, - -2.f, -1.f }; + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -4.f, + 1.5f, -1.5f, 0.f, -0.5f, -2.f, -1.f}; size_t n = sizeof(x) / sizeof(float); - singa::Shape s = { 2, 1, 3, 2 }; + singa::Shape s = {2, 1, 3, 2}; singa::Tensor in(s); in.CopyDataFromHostPtr<float>(x, n); @@ -52,22 +52,19 @@ TEST(Flatten, ForwardCPU) { singa::Tensor out = flt.Forward(singa::kTrain, in); EXPECT_EQ(n, out.Size()); - EXPECT_EQ(6, out.shape(0)); - EXPECT_EQ(2, out.shape(1)); + EXPECT_EQ(6u, out.shape(0)); + EXPECT_EQ(2u, out.shape(1)); const float *yptr = out.data<const float *>(); - for (size_t i = 0; i < n; i++) - EXPECT_FLOAT_EQ(x[i], yptr[i]); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(x[i], yptr[i]); } TEST(Flatten, BackwardCPU) { // directly use input as the output_grad for backward // note that only the shape of input really matters - const float dy[] = { 1.f, 2.f, 3.f, -2.f, -3.f, -4.f, 1.5f, -1.5f, 0.f, -0.5f, - -2.f, -1.f }; + const float dy[] = {1.f, 2.f, 3.f, -2.f, -3.f, -4.f, + 1.5f, -1.5f, 0.f, -0.5f, -2.f, -1.f}; size_t n = sizeof(dy) / sizeof(float); - singa::Tensor in(singa::Shape { - 2, 1, 3, 2 - }); + singa::Tensor in(singa::Shape{2, 1, 3, 2}); in.CopyDataFromHostPtr<float>(dy, n); int axis = 2; @@ -81,24 +78,20 @@ TEST(Flatten, BackwardCPU) { const auto out = flt.Backward(singa::kTrain, temp); const float *xptr = out.first.data<const float *>(); EXPECT_EQ(n, out.first.Size()); - EXPECT_EQ(2, 
out.first.shape(0)); - EXPECT_EQ(1, out.first.shape(1)); - EXPECT_EQ(3, out.first.shape(2)); - EXPECT_EQ(2, out.first.shape(3)); - for (size_t i = 0; i < n; i++) - EXPECT_FLOAT_EQ(dy[i], xptr[i]); + EXPECT_EQ(2u, out.first.shape(0)); + EXPECT_EQ(1u, out.first.shape(1)); + EXPECT_EQ(3u, out.first.shape(2)); + EXPECT_EQ(2u, out.first.shape(3)); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dy[i], xptr[i]); } #ifdef USE_CUDA TEST(Flatten, ForwardGPU) { - const float x[] = { 1.f, 2.f, 3.f, -2.f, -3.f, -4.f, 1.5f, -1.5f, 0.f, -0.5f, - -2.f, -1.f }; + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -4.f, + 1.5f, -1.5f, 0.f, -0.5f, -2.f, -1.f}; size_t n = sizeof(x) / sizeof(float); singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape { - 2, 1, 3, 2 - }, - &cuda); + singa::Tensor in(singa::Shape{2, 1, 3, 2}, &cuda); in.CopyDataFromHostPtr<float>(x, n); int axis = 3; @@ -112,24 +105,20 @@ TEST(Flatten, ForwardGPU) { singa::CppCPU host(0, 1); out.ToDevice(&host); EXPECT_EQ(n, out.Size()); - EXPECT_EQ(6, out.shape(0)); - EXPECT_EQ(2, out.shape(1)); + EXPECT_EQ(6u, out.shape(0)); + EXPECT_EQ(2u, out.shape(1)); const float *yptr = out.data<const float *>(); - for (size_t i = 0; i < n; i++) - EXPECT_FLOAT_EQ(x[i], yptr[i]); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(x[i], yptr[i]); } TEST(Flatten, BackwardGPU) { // directly use input as the output_grad for backward // note that only the shape of input really matters - const float dy[] = { 1.f, 2.f, 3.f, -2.f, -3.f, -4.f, 1.5f, -1.5f, 0.f, -0.5f, - -2.f, -1.f }; + const float dy[] = {1.f, 2.f, 3.f, -2.f, -3.f, -4.f, + 1.5f, -1.5f, 0.f, -0.5f, -2.f, -1.f}; size_t n = sizeof(dy) / sizeof(float); singa::CudaGPU cuda(0, 1); - singa::Tensor in(singa::Shape { - 2, 1, 3, 2 - }, - &cuda); + singa::Tensor in(singa::Shape{2, 1, 3, 2}, &cuda); in.CopyDataFromHostPtr<float>(dy, n); int axis = 2; @@ -146,11 +135,10 @@ TEST(Flatten, BackwardGPU) { in_diff.ToDevice(&host); const float *xptr = in_diff.data<const float *>(); EXPECT_EQ(n, in_diff.Size()); - EXPECT_EQ(2, in_diff.shape(0)); - EXPECT_EQ(1, in_diff.shape(1)); - EXPECT_EQ(3, in_diff.shape(2)); - EXPECT_EQ(2, in_diff.shape(3)); - for (size_t i = 0; i < n; i++) - EXPECT_FLOAT_EQ(dy[i], xptr[i]); + EXPECT_EQ(2u, in_diff.shape(0)); + EXPECT_EQ(1u, in_diff.shape(1)); + EXPECT_EQ(3u, in_diff.shape(2)); + EXPECT_EQ(2u, in_diff.shape(3)); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dy[i], xptr[i]); } #endif // USE_CUDA http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/58be3f80/test/singa/test_prelu.cc ---------------------------------------------------------------------- diff --git a/test/singa/test_prelu.cc b/test/singa/test_prelu.cc index 2dde9e9..6fc372b 100644 --- a/test/singa/test_prelu.cc +++ b/test/singa/test_prelu.cc @@ -39,13 +39,11 @@ TEST(PReLU, Setup) { } TEST(PReLU, ForwardCPU) { - const float x[] = { 1.f, 2.f, 3.f, -2.f, -3.f, -1.f, -1.f, 2.f, -1.f, -2.f, - -2.f, -1.f }; + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f, + -1.f, 2.f, -1.f, -2.f, -2.f, -1.f}; size_t n = sizeof(x) / sizeof(float); size_t batchsize = 2, c = 3, h = 2, w = 1; - singa::Tensor in(singa::Shape { - batchsize, h, w, c - }); + singa::Tensor in(singa::Shape{batchsize, h, w, c}); in.CopyDataFromHostPtr<float>(x, n); PReLU prelu; @@ -55,10 +53,8 @@ TEST(PReLU, ForwardCPU) { preluconf->set_format("NHWC"); prelu.Setup(conf); - const float neg_slope[] = { 0.25f, 0.5f, 0.75f }; - singa::Tensor a(singa::Shape { - c - }); + const float neg_slope[] = {0.25f, 0.5f, 0.75f}; + singa::Tensor a(singa::Shape{c}); 
a.CopyDataFromHostPtr<float>(neg_slope, c); prelu.Set_a(a); @@ -79,17 +75,15 @@ TEST(PReLU, ForwardCPU) { y[i] = std::max(x[i], 0.f) + neg_slope[pos] * std::min(x[i], 0.f); } } - for (size_t i = 0; i < n; i++) - EXPECT_FLOAT_EQ(y[i], yptr[i]); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(y[i], yptr[i]); } TEST(PReLU, BackwardCPU) { - const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f, -1.f, 2.f, -1.f, -2.f, -2.f, -1.f}; + const float x[] = {1.f, 2.f, 3.f, -2.f, -3.f, -1.f, + -1.f, 2.f, -1.f, -2.f, -2.f, -1.f}; size_t n = sizeof(x) / sizeof(float); size_t batchsize = 2, c = 3, h = 2, w = 1; - singa::Tensor in(singa::Shape { - batchsize, c, h, w - }); + singa::Tensor in(singa::Shape{batchsize, c, h, w}); in.CopyDataFromHostPtr<float>(x, n); PReLU prelu; @@ -99,20 +93,16 @@ TEST(PReLU, BackwardCPU) { preluconf->set_format("NCHW"); prelu.Setup(conf); - const float neg_slope[] = { 0.25f, 0.5f, 0.75f }; - singa::Tensor a(singa::Shape { - c - }); + const float neg_slope[] = {0.25f, 0.5f, 0.75f}; + singa::Tensor a(singa::Shape{c}); a.CopyDataFromHostPtr<float>(neg_slope, c); prelu.Set_a(a); singa::Tensor out = prelu.Forward(singa::kTrain, in); - const float grad[] = { 1.f, 2.f, -2.f, -1.f, -1.f, -3.f, 2.f, -2.f, 1.f, 1.f, - -2.f, 0.f }; - singa::Tensor out_diff(singa::Shape { - batchsize, c, h, w - }); + const float grad[] = {1.f, 2.f, -2.f, -1.f, -1.f, -3.f, + 2.f, -2.f, 1.f, 1.f, -2.f, 0.f}; + singa::Tensor out_diff(singa::Shape{batchsize, c, h, w}); out_diff.CopyDataFromHostPtr<float>(grad, n); const auto ret = prelu.Backward(singa::kTrain, out_diff); const float *xptr = ret.first.data<const float *>(); @@ -120,7 +110,7 @@ TEST(PReLU, BackwardCPU) { float *dx = new float[n]; size_t div_factor = prelu.Channel_shared() ? c : 1; size_t params = prelu.Channel_shared() ? 1 : c; - float da[] = { 0.f, 0.f, 0.f }; + float da[] = {0.f, 0.f, 0.f}; if (prelu.Format() == "NCHW") { for (size_t i = 0; i < n; i++) { size_t pos = i / (h * w) % c / div_factor; @@ -142,8 +132,6 @@ TEST(PReLU, BackwardCPU) { da[pos] += grad[i] * std::min(x[i], 0.f); } } - for (size_t i = 0; i < n; i++) - EXPECT_FLOAT_EQ(dx[i], xptr[i]); - for (size_t i = 0; i < params; i++) - EXPECT_FLOAT_EQ(da[i], aptr[i]); + for (size_t i = 0; i < n; i++) EXPECT_FLOAT_EQ(dx[i], xptr[i]); + for (size_t i = 0; i < params; i++) EXPECT_FLOAT_EQ(da[i], aptr[i]); }
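
For reviewers skimming the tests, a condensed sketch of how the new PReLU layer is exercised, taken from test_prelu.cc above. Here `in` and `out_diff` stand for the input and gradient tensors the test builds with CopyDataFromHostPtr, and the snippet assumes the usual SINGA headers; it is not a standalone program.

    // Configure and run the new PReLU layer (NCHW input, per-channel slopes).
    singa::PReLU prelu;
    singa::LayerConf conf;
    singa::PReLUConf *preluconf = conf.mutable_prelu_conf();
    preluconf->set_channel_shared(false);
    preluconf->set_format("NCHW");
    prelu.Setup(conf);

    const float neg_slope[] = {0.25f, 0.5f, 0.75f};
    singa::Tensor a(singa::Shape{3});
    a.CopyDataFromHostPtr<float>(neg_slope, 3);
    prelu.Set_a(a);  // install the learnable slope parameter a_

    singa::Tensor out = prelu.Forward(singa::kTrain, in);      // forward pass
    const auto ret = prelu.Backward(singa::kTrain, out_diff);  // {input grad, {da}}
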
