SINGA-176 - Add loss and metric base classes Rename layer.proto to model.proto, which includes proto messages for classes under model/
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/3171459b Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/3171459b Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/3171459b Branch: refs/heads/master Commit: 3171459b0ce39722c42b5eef96ae4892e274cb5c Parents: a1c3437 Author: Wei Wang <[email protected]> Authored: Thu May 26 14:06:50 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Thu May 26 14:11:18 2016 +0800 ---------------------------------------------------------------------- include/singa/model/layer.h | 2 +- include/singa/model/loss.h | 2 +- include/singa/model/metric.h | 2 +- src/model/layer/cudnn_dropout.h | 1 - src/proto/layer.proto | 852 ----------------------------------- src/proto/model.proto | 852 +++++++++++++++++++++++++++++++++++ 6 files changed, 855 insertions(+), 856 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3171459b/include/singa/model/layer.h ---------------------------------------------------------------------- diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h index 084c42e..5803295 100644 --- a/include/singa/model/layer.h +++ b/include/singa/model/layer.h @@ -24,7 +24,7 @@ #include <stack> #include <utility> #include "singa/core/tensor.h" -#include "singa/proto/layer.pb.h" +#include "singa/proto/model.pb.h" namespace singa { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3171459b/include/singa/model/loss.h ---------------------------------------------------------------------- diff --git a/include/singa/model/loss.h b/include/singa/model/loss.h index 6c79e7b..6a23067 100644 --- a/include/singa/model/loss.h +++ b/include/singa/model/loss.h @@ -18,7 +18,7 @@ #ifndef SINGA_MODEL_LOSS_H_ #define SINGA_MODEL_LOSS_H_ -#include "singa/proto/layer.pb.h" +#include "singa/proto/model.pb.h" #include "singa/core/tensor.h" namespace singa { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3171459b/include/singa/model/metric.h ---------------------------------------------------------------------- diff --git a/include/singa/model/metric.h b/include/singa/model/metric.h index 6519028..b99ff0d 100644 --- a/include/singa/model/metric.h +++ b/include/singa/model/metric.h @@ -19,7 +19,7 @@ #ifndef SINGA_MODEL_METRIC_H_ #define SINGA_MODEL_METRIC_H_ #include "singa/core/tensor.h" -#include "singa/proto/layer.pb.h" +#include "singa/proto/model.pb.h" namespace singa { /// The base metric class, which declares the APIs for computing the performance http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3171459b/src/model/layer/cudnn_dropout.h ---------------------------------------------------------------------- diff --git a/src/model/layer/cudnn_dropout.h b/src/model/layer/cudnn_dropout.h index d3b3de6..7cb185b 100644 --- a/src/model/layer/cudnn_dropout.h +++ b/src/model/layer/cudnn_dropout.h @@ -30,7 +30,6 @@ #include <vector> #include "singa/model/layer.h" -#include "singa/proto/core.pb.h" namespace singa { class CudnnDropout : public Dropout { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3171459b/src/proto/layer.proto ---------------------------------------------------------------------- diff --git a/src/proto/layer.proto b/src/proto/layer.proto deleted file mode 100644 index 51225ee..0000000 --- a/src/proto/layer.proto +++ /dev/null @@ -1,852 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package singa; - -/// \file layer.proto is adapted from [Caffe](https://github.com/BVLC/caffe/)'s -/// proto file with commit id c419f8517b1e1b3d7a07fe212fc6c90a70b519ea. We -/// use caffe's protocol for configuring layer hyper-parameters for easy -/// transporting Caffe model into SINGA. Specifically, we do the following -/// changes: -/// 1. we rename LayerParameter to LayerConf to differentiate model parameters -/// 2. we rename xxxParameter to xxxConf for fields of LayerParameter -/// 3. we comment out some fields (using /*...*/) not used in SINGA layer but -/// reserve their tags. -/// 4. we add new fields (commented like 'singa field..') to support our own -/// functionalities. -/// TODO(wangwei) write a proto converter to automatically load caffe models -/// using Python (or C++/Java). - -// Specifies the shape (dimensions) of a Blob. -message BlobShape { - repeated int64 dim = 1 [packed = true]; -} - -message BlobProto { - optional BlobShape shape = 7; - repeated float data = 5 [packed = true]; - repeated float diff = 6 [packed = true]; - repeated double double_data = 8 [packed = true]; - repeated double double_diff = 9 [packed = true]; - - // 4D dimensions -- deprecated. Use "shape" instead. - optional int32 num = 1 [default = 0]; - optional int32 channels = 2 [default = 0]; - optional int32 height = 3 [default = 0]; - optional int32 width = 4 [default = 0]; -} - -message FillerConf { - // The filler type. - optional string type = 1 [default = 'constant']; - optional float value = 2 [default = 0]; // the value in constant filler - optional float min = 3 [default = 0]; // the min value in uniform filler - optional float max = 4 [default = 1]; // the max value in uniform filler - optional float mean = 5 [default = 0]; // the mean value in Gaussian filler - optional float std = 6 [default = 1]; // the std value in Gaussian filler - // The expected number of non-zero output weights for a given input in - // Gaussian filler -- the default -1 means don't perform sparsification. - /* optional int32 sparse = 7 [default = -1]; */ - // Normalize the filler variance by fan_in, fan_out, or their average. - // Applies to 'xavier' and 'msra' fillers. - enum VarianceNorm { - FAN_IN = 0; - FAN_OUT = 1; - AVERAGE = 2; - } - optional VarianceNorm variance_norm = 8 [default = FAN_IN]; -} - -// Specifies training parameters (multipliers on global learning constants, -// and the name and other settings used for weight sharing). -message ParamSpec { - // The names of the parameter blobs -- useful for sharing parameters among - // layers, but never required otherwise. To share a parameter between two - // layers, give it a (non-empty) name. - optional string name = 1; - - // Whether to require shared weights to have the same shape, or just the same - // count -- defaults to STRICT if unspecified. - /* - optional DimCheckMode share_mode = 2; - enum DimCheckMode { - // STRICT (default) requires that num, channels, height, width each match. - STRICT = 0; - // PERMISSIVE requires only the count (num*channels*height*width) to match. - PERMISSIVE = 1; - } - */ - - // The multiplier on the global learning rate for this parameter. - optional float lr_mult = 3 [default = 1.0]; - - // The multiplier on the global weight decay for this parameter. - optional float decay_mult = 4 [default = 1.0]; - - // SINGA uses this filed internally. Users just configure the fillers in - // Layer specific conf message as caffe (style). - optional FillerConf filler = 20; -} - -enum Phase { - kTrain = 4; - kEval = 8; -} -// NOTE -// Update the next available ID when you add a new LayerConf field. -// -// LayerConf next available layer-specific ID: 139 (last added: tile_param) -message LayerConf { - optional string name = 1; // the layer name - optional string type = 2; // the layer type - /* repeated string bottom = 3; // the name of each bottom blob */ - /* repeated string top = 4; // the name of each top blob */ - - // The train / test phase for computation. - // optional Phase phase = 10; - - // The amount of weight to assign each top blob in the objective. - // Each layer assigns a default value, usually of either 0 or 1, - // to each top blob. - /* repeated float loss_weight = 5; */ - - // Specifies training parameters (multipliers on global learning constants, - // and the name and other settings used for weight sharing). - repeated ParamSpec param = 6; - - // The blobs containing the numeric parameters of the layer. - repeated BlobProto blobs = 7; - - // Specifies on which bottoms the backpropagation should be skipped. - // The size must be either 0 or equal to the number of bottoms. - /* repeated bool propagate_down = 11; */ - - // Rules controlling whether and when a layer is included in the network, - // based on the current NetState. You may specify a non-zero number of rules - // to include OR exclude, but not both. If no include or exclude rules are - // specified, the layer is always included. If the current NetState meets - // ANY (i.e., one or more) of the specified rules, the layer is - // included/excluded. - /* repeated NetStateRule include = 8; */ - /* repeated NetStateRule exclude = 9; */ - - // Confs for data pre-processing. - /* optional TransformationConf transform_param = 100; */ - - // Confs shared by loss layers. - /* optional LossConf loss_param = 101; */ - - // Layer type-specific parameters. - // - // Note: certain layers may have more than one computational engine - // for their implementation. These layers include an Engine type and - // engine parameter for selecting the implementation. - // The default for the engine is set by the ENGINE switch at compile-time. - //optional AccuracyConf accuracy_conf = 102; - optional ArgMaxConf argmax_conf = 103; - optional ConcatConf concat_conf = 104; - optional ContrastiveLossConf contrastive_loss_conf = 105; - optional ConvolutionConf convolution_conf = 106; - // optional DataConf data_conf = 107; - optional DropoutConf dropout_conf = 108; - // optional DummyDataConf dummy_data_conf = 109; - optional EltwiseConf eltwise_conf = 110; - optional EmbedConf embed_conf = 137; - optional ExpConf exp_conf = 111; - optional FlattenConf flatten_conf = 135; - // optional HDF5DataConf hdf5_data_conf = 112; - // optional HDF5OutputConf hdf5_output_conf = 113; - optional HingeLossConf hinge_loss_conf = 114; - // optional ImageDataConf image_data_conf = 115; - optional InfogainLossConf infogain_loss_conf = 116; - optional InnerProductConf inner_product_conf = 117; - optional LogConf log_conf = 134; - optional LRNConf lrn_conf = 118; - // Used in SINGA - optional MetricConf metric_conf = 200; - // optional MemoryDataConf memory_data_conf = 119; - optional MVNConf mvn_conf = 120; - optional PoolingConf pooling_conf = 121; - optional PowerConf power_conf = 122; - optional PReLUConf prelu_conf = 131; - // optional PythonConf python_conf = 130; - optional ReductionConf reduction_conf = 136; - optional ReLUConf relu_conf = 123; - optional ReshapeConf reshape_conf = 133; - optional SigmoidConf sigmoid_conf = 124; - optional SoftmaxConf softmax_conf = 125; - optional SPPConf spp_conf = 132; - optional SliceConf slice_conf = 126; - optional TanHConf tanh_conf = 127; - optional ThresholdConf threshold_conf = 128; - optional TileConf tile_conf = 138; - //optional WindowDataConf window_data_conf = 129; -} - -// Message that stores hyper-parameters used to apply transformation -// to the data layer's data -/* -message TransformationConf { - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 1 [default = 1]; - // Specify if we want to randomly mirror data. - optional bool mirror = 2 [default = false]; - // Specify if we would like to randomly crop an image. - optional uint32 crop_size = 3 [default = 0]; - // mean_file and mean_value cannot be specified at the same time - optional string mean_file = 4; - // if specified can be repeated once (would substract it from all the channels) - // or can be repeated the same number of times as channels - // (would subtract them from the corresponding channel) - repeated float mean_value = 5; - // Force the decoded image to have 3 color channels. - optional bool force_color = 6 [default = false]; - // Force the decoded image to have 1 color channels. - optional bool force_gray = 7 [default = false]; -} -*/ - -// Message that stores hyper-parameters shared by loss layers -message LossConf { - // If specified, ignore instances with the given label. - optional int32 ignore_label = 1; - // If true, normalize each batch across all instances (including spatial - // dimesions, but not ignored instances); else, divide by batch size only. - optional bool normalize = 2 [default = true]; -} - -message MetricConf { - // When computing accuracy, count as correct by comparing the true label to - // the top k scoring classes. By default, only compare to the top scoring - // class (i.e. argmax). - optional uint32 top_k = 1 [default = 1]; - - // The "label" axis of the prediction blob, whose argmax corresponds to the - // predicted label -- may be negative to index from the end (e.g., -1 for the - // last axis). For example, if axis == 1 and the predictions are - // (N x C x H x W), the label blob is expected to contain N*H*W ground truth - // labels with integer values in {0, 1, ..., C-1}. - optional int32 axis = 2 [default = 1]; - - // If specified, ignore instances with the given label. - optional int32 ignore_label = 3; -} -// Messages that store hyper-parameters used by individual layer types follow, in -// alphabetical order. - - - -message ArgMaxConf { - // If true produce pairs (argmax, maxval) - optional bool out_max_val = 1 [default = false]; - optional uint32 top_k = 2 [default = 1]; - // The axis along which to maximise -- may be negative to index from the - // end (e.g., -1 for the last axis). - // By default ArgMaxLayer maximizes over the flattened trailing dimensions - // for each index of the first / num dimension. - optional int32 axis = 3; -} - -message ConcatConf { - // The axis along which to concatenate -- may be negative to index from the - // end (e.g., -1 for the last axis). Other axes must have the - // same dimension for all the bottom blobs. - // By default, ConcatLayer concatenates blobs along the "channels" axis (1). - optional int32 axis = 2 [default = 1]; - - // DEPRECATED: alias for "axis" -- does not support negative indexing. - optional uint32 concat_dim = 1 [default = 1]; -} - -message ContrastiveLossConf { - // margin for dissimilar pair - optional float margin = 1 [default = 1.0]; - // The first implementation of this cost did not exactly match the cost of - // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. - // legacy_version = false (the default) uses (margin - d)^2 as proposed in the - // Hadsell paper. New models should probably use this version. - // legacy_version = true uses (margin - d^2). This is kept to support / - // reproduce existing models and results - optional bool legacy_version = 2 [default = false]; -} - -message ConvolutionConf { - optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2 [default = true]; // whether to have bias terms - - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in all spatial dimensions, or once per spatial dimension. - repeated uint32 pad = 3; // The padding size; defaults to 0 - repeated uint32 kernel_size = 4; // The kernel size - repeated uint32 stride = 6; // The stride; defaults to 1 - - // For 2D convolution only, the *_h and *_w versions may also be used to - // specify both spatial dimensions. - optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) - optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) - optional uint32 kernel_h = 11; // The kernel height (2D only) - optional uint32 kernel_w = 12; // The kernel width (2D only) - optional uint32 stride_h = 13; // The stride height (2D only) - optional uint32 stride_w = 14; // The stride width (2D only) - - optional uint32 group = 5 [default = 1]; // The group size for group conv - - optional FillerConf weight_filler = 7; // The filler for the weight - optional FillerConf bias_filler = 8; // The filler for the bias - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 15 [default = DEFAULT]; - - // The axis to interpret as "channels" when performing convolution. - // Preceding dimensions are treated as independent inputs; - // succeeding dimensions are treated as "spatial". - // With (N, C, H, W) inputs, and axis == 1 (the default), we perform - // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for - // groups g>1) filters across the spatial axes (H, W) of the input. - // With (N, C, D, H, W) inputs, and axis == 1, we perform - // N independent 3D convolutions, sliding (C/g)-channels - // filters across the spatial axes (D, H, W) of the input. - optional int32 axis = 16 [default = 1]; - - // Whether to force use of the general ND convolution, even if a specific - // implementation for blobs of the appropriate number of spatial dimensions - // is available. (Currently, there is only a 2D-specific convolution - // implementation; for input blobs with num_axes != 2, this option is - // ignored and the ND implementation will be used.) - optional bool force_nd_im2col = 17 [default = false]; -} - -/* -message DataConf { - enum DB { - LEVELDB = 0; - LMDB = 1; - } - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 4; - // The rand_skip variable is for the data layer to skip a few data points - // to avoid all asynchronous sgd clients to start at the same point. The skip - // point would be set as rand_skip * rand(0,1). Note that rand_skip should not - // be larger than the number of keys in the database. - // DEPRECATED. Each solver accesses a different subset of the database. - optional uint32 rand_skip = 7 [default = 0]; - optional DB backend = 8 [default = LEVELDB]; - // DEPRECATED. See TransformationConf. For data pre-processing, we can do - // simple scaling and subtracting the data mean, if provided. Note that the - // mean subtraction is always carried out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // DEPRECATED. See TransformationConf. Specify if we would like to randomly - // crop an image. - optional uint32 crop_size = 5 [default = 0]; - // DEPRECATED. See TransformationConf. Specify if we want to randomly mirror - // data. - optional bool mirror = 6 [default = false]; - // Force the encoded image to have 3 color channels - optional bool force_encoded_color = 9 [default = false]; - // Prefetch queue (Number of batches to prefetch to host memory, increase if - // data access bandwidth varies). - optional uint32 prefetch = 10 [default = 4]; -} -*/ - -message DropoutConf { - optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio -} - -// DummyDataLayer fills any number of arbitrarily shaped blobs with random -// (or constant) data generated by "Fillers" (see "message FillerConf"). -message DummyDataConf { - // This layer produces N >= 1 top blobs. DummyDataConf must specify 1 or N - // shape fields, and 0, 1 or N data_fillers. - // - // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. - // If 1 data_filler is specified, it is applied to all top blobs. If N are - // specified, the ith is applied to the ith top blob. - repeated FillerConf data_filler = 1; - repeated BlobShape shape = 6; - - // 4D dimensions -- deprecated. Use "shape" instead. - repeated uint32 num = 2; - repeated uint32 channels = 3; - repeated uint32 height = 4; - repeated uint32 width = 5; -} - -message EltwiseConf { - enum EltwiseOp { - PROD = 0; - SUM = 1; - MAX = 2; - } - optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation - repeated float coeff = 2; // blob-wise coefficient for SUM operation - - // Whether to use an asymptotically slower (for >2 inputs) but stabler method - // of computing the gradient for the PROD operation. (No effect for SUM op.) - optional bool stable_prod_grad = 3 [default = true]; -} - -// Message that stores hyper-parameters used by EmbedLayer -message EmbedConf { - optional uint32 num_output = 1; // The number of outputs for the layer - // The input is given as integers to be interpreted as one-hot - // vector indices with dimension num_input. Hence num_input should be - // 1 greater than the maximum possible input value. - optional uint32 input_dim = 2; - - optional bool bias_term = 3 [default = true]; // Whether to use a bias term - optional FillerConf weight_filler = 4; // The filler for the weight - optional FillerConf bias_filler = 5; // The filler for the bias - -} - -// Message that stores hyper-parameters used by ExpLayer -message ExpConf { - // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. - // Or if base is set to the default (-1), base is set to e, - // so y = exp(shift + scale * x). - optional float base = 1 [default = -1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} - -/// Message that stores hyper-parameters used by FlattenLayer -message FlattenConf { - // The first axis to flatten: all preceding axes are retained in the output. - // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 1 [default = 1]; - - // The last axis to flatten: all following axes are retained in the output. - // May be negative to index from the end (e.g., the default -1 for the last - // axis). - optional int32 end_axis = 2 [default = -1]; -} - -/* -// Message that stores hyper-parameters used by HDF5DataLayer -message HDF5DataConf { - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 2; - - // Specify whether to shuffle the data. - // If shuffle == true, the ordering of the HDF5 files is shuffled, - // and the ordering of data within any given HDF5 file is shuffled, - // but data between different files are not interleaved; all of a file's - // data are output (in a random order) before moving onto another file. - optional bool shuffle = 3 [default = false]; -} - -message HDF5OutputConf { - optional string file_name = 1; -} -*/ - -message HingeLossConf { - enum Norm { - L1 = 1; - L2 = 2; - } - // Specify the Norm to use L1 or L2 - optional Norm norm = 1 [default = L1]; -} - -/* -message ImageDataConf { - // Specify the data source. - optional string source = 1; - // Specify the batch size. - optional uint32 batch_size = 4 [default = 1]; - // The rand_skip variable is for the data layer to skip a few data points - // to avoid all asynchronous sgd clients to start at the same point. The skip - // point would be set as rand_skip * rand(0,1). Note that rand_skip should not - // be larger than the number of keys in the database. - optional uint32 rand_skip = 7 [default = 0]; - // Whether or not ImageLayer should shuffle the list of files at every epoch. - optional bool shuffle = 8 [default = false]; - // It will also resize images if new_height or new_width are not zero. - optional uint32 new_height = 9 [default = 0]; - optional uint32 new_width = 10 [default = 0]; - // Specify if the images are color or gray - optional bool is_color = 11 [default = true]; - // DEPRECATED. See TransformationConf. For data pre-processing, we can do - // simple scaling and subtracting the data mean, if provided. Note that the - // mean subtraction is always carried out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // DEPRECATED. See TransformationConf. Specify if we would like to randomly - // crop an image. - optional uint32 crop_size = 5 [default = 0]; - // DEPRECATED. See TransformationConf. Specify if we want to randomly mirror - // data. - optional bool mirror = 6 [default = false]; - optional string root_folder = 12 [default = ""]; -} -*/ - -message InfogainLossConf { - // Specify the infogain matrix source. - optional string source = 1; -} - -message InnerProductConf { - optional uint32 num_output = 1; // The number of outputs for the layer - optional bool bias_term = 2 [default = true]; // whether to have bias terms - optional FillerConf weight_filler = 3; // The filler for the weight - optional FillerConf bias_filler = 4; // The filler for the bias - - // The first axis to be lumped into a single inner product computation; - // all preceding axes are retained in the output. - // May be negative to index from the end (e.g., -1 for the last axis). - optional int32 axis = 5 [default = 1]; -} - -// Message that stores hyper-parameters used by LogLayer -message LogConf { - // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. - // Or if base is set to the default (-1), base is set to e, - // so y = ln(shift + scale * x) = log_e(shift + scale * x) - optional float base = 1 [default = -1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} - -// Message that stores hyper-parameters used by LRNLayer -message LRNConf { - optional uint32 local_size = 1 [default = 5]; - optional float alpha = 2 [default = 1.]; - optional float beta = 3 [default = 0.75]; - enum NormRegion { - ACROSS_CHANNELS = 0; - WITHIN_CHANNEL = 1; - } - optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; - optional float k = 5 [default = 1.]; -} - -message MemoryDataConf { - optional uint32 batch_size = 1; - optional uint32 channels = 2; - optional uint32 height = 3; - optional uint32 width = 4; -} - -message MVNConf { - // This parameter can be set to false to normalize mean only - optional bool normalize_variance = 1 [default = true]; - - // This parameter can be set to true to perform DNN-like MVN - optional bool across_channels = 2 [default = false]; - - // Epsilon for not dividing by zero while normalizing variance - optional float eps = 3 [default = 1e-9]; -} - -message PoolingConf { - enum PoolMethod { - MAX = 0; - AVE = 1; - STOCHASTIC = 2; - } - optional PoolMethod pool = 1 [default = MAX]; // The pooling method - // Pad, kernel size, and stride are all given as a single value for equal - // dimensions in height and width or as Y, X pairs. - optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) - optional uint32 pad_h = 9 [default = 0]; // The padding height - optional uint32 pad_w = 10 [default = 0]; // The padding width - optional uint32 kernel_size = 2; // The kernel size (square) - optional uint32 kernel_h = 5; // The kernel height - optional uint32 kernel_w = 6; // The kernel width - optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) - optional uint32 stride_h = 7; // The stride height - optional uint32 stride_w = 8; // The stride width - /* - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 11 [default = DEFAULT]; - */ - // If global_pooling then it will pool over the size of the bottom by doing - // kernel_h = bottom->height and kernel_w = bottom->width - optional bool global_pooling = 12 [default = false]; -} - -message PowerConf { - // PowerLayer computes outputs y = (shift + scale * x) ^ power. - optional float power = 1 [default = 1.0]; - optional float scale = 2 [default = 1.0]; - optional float shift = 3 [default = 0.0]; -} -/* -message PythonConf { - optional string module = 1; - optional string layer = 2; - // This value is set to the attribute `param_str` of the `PythonLayer` object - // in Python before calling the `setup()` method. This could be a number, - // string, dictionary in Python dict format, JSON, etc. You may parse this - // string in `setup` method and use it in `forward` and `backward`. - optional string param_str = 3 [default = '']; - // Whether this PythonLayer is shared among worker solvers during data parallelism. - // If true, each worker solver sequentially run forward from this layer. - // This value should be set true if you are using it as a data layer. - optional bool share_in_parallel = 4 [default = false]; -} -*/ - -// Message that stores hyper-parameters used by ReductionLayer -message ReductionConf { - enum ReductionOp { - SUM = 1; - ASUM = 2; - SUMSQ = 3; - MEAN = 4; - } - - optional ReductionOp operation = 1 [default = SUM]; // reduction operation - - // The first axis to reduce to a scalar -- may be negative to index from the - // end (e.g., -1 for the last axis). - // (Currently, only reduction along ALL "tail" axes is supported; reduction - // of axis M through N, where N < num_axes - 1, is unsupported.) - // Suppose we have an n-axis bottom Blob with shape: - // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). - // If axis == m, the output Blob will have shape - // (d0, d1, d2, ..., d(m-1)), - // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) - // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. - // If axis == 0 (the default), the output Blob always has the empty shape - // (count 1), performing reduction across the entire input -- - // often useful for creating new loss functions. - optional int32 axis = 2 [default = 0]; - - optional float coeff = 3 [default = 1.0]; // coefficient for output -} - -// Message that stores hyper-parameters used by ReLULayer -message ReLUConf { - // Allow non-zero slope for negative inputs to speed up optimization - // Described in: - // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities - // improve neural network acoustic models. In ICML Workshop on Deep Learning - // for Audio, Speech, and Language Processing. - optional float negative_slope = 1 [default = 0]; - /* - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 2 [default = DEFAULT]; - */ -} - -message ReshapeConf { - // Specify the output dimensions. If some of the dimensions are set to 0, - // the corresponding dimension from the bottom layer is used (unchanged). - // Exactly one dimension may be set to -1, in which case its value is - // inferred from the count of the bottom blob and the remaining dimensions. - // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: - // - // layer { - // type: "Reshape" bottom: "input" top: "output" - // reshape_param { ... } - // } - // - // If "input" is 2D with shape 2 x 8, then the following reshape_param - // specifications are all equivalent, producing a 3D blob "output" with shape - // 2 x 2 x 4: - // - // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } - // reshape_param { shape { dim: -1 dim: 0 dim: 2 } } - // - optional BlobShape shape = 1; - - // axis and num_axes control the portion of the bottom blob's shape that are - // replaced by (included in) the reshape. By default (axis == 0 and - // num_axes == -1), the entire bottom blob shape is included in the reshape, - // and hence the shape field must specify the entire output shape. - // - // axis may be non-zero to retain some portion of the beginning of the input - // shape (and may be negative to index from the end; e.g., -1 to begin the - // reshape after the last axis, including nothing in the reshape, - // -2 to include only the last axis, etc.). - // - // For example, suppose "input" is a 2D blob with shape 2 x 8. - // Then the following ReshapeLayer specifications are all equivalent, - // producing a blob "output" with shape 2 x 2 x 4: - // - // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } - // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } - // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } - // - // num_axes specifies the extent of the reshape. - // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on - // input axes in the range [axis, axis+num_axes]. - // num_axes may also be -1, the default, to include all remaining axes - // (starting from axis). - // - // For example, suppose "input" is a 2D blob with shape 2 x 8. - // Then the following ReshapeLayer specifications are equivalent, - // producing a blob "output" with shape 1 x 2 x 8. - // - // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } - // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } - // reshape_param { shape { dim: 1 } num_axes: 0 } - // - // On the other hand, these would produce output blob shape 2 x 1 x 8: - // - // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } - // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } - // - optional int32 axis = 2 [default = 0]; - optional int32 num_axes = 3 [default = -1]; -} - -message SigmoidConf { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; -} - -message SliceConf { - // The axis along which to slice -- may be negative to index from the end - // (e.g., -1 for the last axis). - // By default, SliceLayer concatenates blobs along the "channels" axis (1). - optional int32 axis = 3 [default = 1]; - repeated uint32 slice_point = 2; - - // DEPRECATED: alias for "axis" -- does not support negative indexing. - optional uint32 slice_dim = 1 [default = 1]; -} - -// Message that stores hyper-parameters used by SoftmaxLayer, SoftmaxWithLossLayer -message SoftmaxConf { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; - - // The axis along which to perform the softmax -- may be negative to index - // from the end (e.g., -1 for the last axis). - // Any other axes will be evaluated as independent softmaxes. - optional int32 axis = 2 [default = 1]; -} - -message TanHConf { - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 1 [default = DEFAULT]; -} - -// Message that stores hyper-parameters used by TileLayer -message TileConf { - // The index of the axis to tile. - optional int32 axis = 1 [default = 1]; - - // The number of copies (tiles) of the blob to output. - optional int32 tiles = 2; -} - -// Message that stores hyper-parameters used by ThresholdLayer -message ThresholdConf { - optional float threshold = 1 [default = 0]; // Strictly positive values -} - -/* -message WindowDataConf { - // Specify the data source. - optional string source = 1; - // For data pre-processing, we can do simple scaling and subtracting the - // data mean, if provided. Note that the mean subtraction is always carried - // out before scaling. - optional float scale = 2 [default = 1]; - optional string mean_file = 3; - // Specify the batch size. - optional uint32 batch_size = 4; - // Specify if we would like to randomly crop an image. - optional uint32 crop_size = 5 [default = 0]; - // Specify if we want to randomly mirror data. - optional bool mirror = 6 [default = false]; - // Foreground (object) overlap threshold - optional float fg_threshold = 7 [default = 0.5]; - // Background (non-object) overlap threshold - optional float bg_threshold = 8 [default = 0.5]; - // Fraction of batch that should be foreground objects - optional float fg_fraction = 9 [default = 0.25]; - // Amount of contextual padding to add around a window - // (used only by the window_data_layer) - optional uint32 context_pad = 10 [default = 0]; - // Mode for cropping out a detection window - // warp: cropped window is warped to a fixed size and aspect ratio - // square: the tightest square around the window is cropped - optional string crop_mode = 11 [default = "warp"]; - // cache_images: will load all images in memory for faster access - optional bool cache_images = 12 [default = false]; - // append root_folder to locate images - optional string root_folder = 13 [default = ""]; -} -*/ - -message SPPConf { - enum PoolMethod { - MAX = 0; - AVE = 1; - STOCHASTIC = 2; - } - optional uint32 pyramid_height = 1; - optional PoolMethod pool = 2 [default = MAX]; // The pooling method - /* - enum Engine { - DEFAULT = 0; - CAFFE = 1; - CUDNN = 2; - } - optional Engine engine = 6 [default = DEFAULT]; - */ -} - -message PReLUConf { - // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: - // Surpassing Human-Level Performance on ImageNet Classification, 2015. - - // Initial value of a_i. Default is a_i=0.25 for all i. - optional FillerConf filler = 1; - // Whether or not slope paramters are shared across channels. - optional bool channel_shared = 2 [default = false]; -} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/3171459b/src/proto/model.proto ---------------------------------------------------------------------- diff --git a/src/proto/model.proto b/src/proto/model.proto new file mode 100644 index 0000000..51225ee --- /dev/null +++ b/src/proto/model.proto @@ -0,0 +1,852 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package singa; + +/// \file layer.proto is adapted from [Caffe](https://github.com/BVLC/caffe/)'s +/// proto file with commit id c419f8517b1e1b3d7a07fe212fc6c90a70b519ea. We +/// use caffe's protocol for configuring layer hyper-parameters for easy +/// transporting Caffe model into SINGA. Specifically, we do the following +/// changes: +/// 1. we rename LayerParameter to LayerConf to differentiate model parameters +/// 2. we rename xxxParameter to xxxConf for fields of LayerParameter +/// 3. we comment out some fields (using /*...*/) not used in SINGA layer but +/// reserve their tags. +/// 4. we add new fields (commented like 'singa field..') to support our own +/// functionalities. +/// TODO(wangwei) write a proto converter to automatically load caffe models +/// using Python (or C++/Java). + +// Specifies the shape (dimensions) of a Blob. +message BlobShape { + repeated int64 dim = 1 [packed = true]; +} + +message BlobProto { + optional BlobShape shape = 7; + repeated float data = 5 [packed = true]; + repeated float diff = 6 [packed = true]; + repeated double double_data = 8 [packed = true]; + repeated double double_diff = 9 [packed = true]; + + // 4D dimensions -- deprecated. Use "shape" instead. + optional int32 num = 1 [default = 0]; + optional int32 channels = 2 [default = 0]; + optional int32 height = 3 [default = 0]; + optional int32 width = 4 [default = 0]; +} + +message FillerConf { + // The filler type. + optional string type = 1 [default = 'constant']; + optional float value = 2 [default = 0]; // the value in constant filler + optional float min = 3 [default = 0]; // the min value in uniform filler + optional float max = 4 [default = 1]; // the max value in uniform filler + optional float mean = 5 [default = 0]; // the mean value in Gaussian filler + optional float std = 6 [default = 1]; // the std value in Gaussian filler + // The expected number of non-zero output weights for a given input in + // Gaussian filler -- the default -1 means don't perform sparsification. + /* optional int32 sparse = 7 [default = -1]; */ + // Normalize the filler variance by fan_in, fan_out, or their average. + // Applies to 'xavier' and 'msra' fillers. + enum VarianceNorm { + FAN_IN = 0; + FAN_OUT = 1; + AVERAGE = 2; + } + optional VarianceNorm variance_norm = 8 [default = FAN_IN]; +} + +// Specifies training parameters (multipliers on global learning constants, +// and the name and other settings used for weight sharing). +message ParamSpec { + // The names of the parameter blobs -- useful for sharing parameters among + // layers, but never required otherwise. To share a parameter between two + // layers, give it a (non-empty) name. + optional string name = 1; + + // Whether to require shared weights to have the same shape, or just the same + // count -- defaults to STRICT if unspecified. + /* + optional DimCheckMode share_mode = 2; + enum DimCheckMode { + // STRICT (default) requires that num, channels, height, width each match. + STRICT = 0; + // PERMISSIVE requires only the count (num*channels*height*width) to match. + PERMISSIVE = 1; + } + */ + + // The multiplier on the global learning rate for this parameter. + optional float lr_mult = 3 [default = 1.0]; + + // The multiplier on the global weight decay for this parameter. + optional float decay_mult = 4 [default = 1.0]; + + // SINGA uses this filed internally. Users just configure the fillers in + // Layer specific conf message as caffe (style). + optional FillerConf filler = 20; +} + +enum Phase { + kTrain = 4; + kEval = 8; +} +// NOTE +// Update the next available ID when you add a new LayerConf field. +// +// LayerConf next available layer-specific ID: 139 (last added: tile_param) +message LayerConf { + optional string name = 1; // the layer name + optional string type = 2; // the layer type + /* repeated string bottom = 3; // the name of each bottom blob */ + /* repeated string top = 4; // the name of each top blob */ + + // The train / test phase for computation. + // optional Phase phase = 10; + + // The amount of weight to assign each top blob in the objective. + // Each layer assigns a default value, usually of either 0 or 1, + // to each top blob. + /* repeated float loss_weight = 5; */ + + // Specifies training parameters (multipliers on global learning constants, + // and the name and other settings used for weight sharing). + repeated ParamSpec param = 6; + + // The blobs containing the numeric parameters of the layer. + repeated BlobProto blobs = 7; + + // Specifies on which bottoms the backpropagation should be skipped. + // The size must be either 0 or equal to the number of bottoms. + /* repeated bool propagate_down = 11; */ + + // Rules controlling whether and when a layer is included in the network, + // based on the current NetState. You may specify a non-zero number of rules + // to include OR exclude, but not both. If no include or exclude rules are + // specified, the layer is always included. If the current NetState meets + // ANY (i.e., one or more) of the specified rules, the layer is + // included/excluded. + /* repeated NetStateRule include = 8; */ + /* repeated NetStateRule exclude = 9; */ + + // Confs for data pre-processing. + /* optional TransformationConf transform_param = 100; */ + + // Confs shared by loss layers. + /* optional LossConf loss_param = 101; */ + + // Layer type-specific parameters. + // + // Note: certain layers may have more than one computational engine + // for their implementation. These layers include an Engine type and + // engine parameter for selecting the implementation. + // The default for the engine is set by the ENGINE switch at compile-time. + //optional AccuracyConf accuracy_conf = 102; + optional ArgMaxConf argmax_conf = 103; + optional ConcatConf concat_conf = 104; + optional ContrastiveLossConf contrastive_loss_conf = 105; + optional ConvolutionConf convolution_conf = 106; + // optional DataConf data_conf = 107; + optional DropoutConf dropout_conf = 108; + // optional DummyDataConf dummy_data_conf = 109; + optional EltwiseConf eltwise_conf = 110; + optional EmbedConf embed_conf = 137; + optional ExpConf exp_conf = 111; + optional FlattenConf flatten_conf = 135; + // optional HDF5DataConf hdf5_data_conf = 112; + // optional HDF5OutputConf hdf5_output_conf = 113; + optional HingeLossConf hinge_loss_conf = 114; + // optional ImageDataConf image_data_conf = 115; + optional InfogainLossConf infogain_loss_conf = 116; + optional InnerProductConf inner_product_conf = 117; + optional LogConf log_conf = 134; + optional LRNConf lrn_conf = 118; + // Used in SINGA + optional MetricConf metric_conf = 200; + // optional MemoryDataConf memory_data_conf = 119; + optional MVNConf mvn_conf = 120; + optional PoolingConf pooling_conf = 121; + optional PowerConf power_conf = 122; + optional PReLUConf prelu_conf = 131; + // optional PythonConf python_conf = 130; + optional ReductionConf reduction_conf = 136; + optional ReLUConf relu_conf = 123; + optional ReshapeConf reshape_conf = 133; + optional SigmoidConf sigmoid_conf = 124; + optional SoftmaxConf softmax_conf = 125; + optional SPPConf spp_conf = 132; + optional SliceConf slice_conf = 126; + optional TanHConf tanh_conf = 127; + optional ThresholdConf threshold_conf = 128; + optional TileConf tile_conf = 138; + //optional WindowDataConf window_data_conf = 129; +} + +// Message that stores hyper-parameters used to apply transformation +// to the data layer's data +/* +message TransformationConf { + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 1 [default = 1]; + // Specify if we want to randomly mirror data. + optional bool mirror = 2 [default = false]; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 3 [default = 0]; + // mean_file and mean_value cannot be specified at the same time + optional string mean_file = 4; + // if specified can be repeated once (would substract it from all the channels) + // or can be repeated the same number of times as channels + // (would subtract them from the corresponding channel) + repeated float mean_value = 5; + // Force the decoded image to have 3 color channels. + optional bool force_color = 6 [default = false]; + // Force the decoded image to have 1 color channels. + optional bool force_gray = 7 [default = false]; +} +*/ + +// Message that stores hyper-parameters shared by loss layers +message LossConf { + // If specified, ignore instances with the given label. + optional int32 ignore_label = 1; + // If true, normalize each batch across all instances (including spatial + // dimesions, but not ignored instances); else, divide by batch size only. + optional bool normalize = 2 [default = true]; +} + +message MetricConf { + // When computing accuracy, count as correct by comparing the true label to + // the top k scoring classes. By default, only compare to the top scoring + // class (i.e. argmax). + optional uint32 top_k = 1 [default = 1]; + + // The "label" axis of the prediction blob, whose argmax corresponds to the + // predicted label -- may be negative to index from the end (e.g., -1 for the + // last axis). For example, if axis == 1 and the predictions are + // (N x C x H x W), the label blob is expected to contain N*H*W ground truth + // labels with integer values in {0, 1, ..., C-1}. + optional int32 axis = 2 [default = 1]; + + // If specified, ignore instances with the given label. + optional int32 ignore_label = 3; +} +// Messages that store hyper-parameters used by individual layer types follow, in +// alphabetical order. + + + +message ArgMaxConf { + // If true produce pairs (argmax, maxval) + optional bool out_max_val = 1 [default = false]; + optional uint32 top_k = 2 [default = 1]; + // The axis along which to maximise -- may be negative to index from the + // end (e.g., -1 for the last axis). + // By default ArgMaxLayer maximizes over the flattened trailing dimensions + // for each index of the first / num dimension. + optional int32 axis = 3; +} + +message ConcatConf { + // The axis along which to concatenate -- may be negative to index from the + // end (e.g., -1 for the last axis). Other axes must have the + // same dimension for all the bottom blobs. + // By default, ConcatLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 2 [default = 1]; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 concat_dim = 1 [default = 1]; +} + +message ContrastiveLossConf { + // margin for dissimilar pair + optional float margin = 1 [default = 1.0]; + // The first implementation of this cost did not exactly match the cost of + // Hadsell et al 2006 -- using (margin - d^2) instead of (margin - d)^2. + // legacy_version = false (the default) uses (margin - d)^2 as proposed in the + // Hadsell paper. New models should probably use this version. + // legacy_version = true uses (margin - d^2). This is kept to support / + // reproduce existing models and results + optional bool legacy_version = 2 [default = false]; +} + +message ConvolutionConf { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [default = true]; // whether to have bias terms + + // Pad, kernel size, and stride are all given as a single value for equal + // dimensions in all spatial dimensions, or once per spatial dimension. + repeated uint32 pad = 3; // The padding size; defaults to 0 + repeated uint32 kernel_size = 4; // The kernel size + repeated uint32 stride = 6; // The stride; defaults to 1 + + // For 2D convolution only, the *_h and *_w versions may also be used to + // specify both spatial dimensions. + optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only) + optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only) + optional uint32 kernel_h = 11; // The kernel height (2D only) + optional uint32 kernel_w = 12; // The kernel width (2D only) + optional uint32 stride_h = 13; // The stride height (2D only) + optional uint32 stride_w = 14; // The stride width (2D only) + + optional uint32 group = 5 [default = 1]; // The group size for group conv + + optional FillerConf weight_filler = 7; // The filler for the weight + optional FillerConf bias_filler = 8; // The filler for the bias + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 15 [default = DEFAULT]; + + // The axis to interpret as "channels" when performing convolution. + // Preceding dimensions are treated as independent inputs; + // succeeding dimensions are treated as "spatial". + // With (N, C, H, W) inputs, and axis == 1 (the default), we perform + // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for + // groups g>1) filters across the spatial axes (H, W) of the input. + // With (N, C, D, H, W) inputs, and axis == 1, we perform + // N independent 3D convolutions, sliding (C/g)-channels + // filters across the spatial axes (D, H, W) of the input. + optional int32 axis = 16 [default = 1]; + + // Whether to force use of the general ND convolution, even if a specific + // implementation for blobs of the appropriate number of spatial dimensions + // is available. (Currently, there is only a 2D-specific convolution + // implementation; for input blobs with num_axes != 2, this option is + // ignored and the ND implementation will be used.) + optional bool force_nd_im2col = 17 [default = false]; +} + +/* +message DataConf { + enum DB { + LEVELDB = 0; + LMDB = 1; + } + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 4; + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the database. + // DEPRECATED. Each solver accesses a different subset of the database. + optional uint32 rand_skip = 7 [default = 0]; + optional DB backend = 8 [default = LEVELDB]; + // DEPRECATED. See TransformationConf. For data pre-processing, we can do + // simple scaling and subtracting the data mean, if provided. Note that the + // mean subtraction is always carried out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // DEPRECATED. See TransformationConf. Specify if we would like to randomly + // crop an image. + optional uint32 crop_size = 5 [default = 0]; + // DEPRECATED. See TransformationConf. Specify if we want to randomly mirror + // data. + optional bool mirror = 6 [default = false]; + // Force the encoded image to have 3 color channels + optional bool force_encoded_color = 9 [default = false]; + // Prefetch queue (Number of batches to prefetch to host memory, increase if + // data access bandwidth varies). + optional uint32 prefetch = 10 [default = 4]; +} +*/ + +message DropoutConf { + optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio +} + +// DummyDataLayer fills any number of arbitrarily shaped blobs with random +// (or constant) data generated by "Fillers" (see "message FillerConf"). +message DummyDataConf { + // This layer produces N >= 1 top blobs. DummyDataConf must specify 1 or N + // shape fields, and 0, 1 or N data_fillers. + // + // If 0 data_fillers are specified, ConstantFiller with a value of 0 is used. + // If 1 data_filler is specified, it is applied to all top blobs. If N are + // specified, the ith is applied to the ith top blob. + repeated FillerConf data_filler = 1; + repeated BlobShape shape = 6; + + // 4D dimensions -- deprecated. Use "shape" instead. + repeated uint32 num = 2; + repeated uint32 channels = 3; + repeated uint32 height = 4; + repeated uint32 width = 5; +} + +message EltwiseConf { + enum EltwiseOp { + PROD = 0; + SUM = 1; + MAX = 2; + } + optional EltwiseOp operation = 1 [default = SUM]; // element-wise operation + repeated float coeff = 2; // blob-wise coefficient for SUM operation + + // Whether to use an asymptotically slower (for >2 inputs) but stabler method + // of computing the gradient for the PROD operation. (No effect for SUM op.) + optional bool stable_prod_grad = 3 [default = true]; +} + +// Message that stores hyper-parameters used by EmbedLayer +message EmbedConf { + optional uint32 num_output = 1; // The number of outputs for the layer + // The input is given as integers to be interpreted as one-hot + // vector indices with dimension num_input. Hence num_input should be + // 1 greater than the maximum possible input value. + optional uint32 input_dim = 2; + + optional bool bias_term = 3 [default = true]; // Whether to use a bias term + optional FillerConf weight_filler = 4; // The filler for the weight + optional FillerConf bias_filler = 5; // The filler for the bias + +} + +// Message that stores hyper-parameters used by ExpLayer +message ExpConf { + // ExpLayer computes outputs y = base ^ (shift + scale * x), for base > 0. + // Or if base is set to the default (-1), base is set to e, + // so y = exp(shift + scale * x). + optional float base = 1 [default = -1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; +} + +/// Message that stores hyper-parameters used by FlattenLayer +message FlattenConf { + // The first axis to flatten: all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 1 [default = 1]; + + // The last axis to flatten: all following axes are retained in the output. + // May be negative to index from the end (e.g., the default -1 for the last + // axis). + optional int32 end_axis = 2 [default = -1]; +} + +/* +// Message that stores hyper-parameters used by HDF5DataLayer +message HDF5DataConf { + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 2; + + // Specify whether to shuffle the data. + // If shuffle == true, the ordering of the HDF5 files is shuffled, + // and the ordering of data within any given HDF5 file is shuffled, + // but data between different files are not interleaved; all of a file's + // data are output (in a random order) before moving onto another file. + optional bool shuffle = 3 [default = false]; +} + +message HDF5OutputConf { + optional string file_name = 1; +} +*/ + +message HingeLossConf { + enum Norm { + L1 = 1; + L2 = 2; + } + // Specify the Norm to use L1 or L2 + optional Norm norm = 1 [default = L1]; +} + +/* +message ImageDataConf { + // Specify the data source. + optional string source = 1; + // Specify the batch size. + optional uint32 batch_size = 4 [default = 1]; + // The rand_skip variable is for the data layer to skip a few data points + // to avoid all asynchronous sgd clients to start at the same point. The skip + // point would be set as rand_skip * rand(0,1). Note that rand_skip should not + // be larger than the number of keys in the database. + optional uint32 rand_skip = 7 [default = 0]; + // Whether or not ImageLayer should shuffle the list of files at every epoch. + optional bool shuffle = 8 [default = false]; + // It will also resize images if new_height or new_width are not zero. + optional uint32 new_height = 9 [default = 0]; + optional uint32 new_width = 10 [default = 0]; + // Specify if the images are color or gray + optional bool is_color = 11 [default = true]; + // DEPRECATED. See TransformationConf. For data pre-processing, we can do + // simple scaling and subtracting the data mean, if provided. Note that the + // mean subtraction is always carried out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // DEPRECATED. See TransformationConf. Specify if we would like to randomly + // crop an image. + optional uint32 crop_size = 5 [default = 0]; + // DEPRECATED. See TransformationConf. Specify if we want to randomly mirror + // data. + optional bool mirror = 6 [default = false]; + optional string root_folder = 12 [default = ""]; +} +*/ + +message InfogainLossConf { + // Specify the infogain matrix source. + optional string source = 1; +} + +message InnerProductConf { + optional uint32 num_output = 1; // The number of outputs for the layer + optional bool bias_term = 2 [default = true]; // whether to have bias terms + optional FillerConf weight_filler = 3; // The filler for the weight + optional FillerConf bias_filler = 4; // The filler for the bias + + // The first axis to be lumped into a single inner product computation; + // all preceding axes are retained in the output. + // May be negative to index from the end (e.g., -1 for the last axis). + optional int32 axis = 5 [default = 1]; +} + +// Message that stores hyper-parameters used by LogLayer +message LogConf { + // LogLayer computes outputs y = log_base(shift + scale * x), for base > 0. + // Or if base is set to the default (-1), base is set to e, + // so y = ln(shift + scale * x) = log_e(shift + scale * x) + optional float base = 1 [default = -1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; +} + +// Message that stores hyper-parameters used by LRNLayer +message LRNConf { + optional uint32 local_size = 1 [default = 5]; + optional float alpha = 2 [default = 1.]; + optional float beta = 3 [default = 0.75]; + enum NormRegion { + ACROSS_CHANNELS = 0; + WITHIN_CHANNEL = 1; + } + optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS]; + optional float k = 5 [default = 1.]; +} + +message MemoryDataConf { + optional uint32 batch_size = 1; + optional uint32 channels = 2; + optional uint32 height = 3; + optional uint32 width = 4; +} + +message MVNConf { + // This parameter can be set to false to normalize mean only + optional bool normalize_variance = 1 [default = true]; + + // This parameter can be set to true to perform DNN-like MVN + optional bool across_channels = 2 [default = false]; + + // Epsilon for not dividing by zero while normalizing variance + optional float eps = 3 [default = 1e-9]; +} + +message PoolingConf { + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional PoolMethod pool = 1 [default = MAX]; // The pooling method + // Pad, kernel size, and stride are all given as a single value for equal + // dimensions in height and width or as Y, X pairs. + optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X) + optional uint32 pad_h = 9 [default = 0]; // The padding height + optional uint32 pad_w = 10 [default = 0]; // The padding width + optional uint32 kernel_size = 2; // The kernel size (square) + optional uint32 kernel_h = 5; // The kernel height + optional uint32 kernel_w = 6; // The kernel width + optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X) + optional uint32 stride_h = 7; // The stride height + optional uint32 stride_w = 8; // The stride width + /* + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 11 [default = DEFAULT]; + */ + // If global_pooling then it will pool over the size of the bottom by doing + // kernel_h = bottom->height and kernel_w = bottom->width + optional bool global_pooling = 12 [default = false]; +} + +message PowerConf { + // PowerLayer computes outputs y = (shift + scale * x) ^ power. + optional float power = 1 [default = 1.0]; + optional float scale = 2 [default = 1.0]; + optional float shift = 3 [default = 0.0]; +} +/* +message PythonConf { + optional string module = 1; + optional string layer = 2; + // This value is set to the attribute `param_str` of the `PythonLayer` object + // in Python before calling the `setup()` method. This could be a number, + // string, dictionary in Python dict format, JSON, etc. You may parse this + // string in `setup` method and use it in `forward` and `backward`. + optional string param_str = 3 [default = '']; + // Whether this PythonLayer is shared among worker solvers during data parallelism. + // If true, each worker solver sequentially run forward from this layer. + // This value should be set true if you are using it as a data layer. + optional bool share_in_parallel = 4 [default = false]; +} +*/ + +// Message that stores hyper-parameters used by ReductionLayer +message ReductionConf { + enum ReductionOp { + SUM = 1; + ASUM = 2; + SUMSQ = 3; + MEAN = 4; + } + + optional ReductionOp operation = 1 [default = SUM]; // reduction operation + + // The first axis to reduce to a scalar -- may be negative to index from the + // end (e.g., -1 for the last axis). + // (Currently, only reduction along ALL "tail" axes is supported; reduction + // of axis M through N, where N < num_axes - 1, is unsupported.) + // Suppose we have an n-axis bottom Blob with shape: + // (d0, d1, d2, ..., d(m-1), dm, d(m+1), ..., d(n-1)). + // If axis == m, the output Blob will have shape + // (d0, d1, d2, ..., d(m-1)), + // and the ReductionOp operation is performed (d0 * d1 * d2 * ... * d(m-1)) + // times, each including (dm * d(m+1) * ... * d(n-1)) individual data. + // If axis == 0 (the default), the output Blob always has the empty shape + // (count 1), performing reduction across the entire input -- + // often useful for creating new loss functions. + optional int32 axis = 2 [default = 0]; + + optional float coeff = 3 [default = 1.0]; // coefficient for output +} + +// Message that stores hyper-parameters used by ReLULayer +message ReLUConf { + // Allow non-zero slope for negative inputs to speed up optimization + // Described in: + // Maas, A. L., Hannun, A. Y., & Ng, A. Y. (2013). Rectifier nonlinearities + // improve neural network acoustic models. In ICML Workshop on Deep Learning + // for Audio, Speech, and Language Processing. + optional float negative_slope = 1 [default = 0]; + /* + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 2 [default = DEFAULT]; + */ +} + +message ReshapeConf { + // Specify the output dimensions. If some of the dimensions are set to 0, + // the corresponding dimension from the bottom layer is used (unchanged). + // Exactly one dimension may be set to -1, in which case its value is + // inferred from the count of the bottom blob and the remaining dimensions. + // For example, suppose we want to reshape a 2D blob "input" with shape 2 x 8: + // + // layer { + // type: "Reshape" bottom: "input" top: "output" + // reshape_param { ... } + // } + // + // If "input" is 2D with shape 2 x 8, then the following reshape_param + // specifications are all equivalent, producing a 3D blob "output" with shape + // 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 0 dim: 2 dim: -1 } } + // reshape_param { shape { dim: -1 dim: 0 dim: 2 } } + // + optional BlobShape shape = 1; + + // axis and num_axes control the portion of the bottom blob's shape that are + // replaced by (included in) the reshape. By default (axis == 0 and + // num_axes == -1), the entire bottom blob shape is included in the reshape, + // and hence the shape field must specify the entire output shape. + // + // axis may be non-zero to retain some portion of the beginning of the input + // shape (and may be negative to index from the end; e.g., -1 to begin the + // reshape after the last axis, including nothing in the reshape, + // -2 to include only the last axis, etc.). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are all equivalent, + // producing a blob "output" with shape 2 x 2 x 4: + // + // reshape_param { shape { dim: 2 dim: 2 dim: 4 } } + // reshape_param { shape { dim: 2 dim: 4 } axis: 1 } + // reshape_param { shape { dim: 2 dim: 4 } axis: -3 } + // + // num_axes specifies the extent of the reshape. + // If num_axes >= 0 (and axis >= 0), the reshape will be performed only on + // input axes in the range [axis, axis+num_axes]. + // num_axes may also be -1, the default, to include all remaining axes + // (starting from axis). + // + // For example, suppose "input" is a 2D blob with shape 2 x 8. + // Then the following ReshapeLayer specifications are equivalent, + // producing a blob "output" with shape 1 x 2 x 8. + // + // reshape_param { shape { dim: 1 dim: 2 dim: 8 } } + // reshape_param { shape { dim: 1 dim: 2 } num_axes: 1 } + // reshape_param { shape { dim: 1 } num_axes: 0 } + // + // On the other hand, these would produce output blob shape 2 x 1 x 8: + // + // reshape_param { shape { dim: 2 dim: 1 dim: 8 } } + // reshape_param { shape { dim: 1 } axis: 1 num_axes: 0 } + // + optional int32 axis = 2 [default = 0]; + optional int32 num_axes = 3 [default = -1]; +} + +message SigmoidConf { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; +} + +message SliceConf { + // The axis along which to slice -- may be negative to index from the end + // (e.g., -1 for the last axis). + // By default, SliceLayer concatenates blobs along the "channels" axis (1). + optional int32 axis = 3 [default = 1]; + repeated uint32 slice_point = 2; + + // DEPRECATED: alias for "axis" -- does not support negative indexing. + optional uint32 slice_dim = 1 [default = 1]; +} + +// Message that stores hyper-parameters used by SoftmaxLayer, SoftmaxWithLossLayer +message SoftmaxConf { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; + + // The axis along which to perform the softmax -- may be negative to index + // from the end (e.g., -1 for the last axis). + // Any other axes will be evaluated as independent softmaxes. + optional int32 axis = 2 [default = 1]; +} + +message TanHConf { + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 1 [default = DEFAULT]; +} + +// Message that stores hyper-parameters used by TileLayer +message TileConf { + // The index of the axis to tile. + optional int32 axis = 1 [default = 1]; + + // The number of copies (tiles) of the blob to output. + optional int32 tiles = 2; +} + +// Message that stores hyper-parameters used by ThresholdLayer +message ThresholdConf { + optional float threshold = 1 [default = 0]; // Strictly positive values +} + +/* +message WindowDataConf { + // Specify the data source. + optional string source = 1; + // For data pre-processing, we can do simple scaling and subtracting the + // data mean, if provided. Note that the mean subtraction is always carried + // out before scaling. + optional float scale = 2 [default = 1]; + optional string mean_file = 3; + // Specify the batch size. + optional uint32 batch_size = 4; + // Specify if we would like to randomly crop an image. + optional uint32 crop_size = 5 [default = 0]; + // Specify if we want to randomly mirror data. + optional bool mirror = 6 [default = false]; + // Foreground (object) overlap threshold + optional float fg_threshold = 7 [default = 0.5]; + // Background (non-object) overlap threshold + optional float bg_threshold = 8 [default = 0.5]; + // Fraction of batch that should be foreground objects + optional float fg_fraction = 9 [default = 0.25]; + // Amount of contextual padding to add around a window + // (used only by the window_data_layer) + optional uint32 context_pad = 10 [default = 0]; + // Mode for cropping out a detection window + // warp: cropped window is warped to a fixed size and aspect ratio + // square: the tightest square around the window is cropped + optional string crop_mode = 11 [default = "warp"]; + // cache_images: will load all images in memory for faster access + optional bool cache_images = 12 [default = false]; + // append root_folder to locate images + optional string root_folder = 13 [default = ""]; +} +*/ + +message SPPConf { + enum PoolMethod { + MAX = 0; + AVE = 1; + STOCHASTIC = 2; + } + optional uint32 pyramid_height = 1; + optional PoolMethod pool = 2 [default = MAX]; // The pooling method + /* + enum Engine { + DEFAULT = 0; + CAFFE = 1; + CUDNN = 2; + } + optional Engine engine = 6 [default = DEFAULT]; + */ +} + +message PReLUConf { + // Parametric ReLU described in K. He et al, Delving Deep into Rectifiers: + // Surpassing Human-Level Performance on ImageNet Classification, 2015. + + // Initial value of a_i. Default is a_i=0.25 for all i. + optional FillerConf filler = 1; + // Whether or not slope paramters are shared across channels. + optional bool channel_shared = 2 [default = false]; +}
