http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/input_layer/proto_record.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/input_layer/proto_record.h b/include/singa/neuralnet/input_layer/proto_record.h new file mode 100644 index 0000000..bfc9934 --- /dev/null +++ b/include/singa/neuralnet/input_layer/proto_record.h @@ -0,0 +1,73 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_INPUT_LAYER_PROTO_RECORD_H_ +#define SINGA_NEURALNET_INPUT_LAYER_PROTO_RECORD_H_ + +#include <string> +#include <vector> +#include "singa/io/store.h" +#include "singa/neuralnet/layer.h" +#include "singa/neuralnet/input_layer/store_input.h" +#include "singa/utils/data_shard.h" +/** + * \file this file includes the declarations of input layers that inherit the + * base InputLayer to load input features. + * + * The feature loading phase can be implemented using a single layer or + * separated into DataLayer (for loading features as records) and ParserLayer + * (for parsing features from records). SINGA has provided some subclasses of + * DataLayer and ParserLayer. + * + * Data prefetching can be implemented as a sub-class of InputLayer. + * SINGA provides a built-in PrefetchLayer which embeds DataLayer and + * ParserLayer. + */ +namespace singa { +using std::string; +using std::vector; + +/** + * Specific layer that parses the value string loaded by Store into a + * SingleLabelImageRecord. + */ +class ProtoRecordLayer : public SingleLabelRecordLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + + protected: + /** + * Parse key as instance ID and val into SingleLabelImageRecord. + * @copydetails StoreInputLayer::Parse() + */ + bool Parse(int k, int flag, const string& key, const string& val) override; + void LoadRecord(const string& backend, + const string& path, + Blob<float>* to) override; + + private: + // TODO(wangwei) decode the image + bool encoded_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_INPUT_LAYER_PROTO_RECORD_H_
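A note on the Parse() contract used by ProtoRecordLayer and the other store-backed input layers below: each (key, val) tuple fills one slot of the mini-batch. The following standalone C++ sketch illustrates that contract with a hypothetical record layout (a 4-byte label followed by dim floats); the actual layer deserializes a SingleLabelImageRecord protobuf instead.

#include <cstring>
#include <string>
#include <vector>

// Hypothetical record layout: a 4-byte int label followed by `dim` floats.
// Copies the features into the k-th row of a flat batch buffer and stores
// the label, mirroring what a StoreInputLayer::Parse() override would do.
bool ParseRecord(int k, const std::string& val, int dim,
                 std::vector<float>* batch, std::vector<int>* labels) {
  if (val.size() != sizeof(int) + dim * sizeof(float)) return false;
  int label;
  std::memcpy(&label, val.data(), sizeof(int));
  (*labels)[k] = label;
  std::memcpy(batch->data() + k * dim, val.data() + sizeof(int),
              dim * sizeof(float));
  return true;
}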
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/input_layer/rgb_image.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/input_layer/rgb_image.h b/include/singa/neuralnet/input_layer/rgb_image.h new file mode 100644 index 0000000..c4d6f18 --- /dev/null +++ b/include/singa/neuralnet/input_layer/rgb_image.h @@ -0,0 +1,66 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_INPUT_LAYER_RGB_IMAGE_H_ +#define SINGA_NEURALNET_INPUT_LAYER_RGB_IMAGE_H_ + +#include <string> +#include <vector> +#include "singa/io/store.h" +#include "singa/neuralnet/layer.h" +#include "singa/neuralnet/input_layer/parser.h" +#include "singa/utils/data_shard.h" +/** + * \file this file includes the declarations of input layers that inherit the + * base InputLayer to load input features. + * + * The feature loading phase can be implemented using a single layer or + * separated into DataLayer (for loading features as records) and ParserLayer + * (for parsing features from records). SINGA has provided some subclasses of + * DataLayer and ParserLayer. + * + * Data prefetching can be implemented as a sub-class of InputLayer. + * SINGA provides a built-in PrefetchLayer which embeds DataLayer and + * ParserLayer. + */ +namespace singa { +using std::string; +using std::vector; +/** + * Derived from ParserLayer to parse RGB image features from + * SingleLabelImageRecord. + */ +class RGBImageLayer : public ParserLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ParseRecords(int flag, const std::vector<Record>& records, + Blob<float>* blob) override; + + private: + float scale_; + int cropsize_; + bool mirror_; + Blob<float> mean_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_INPUT_LAYER_RGB_IMAGE_H_
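The fields of RGBImageLayer (scale_, cropsize_, mirror_, mean_) imply the usual image preprocessing pipeline: crop, mean-subtract, optionally mirror, then scale. Here is a standalone sketch of that transform for a CHW float image; the helper and its names are hypothetical illustrations, not SINGA's actual ParseRecords().

#include <vector>

// Crop a (channels x height x width) image to crop x crop at offset (h_off,
// w_off), subtract the per-pixel mean, optionally mirror horizontally, and
// scale. Hypothetical helper illustrating the transform implied by the fields.
std::vector<float> Preprocess(const std::vector<float>& img,
                              const std::vector<float>& mean,
                              int channels, int height, int width,
                              int crop, int h_off, int w_off,
                              bool mirror, float scale) {
  std::vector<float> out(channels * crop * crop);
  for (int c = 0; c < channels; ++c)
    for (int h = 0; h < crop; ++h)
      for (int w = 0; w < crop; ++w) {
        int src = (c * height + h + h_off) * width + w + w_off;
        int dst = (c * crop + h) * crop + (mirror ? crop - 1 - w : w);
        out[dst] = (img[src] - mean[src]) * scale;
      }
  return out;
}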
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/input_layer/shard_data.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/input_layer/shard_data.h b/include/singa/neuralnet/input_layer/shard_data.h new file mode 100644 index 0000000..13ea9d8 --- /dev/null +++ b/include/singa/neuralnet/input_layer/shard_data.h @@ -0,0 +1,65 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_INPUT_LAYER_SHARD_DATA_H_ +#define SINGA_NEURALNET_INPUT_LAYER_SHARD_DATA_H_ + +#include <string> +#include <vector> +#include "singa/io/store.h" +#include "singa/neuralnet/layer.h" +#include "singa/neuralnet/input_layer/data.h" +#include "singa/utils/data_shard.h" +/** + * \file this file includes the declarations of input layers that inherit the + * base InputLayer to load input features. + * + * The feature loading phase can be implemented using a single layer or + * separated into DataLayer (for loading features as records) and ParserLayer + * (for parsing features from records). SINGA has provided some subclasses of + * DataLayer and ParserLayer. + * + * Data prefetching can be implemented as a sub-class of InputLayer. + * SINGA provides a built-in PrefetchLayer which embeds DataLayer and + * ParserLayer. + */ +namespace singa { +using std::string; +using std::vector; +/** + * Layer for loading Record from DataShard. + * + * It is derived from DataLayer. + */ +class ShardDataLayer : public DataLayer { + public: + ~ShardDataLayer(); + + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + + private: + DataShard* shard_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_INPUT_LAYER_SHARD_DATA_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/input_layer/store_input.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/input_layer/store_input.h b/include/singa/neuralnet/input_layer/store_input.h new file mode 100644 index 0000000..dd63be6 --- /dev/null +++ b/include/singa/neuralnet/input_layer/store_input.h @@ -0,0 +1,105 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License.
+* +*************************************************************/ + +#ifndef SINGA_NEURALNET_INPUT_LAYER_STORE_INPUT_H_ +#define SINGA_NEURALNET_INPUT_LAYER_STORE_INPUT_H_ + +#include <string> +#include <vector> +#include "singa/io/store.h" +#include "singa/neuralnet/layer.h" +#include "singa/utils/data_shard.h" +/** + * \file this file includes the declarations of input layers that inherit the + * base InputLayer to load input features. + * + * The feature loading phase can be implemented using a single layer or + * separated into DataLayer (for loading features as records) and ParserLayer + * (for parsing features from records). SINGA has provided some subclasses of + * DataLayer and ParserLayer. + * + * Data prefetching can be implemented as a sub-class of InputLayer. + * SINGA provides a built-in PrefetchLayer which embeds DataLayer and + * ParserLayer. + */ +namespace singa { +using std::string; +using std::vector; + +/** + * Base class for loading data from Store. + */ +class StoreInputLayer : virtual public InputLayer { + public: + ~StoreInputLayer(); + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + + ConnectionType dst_layer_connection() const override { return kOneToMany; } + + protected: + /** + * Parse the (key, val) tuple to get the feature (and label). + * Subclasses must implement this function. + * @param[in] k parse this tuple as the k-th instance of one mini-batch. + * @param[in] flag used to guide the parsing, e.g., the kDeploy phase should + * not parse labels from the tuple. + * @param[in] key + * @param[in] val + */ + virtual bool Parse(int k, int flag, const string& key, const string& val) = 0; + + protected: + int batchsize_ = 1; + int random_skip_ = 0; + io::Store* store_ = nullptr; +}; + +/** + * Base layer for parsing a key-value tuple as a feature vector with fixed + * length. The feature shape is indicated by users in the configuration. + * Each tuple may have a label. + */ +class SingleLabelRecordLayer : public StoreInputLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + + protected: + /** + * Load a single record (tuple), e.g., the mean or standard deviation vector. + */ + virtual void LoadRecord(const string& backend, const string& path, + Blob<float>* to) = 0; + + protected: + /** + * Feature standardization by processing each feature dimension via + * @f$ y = (x - \mu) / \sigma @f$ + * <a href= "http://ufldl.stanford.edu/wiki/index.php/Data_Preprocessing"> + * UFLDL</a> + */ + Blob<float> mean_, std_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_INPUT_LAYER_STORE_INPUT_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/layer.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/layer.h b/include/singa/neuralnet/layer.h new file mode 100644 index 0000000..5bdeb6f --- /dev/null +++ b/include/singa/neuralnet/layer.h @@ -0,0 +1,288 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_LAYER_H_ +#define SINGA_NEURALNET_LAYER_H_ + +#include <map> +#include <string> +#include <thread> +#include <vector> +#include "singa/proto/common.pb.h" +#include "singa/proto/job.pb.h" +#include "singa/utils/common.h" +#include "singa/utils/blob.h" +#include "singa/utils/param.h" + +namespace singa { +using std::vector; +// TODO(wangwei) make AuxType a template argument for Layer. +using AuxType = int; +/** + * Base layer class. + * + * Subclasses should implement at least + * Layer::ComputeFeature() and Layer::ComputeGradient() + * functions in accordance with the NeuralNet::TrainOneBatch function. + */ +class Layer { + public: + /** + * Create a sub-layer instance based on proto.type(). + * + * @param proto configuration of the layer instance. + * @return pointer to the newly created layer instance. + */ + static Layer* Create(const LayerProto& proto); + + Layer() {} + virtual ~Layer() {} + /** + * Setup layer properties. + * + * Setup members, e.g., shapes of Param objects, based on the layer + * configuration and connected layers. + * It should check the partition setting when setting up the properties. + * + * @param conf layer configuration. + * @param srclayers source layers that connect to this layer. + */ + virtual void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) { + layer_conf_ = conf; + } + /** + * Compute features of this layer based on connected layers. + * + * @param[in] flag set by the TrainOneBatch function, e.g., to indicate the + * running phase (kForward|kTrain, kForward|kTest, etc). + * @param[in] srclayers source layers that connect to this layer. + */ + virtual void ComputeFeature(int flag, const vector<Layer*>& srclayers) = 0; + /** + * Compute gradients for parameters associated with this layer. + * It may also compute the gradients of the loss w.r.t the source layers. + * + * \copydetails ComputeFeature(). + */ + virtual void ComputeGradient(int flag, const vector<Layer*>& srclayers) = 0; + /** + * Layers that have parameters must override this function to return all + * Param objects associated with this layer. + * + * @return parameters associated with this layer. + */ + virtual const std::vector<Param*> GetParams() const { + return std::vector<Param*> {}; + } + /** + * Return the connection type between one neuron of this layer and its source + * layer. + * + * Two connection types are currently supported: kOneToOne and kOneToAll. + * - kOneToOne indicates the neuron depends on only one neuron from the src + * layer. + * - kOneToAll indicates the neuron depends on all neurons from the src layer. + * TODO(wangwei) support kOneToMany. + * + * @param[in] k index of the source layer; currently only k = 0 is supported. + * @return connection type.
+ */ + virtual ConnectionType src_neuron_connection(int k) const { + // CHECK_LT(k, srclayers_.size()); + return kOneToOne; + } + /** + * Return the connection type of this layer and all dst layers. + * + * Two connection types are currently supported: kOneToOne and kOneToMany. + * - kOneToOne indicates the users implement the ComputeFeature and + * ComputeGradient functions considering only one dst layer. In this case, + * a SplitLayer will be added automatically to connect this layer with all + * dst layers. + * - kOneToMany indicates this layer has already considered multiple dst + * layers in the implementation. + * + * @return connection type; the default is kOneToOne. + */ + virtual ConnectionType dst_layer_connection() const { + return kOneToOne; + } + /** + * To display layer info, e.g., aggregated loss/accuracy, or the norm of the + * feature vector and the norm of the parameters. + * + * @param[in] debug whether to print debug info + * @param[in] flag used to get the calling phase, e.g., forward of training + * (kForward | kTrain). + * @return info string about this layer, which is printed into the log. + */ + virtual const std::string ToString(bool debug, int flag); + /** + * @return partition dimension of this layer, + * - -1 for no partition. + * - 0 for partition on the data dimension, i.e., partitioning the mini-batch + * into sub-mini-batches. + * - 1 for partition of this layer on the feature dimension, i.e., the feature + * vector of each instance is partitioned into sub-vectors. + */ + inline int partition_dim() const { + CHECK_LE(layer_conf_.partition_dim(), 1); + return layer_conf_.partition_dim(); + } + /** + * @return the partition ID (i.e., the ID of the worker to whom this layer is + * dispatched) of this layer, which is a sub-layer partitioned from the + * original layer. + */ + inline int partition_id() const { return layer_conf_.partition_id(); } + /** + * @return total number of partitions (i.e., sub-layers) of the original + * layer of this layer. + */ + inline int num_partitions() const { return layer_conf_.num_partitions(); } + /** + * @return the type of this layer, only valid for built-in layer (types). + */ + inline LayerType type() const { return layer_conf_.type(); } + /** + * @return user-defined layer type. + */ + inline const std::string& user_type() const { + return layer_conf_.user_type(); + } + /** + * Return the name of this layer. + */ + inline const std::string& name() const { return layer_conf_.name(); } + /** + * @param[in] from pointer to one of the dst layers. For some layers, they + * have more than one data Blob. In this case, this argument identifies the + * layer that is requesting the data Blob. + * @return a const ref of the Blob storing feature values of this layer. + */ + virtual const Blob<float>& data(const Layer* from) const { + return data_; + } + /** + * @see data(). + * @return the pointer to the Blob storing feature values of this layer. + */ + virtual Blob<float>* mutable_data(const Layer* from) { + return &data_; + } + /** + * @return auxiliary data, e.g., image label. + */ + virtual const vector<AuxType>& aux_data(const Layer* from = nullptr) const { + return aux_data_; + } + /** + * @see data(). + * @return the const ref of the Blob for the gradient of this layer, mainly + * used in the BP algorithm. + */ + virtual const Blob<float>& grad(const Layer* from) const { + return grad_; + } + /** + * @see data(). + * @return a pointer to the Blob storing gradients of this layer, mainly + * used in the BP algorithm.
+ */ + virtual Blob<float>* mutable_grad(const Layer* from) { + return &grad_; + } + + protected: + LayerProto layer_conf_; + Blob<float> data_, grad_; + vector<AuxType> aux_data_; +}; + +/** + * Base layer for connecting layers when the neural net is partitioned. + */ +class ConnectionLayer : virtual public Layer { + // defined as a layer category +}; + +/** + * Base layer for getting input data. May include layers for loading records, + * parsing records. + */ +class InputLayer : virtual public Layer { + public: + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {} + Blob<float>* mutable_grad(const Layer* layer) override { + // LOG(FATAL) << "Input layer has no gradient blob"; + return nullptr; + } + const Blob<float>& grad(const Layer* from) const override { + // LOG(FATAL) << "Input layer has no gradient blob"; + return grad_; + } +}; + + +/** + * Base layer for calculating loss and doing BackPropagation. + */ +class LossLayer : virtual public Layer { + public: + const std::string ToString(bool debug, int flag) override; + Blob<float>* mutable_grad(const Layer* layer) override { + LOG(FATAL) << "Loss layer has no gradient blob"; + return nullptr; + } + const Blob<float>& grad(const Layer* from) const override { + LOG(FATAL) << "Loss layer has no gradient blob"; + return grad_; + } + protected: + Metric metric_; +}; + +/** + * Base layer for feature transformation, e.g., ConvolutionLayer, PoolingLayer, + * etc. + */ +class NeuronLayer : virtual public Layer { + // defined as a layer category +}; + +/** + * Base layer for collecting features into a disk file, HTTP stream, etc. + */ +class OutputLayer : virtual public Layer { + public: + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {} + Blob<float>* mutable_grad(const Layer* layer) override { + LOG(FATAL) << "Output layer has no gradient blob"; + return nullptr; + } + const Blob<float>& grad(const Layer* from) const override { + LOG(FATAL) << "Output layer has no gradient blob"; + return grad_; + } +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_LAYER_H_
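To illustrate the Layer contract above (ComputeFeature reads the source layers' data blobs on the forward pass; ComputeGradient writes gradients back into them on the backward pass), here is a self-contained miniature with hypothetical types. It mirrors only the calling convention, not the real SINGA base class.

#include <vector>

// Miniature of the Layer contract: data flows forward, grad flows back.
struct MiniLayer {
  std::vector<float> data, grad;
  virtual ~MiniLayer() {}
  virtual void ComputeFeature(const std::vector<MiniLayer*>& src) = 0;
  virtual void ComputeGradient(const std::vector<MiniLayer*>& src) = 0;
};

// y = 2x; shows reading src->data forward and writing src->grad backward.
struct ScaleLayer : MiniLayer {
  void ComputeFeature(const std::vector<MiniLayer*>& src) override {
    data.resize(src[0]->data.size());
    for (size_t i = 0; i < data.size(); ++i) data[i] = 2.0f * src[0]->data[i];
  }
  void ComputeGradient(const std::vector<MiniLayer*>& src) override {
    src[0]->grad.resize(grad.size());
    for (size_t i = 0; i < grad.size(); ++i) src[0]->grad[i] = 2.0f * grad[i];
  }
};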
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/loss_layer/euclidean.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/loss_layer/euclidean.h b/include/singa/neuralnet/loss_layer/euclidean.h new file mode 100644 index 0000000..57703e0 --- /dev/null +++ b/include/singa/neuralnet/loss_layer/euclidean.h @@ -0,0 +1,47 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_LOSS_LAYER_EUCLIDEAN_H_ +#define SINGA_NEURALNET_LOSS_LAYER_EUCLIDEAN_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" + +/** + * @file this file includes the declarations of layers that inherit the base + * LossLayer for measuring the objective training loss. + */ +namespace singa { +using std::vector; +/** + * Squared Euclidean loss as @f$0.5 ||p - t||^2@f$, where p is the prediction + * and t is the ground truth. + */ +class EuclideanLossLayer : public LossLayer { + public: + void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_LOSS_LAYER_EUCLIDEAN_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/loss_layer/softmax.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/loss_layer/softmax.h b/include/singa/neuralnet/loss_layer/softmax.h new file mode 100644 index 0000000..7a57517 --- /dev/null +++ b/include/singa/neuralnet/loss_layer/softmax.h @@ -0,0 +1,63 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_LOSS_LAYER_SOFTMAX_H_ +#define SINGA_NEURALNET_LOSS_LAYER_SOFTMAX_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" + +/** + * @file this file includes the declarations of layers that inherit the base + * LossLayer for measuring the objective training loss. + */ +namespace singa { +using std::vector; +/** + * Cross-entropy loss applied to the probabilities computed from Softmax. + * @f$ L_i = -\log P_{t_i} @f$, where @f$ t_i \in [0, C) @f$ is the label for + * the i-th object and C is the total number of classes. + */ +class SoftmaxLossLayer : public LossLayer { + public: + void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + + /** + * softmax is not recommended for partitioning because it requires the whole + * src layer for normalization.
+ */ + ConnectionType src_neuron_connection(int k) const override { + // CHECK_LT(k, srclayers_.size()); + return kOneToAll; + } + + private: + int batchsize_; + int dim_; + float scale_; + int topk_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_LOSS_LAYER_SOFTMAX_H_
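For reference, SoftmaxLossLayer's loss and gradient follow the standard softmax/cross-entropy identities: the gradient of the loss w.r.t. the logits is p - onehot(t). A standalone sketch for a single instance (hypothetical helper, not SINGA code):

#include <algorithm>
#include <cmath>
#include <vector>

// Softmax over logits, cross-entropy loss -log p[label], and the gradient
// w.r.t. the logits, which is p - onehot(label).
float SoftmaxLoss(const std::vector<float>& logits, int label,
                  std::vector<float>* grad) {
  float max = logits[0];
  for (float v : logits) max = std::max(max, v);   // for numerical stability
  float sum = 0;
  std::vector<float> p(logits.size());
  for (size_t i = 0; i < p.size(); ++i) sum += p[i] = std::exp(logits[i] - max);
  for (float& v : p) v /= sum;
  *grad = p;
  (*grad)[label] -= 1.0f;                          // dL/dlogit = p - onehot
  return -std::log(p[label]);
}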
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuralnet.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuralnet.h b/include/singa/neuralnet/neuralnet.h new file mode 100644 index 0000000..c3ddb95 --- /dev/null +++ b/include/singa/neuralnet/neuralnet.h @@ -0,0 +1,118 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURALNET_H_ +#define SINGA_NEURALNET_NEURALNET_H_ + +#include <string> +#include <vector> +#include <unordered_map> + +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" +#include "singa/utils/factory.h" +#include "singa/utils/graph.h" + +namespace singa { +/** + * The neural network is constructed from user configurations in NetProto. + * + * Some layers, e.g., SplitLayer and BridgeSrcLayer/BridgeDstLayer, + * will be added implicitly to partition the neural network. + * TODO(wangwei) create wrappers for popular models, e.g., MLP, CNN. + */ +class NeuralNet { + public: + /** + * Create the neural network for training, test or validation. + * + * Parameters for the test/validation net can share those from training after + * setup (done outside of this function). + * + * @param net_conf proto for the neural network + * @param phase test/training/validation + * @param npartitions num of partitions; do partitioning if npartitions > 1 + * @return pointer to a neural net + */ + static NeuralNet* Create(const NetProto& net_conf, Phase phase, + int npartitions); + + /** + * Construct the net structure from the protocol buffer. + * @param net_conf neural net config + * @param num_partitions num of partitions. 1 for no partitioning. + */ + NeuralNet(NetProto net_conf, int num_partitions); + ~NeuralNet(); + /** + * To display the adjacency layers + std::string ToAdjacency(); + */ + /** + * Share memory of parameter values from another neural net. + */ + void ShareParamsFrom(NeuralNet* other); + inline const std::vector<Layer*>& layers() const { return layers_; } + inline const std::vector<Param*>& params() const { return params_; } + inline Layer* name2layer(std::string name) const { + CHECK(name2layer_.find(name) != name2layer_.end()) + << "No layer with name " << name; + return name2layer_.at(name); + } + inline const std::vector<Layer*>& srclayers(const Layer* layer) const { + CHECK(src_map_.find(layer) != src_map_.end()) + << "layer (" << layer->name() << " ) has no source layers"; + return src_map_.at(layer); + } + inline Param* paramid2param(int id) const { return paramid2param_.at(id); } + + protected: + /** + * Create a neural net graph, one node for each layer. + * + * Partition the graph if num_partitions > 1; each layer is sliced according + * to its own partition setting. + * @param netproto neural net config + * @param num_partitions number of partitions + * @return neural net graph + */ + Graph* CreateGraph(const NetProto& netproto, int num_partitions); + /** + * Create the neural net from the graph, one layer per node. + */ + void CreateNetFromGraph(Graph* graph, int num_partitions); + /** + * Prepare data structures, e.g., params_, layers_, etc. + */ + void PrepareDataStructures(); + + protected: + std::vector<Layer*> layers_; + std::vector<Param*> params_; + + std::unordered_map<std::string, Layer*> name2layer_; + std::unordered_map<int, Param*> paramid2param_; + std::unordered_map<const Layer*, std::vector<Layer*>> src_map_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURALNET_H_
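The comments in neuralnet.h describe the intended call pattern: create the training net, then create a test net and share Param values after setup. A hypothetical usage sketch against the API declared above; the Phase constants kTrain/kTest are assumed to come from job.pb.h.

#include "singa/neuralnet/neuralnet.h"

// Build the training net, then a test net that reuses the trained Params.
// Assumes a populated NetProto `conf` and Phase values from job.pb.h.
void BuildNets(const singa::NetProto& conf) {
  singa::NeuralNet* train_net = singa::NeuralNet::Create(conf, singa::kTrain, 1);
  singa::NeuralNet* test_net = singa::NeuralNet::Create(conf, singa::kTest, 1);
  test_net->ShareParamsFrom(train_net);  // shares Param values, not topology
  // ... run training/testing, then clean up.
  delete test_net;
  delete train_net;
}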
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/convolution.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/convolution.h b/include/singa/neuralnet/neuron_layer/convolution.h new file mode 100644 index 0000000..a73131d --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/convolution.h @@ -0,0 +1,72 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_CONVOLUTION_H_ +#define SINGA_NEURALNET_NEURON_LAYER_CONVOLUTION_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { +/** + * Convolution layer. + */ +class ConvolutionLayer : public NeuronLayer { + public: + ~ConvolutionLayer(); + + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + const std::vector<Param*> GetParams() const override { + std::vector<Param*> params{weight_, bias_}; + return params; + } + ConnectionType src_neuron_connection(int k) const override { + // CHECK_LT(k, srclayers_.size()); + return kOneToAll; + } + + protected: + int kernel_, pad_, stride_; + int batchsize_, channels_, height_, width_; + int col_height_, col_width_, conv_height_, conv_width_, num_filters_; + Param* weight_, *bias_; + Blob<float> col_data_, col_grad_; +}; + +/** + * Use im2col from Caffe + */ +class CConvolutionLayer : public ConvolutionLayer { + public: + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_CONVOLUTION_H_
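The conv_height_/conv_width_ members above follow the standard output-size arithmetic for convolution (the same formula Caffe uses); a small sketch, assuming square kernels and symmetric padding:

// Output spatial size of a convolution with square kernel, symmetric padding
// and stride: floor((in + 2*pad - kernel) / stride) + 1.
int ConvOutDim(int in, int kernel, int pad, int stride) {
  return (in + 2 * pad - kernel) / stride + 1;
}
// e.g., a 227x227 input with kernel 11, pad 0, stride 4 gives
// ConvOutDim(227, 11, 0, 4) == 55.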
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/dropout.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/dropout.h b/include/singa/neuralnet/neuron_layer/dropout.h new file mode 100644 index 0000000..1fe189e --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/dropout.h @@ -0,0 +1,51 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_DROPOUT_H_ +#define SINGA_NEURALNET_NEURON_LAYER_DROPOUT_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { + +class DropoutLayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + protected: + // drop probability + float pdrop_; + /* records which neurons are dropped, required for back-propagating + * gradients; if mask[i]=0, then the i-th neuron is dropped. + */ + Blob<float> mask_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_DROPOUT_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/inner_product.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/inner_product.h b/include/singa/neuralnet/neuron_layer/inner_product.h new file mode 100644 index 0000000..e109bb3 --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/inner_product.h @@ -0,0 +1,55 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_INNER_PRODUCT_H_ +#define SINGA_NEURALNET_NEURON_LAYER_INNER_PRODUCT_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { + +class InnerProductLayer : public NeuronLayer { + public: + ~InnerProductLayer(); + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + const std::vector<Param*> GetParams() const override { + std::vector<Param*> params{weight_, bias_}; + return params; + } + + private: + int batchsize_; + int vdim_, hdim_; + bool transpose_; + Param *weight_, *bias_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_INNER_PRODUCT_H_
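Returning to DropoutLayer above: mask_ records which neurons were dropped so that the backward pass can replay the exact same pattern. A standalone sketch of that mechanism; the names are hypothetical, and the inverted-dropout scaling shown here is one common convention, assumed rather than taken from SINGA.

#include <cstdlib>
#include <vector>

// Forward: drop each neuron with probability pdrop and rescale survivors;
// mask[i] stores the multiplier so backward can reuse the exact pattern.
void DropoutForward(std::vector<float>* x, std::vector<float>* mask,
                    float pdrop) {
  mask->resize(x->size());
  for (size_t i = 0; i < x->size(); ++i) {
    bool keep = (std::rand() / (RAND_MAX + 1.0)) >= pdrop;
    (*mask)[i] = keep ? 1.0f / (1.0f - pdrop) : 0.0f;
    (*x)[i] *= (*mask)[i];
  }
}

// Backward: gradients flow only through kept neurons.
void DropoutBackward(std::vector<float>* grad, const std::vector<float>& mask) {
  for (size_t i = 0; i < grad->size(); ++i) (*grad)[i] *= mask[i];
}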
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/lrn.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/lrn.h b/include/singa/neuralnet/neuron_layer/lrn.h new file mode 100644 index 0000000..ba983a4 --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/lrn.h @@ -0,0 +1,60 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_LRN_H_ +#define SINGA_NEURALNET_NEURON_LAYER_LRN_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { +/** + * Local Response Normalization layer. + * + * @f$ b_i = a_i / x_i^{beta} @f$, where + * @f$ x_i = knorm + alpha \sum_{j=max(0, i-n/2)}^{min(N, i+n/2)} a_j^2 @f$. + * n is the size of the local response area; + * a_i is the activation (after ReLU) of a neuron convolved with the i-th + * kernel; b_i is the neuron after normalization; N is the total num of + * kernels. + */ +class LRNLayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + + protected: + //! shape of the bottom layer feature + int batchsize_, channels_, height_, width_; + //! size of the local response (neighbor) area + int lsize_; + //! hyper-parameters + float alpha_, beta_, knorm_; + Blob<float> norm_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_LRN_H_
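A standalone sketch of the LRN formula above, normalizing one spatial position across N channels (hypothetical helper):

#include <algorithm>
#include <cmath>
#include <vector>

// b[i] = a[i] / x[i]^beta with x[i] = knorm + alpha * sum of a[j]^2 over the
// local window of n channels centred at i (clamped to [0, N)).
std::vector<float> LocalResponseNorm(const std::vector<float>& a, int n,
                                     float alpha, float beta, float knorm) {
  int N = static_cast<int>(a.size());
  std::vector<float> b(N);
  for (int i = 0; i < N; ++i) {
    float x = knorm;
    int lo = std::max(0, i - n / 2), hi = std::min(N - 1, i + n / 2);
    for (int j = lo; j <= hi; ++j) x += alpha * a[j] * a[j];
    b[i] = a[i] / std::pow(x, beta);
  }
  return b;
}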
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/pooling.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/pooling.h b/include/singa/neuralnet/neuron_layer/pooling.h new file mode 100644 index 0000000..d8f0f68 --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/pooling.h @@ -0,0 +1,60 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_POOLING_H_ +#define SINGA_NEURALNET_NEURON_LAYER_POOLING_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { + +class PoolingLayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + + protected: + int kernel_, pad_, stride_; + int batchsize_, channels_, height_, width_, pooled_height_, pooled_width_; + PoolingProto_PoolMethod pool_; +}; +/** + * Use book-keeping for BP following Caffe's pooling implementation + */ +class CPoolingLayer : public PoolingLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + private: + Blob<float> mask_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_POOLING_H_
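CPoolingLayer's "book-keeping" above means remembering, for each pooled output, which input won the max so the gradient can be routed back to it. A minimal 1-D sketch (hypothetical helper):

#include <vector>

// Max-pool a 1-D signal with the given kernel and stride; mask[o] records the
// argmax input index for output o, which backward uses to route gradients.
std::vector<float> MaxPool1D(const std::vector<float>& in, int kernel,
                             int stride, std::vector<int>* mask) {
  int out_len = (static_cast<int>(in.size()) - kernel) / stride + 1;
  std::vector<float> out(out_len);
  mask->resize(out_len);
  for (int o = 0; o < out_len; ++o) {
    int best = o * stride;
    for (int i = best + 1; i < o * stride + kernel; ++i)
      if (in[i] > in[best]) best = i;
    (*mask)[o] = best;
    out[o] = in[best];
  }
  return out;
}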
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/rbm.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/rbm.h b/include/singa/neuralnet/neuron_layer/rbm.h new file mode 100644 index 0000000..c2e1140 --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/rbm.h @@ -0,0 +1,99 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_RBM_H_ +#define SINGA_NEURALNET_NEURON_LAYER_RBM_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { +/** + * Base layer for RBM models. + */ +class RBMLayer: virtual public Layer { + public: + virtual ~RBMLayer() {} + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + const Blob<float>& neg_data(const Layer* layer) { + return neg_data_; + } + Blob<float>* mutable_neg_data(const Layer* layer) { + return &neg_data_; + } + const std::vector<Param*> GetParams() const override { + std::vector<Param*> params{weight_, bias_}; + return params; + } + virtual Blob<float>* Sample(int flat); + + protected: + //! if true, sample according to a Gaussian distribution + bool gaussian_; + //! dimension of the hidden layer + int hdim_; + //! dimension of the visible layer + int vdim_; + int batchsize_; + bool first_gibbs_; + Param* weight_, *bias_; + + Blob<float> neg_data_; + Blob<float> neg_sample_; + Blob<float> sample_; +}; + +/** + * RBM visible layer + */ +class RBMVisLayer: public RBMLayer, public LossLayer { + public: + ~RBMVisLayer(); + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + + private: + RBMLayer* hid_layer_; + Layer* input_layer_; +}; +/** + * RBM hidden layer + */ +class RBMHidLayer: public RBMLayer { + public: + ~RBMHidLayer(); + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; + + private: + RBMLayer *vis_layer_; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_RBM_H_
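RBMLayer::Sample() above draws the layer's states from its activation probabilities during Gibbs sampling. For the binary (non-Gaussian) case this is elementwise Bernoulli sampling, as in this standalone sketch (hypothetical helper):

#include <random>
#include <vector>

// Given activation probabilities p (e.g., sigmoid of the pre-activations),
// draw a binary sample s with s[i] ~ Bernoulli(p[i]) -- one Gibbs half-step.
std::vector<float> SampleBernoulli(const std::vector<float>& p,
                                   std::mt19937* rng) {
  std::uniform_real_distribution<float> u(0.0f, 1.0f);
  std::vector<float> s(p.size());
  for (size_t i = 0; i < p.size(); ++i) s[i] = u(*rng) < p[i] ? 1.0f : 0.0f;
  return s;
}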
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/relu.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/relu.h b/include/singa/neuralnet/neuron_layer/relu.h new file mode 100644 index 0000000..735be7c --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/relu.h @@ -0,0 +1,44 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_RELU_H_ +#define SINGA_NEURALNET_NEURON_LAYER_RELU_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { + +class ReLULayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_RELU_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/sigmoid.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/sigmoid.h b/include/singa/neuralnet/neuron_layer/sigmoid.h new file mode 100644 index 0000000..2d32206 --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/sigmoid.h @@ -0,0 +1,51 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_SIGMOID_H_ +#define SINGA_NEURALNET_NEURON_LAYER_SIGMOID_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { +/** + * This layer applies the Sigmoid function to neuron activations. + * f(x) = 1 / (1 + exp(-x)) + * f'(x) = f(x) * (1 - f(x)) + */ +class SigmoidLayer: public Layer { + public: + using Layer::ComputeFeature; + using Layer::ComputeGradient; + + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_SIGMOID_H_
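The derivative identity quoted for SigmoidLayer, f'(x) = f(x)(1 - f(x)), is what makes the backward pass cheap: it reuses the cached forward output instead of recomputing the exponential. A standalone sketch:

#include <cmath>
#include <vector>

// Forward: y = 1 / (1 + exp(-x)).
void SigmoidForward(const std::vector<float>& x, std::vector<float>* y) {
  y->resize(x.size());
  for (size_t i = 0; i < x.size(); ++i)
    (*y)[i] = 1.0f / (1.0f + std::exp(-x[i]));
}

// Backward: dL/dx = dL/dy * y * (1 - y), using the cached forward output.
void SigmoidBackward(const std::vector<float>& y, std::vector<float>* grad) {
  for (size_t i = 0; i < y.size(); ++i) (*grad)[i] *= y[i] * (1.0f - y[i]);
}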
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/neuron_layer/stanh.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer/stanh.h b/include/singa/neuralnet/neuron_layer/stanh.h new file mode 100644 index 0000000..e25f44c --- /dev/null +++ b/include/singa/neuralnet/neuron_layer/stanh.h @@ -0,0 +1,47 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_NEURON_LAYER_STANH_H_ +#define SINGA_NEURALNET_NEURON_LAYER_STANH_H_ + +#include <vector> +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" + +/** + * \file this file includes the declarations of neuron layer classes that + * conduct the transformation of features. + */ +namespace singa { +/** + * This layer applies the scaled Tanh function to neuron activations. + * f(x) = 1.7159047 * tanh(0.66666667 * x) + */ +class STanhLayer : public NeuronLayer { + public: + void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; + void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; + void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; +}; + +} // namespace singa + +#endif // SINGA_NEURALNET_NEURON_LAYER_STANH_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/neuralnet/output_layer/output_layer.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/output_layer/output_layer.h b/include/singa/neuralnet/output_layer/output_layer.h new file mode 100644 index 0000000..d48d805 --- /dev/null +++ b/include/singa/neuralnet/output_layer/output_layer.h @@ -0,0 +1,27 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_NEURALNET_OUTPUT_LAYER_H_ +#define SINGA_NEURALNET_OUTPUT_LAYER_H_ + +// currently no output sub-classes are defined + +#endif // SINGA_NEURALNET_OUTPUT_LAYER_H_
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/server.h ---------------------------------------------------------------------- diff --git a/include/singa/server.h b/include/singa/server.h new file mode 100644 index 0000000..4bffeae --- /dev/null +++ b/include/singa/server.h @@ -0,0 +1,133 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership.
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_SERVER_H_ +#define SINGA_SERVER_H_ + +#include <unordered_map> +#include <vector> +#include "singa/comm/socket.h" +#include "singa/proto/job.pb.h" +#include "singa/utils/param.h" +#include "singa/utils/updater.h" + +namespace singa { + + /* Responds to workers' get/put/update requests, and periodically syncs with + * other servers. + * + * Normally, the Server creates a response message for each request which + * will be sent back to the one who issued the request. However, if a request + * is not processed successfully, the original message will be returned. The + * server does not know whether the returned message is a response or the + * original message; it just sends it to the router. The router will decide + * whether to re-send the request to the server or send it to the worker. + */ +class Server { + public: + ~Server(); + Server(int group_id, int server_id, + const JobProto& job_conf, + const std::vector<int>& slice2group, + const std::vector<int>& slice2server); + void Run(); + inline int grp_id() const { return grp_id_; } + inline int id() const { return id_; } + + protected: + /** + * Process GET request. + * + * @return the original message or a response message which contains the + * values of the Param with the requested version. + */ + Msg* HandleGet(Msg** msg); + /** + * Process Update request. + * + * It waits until it has received the gradients from all workers in the same + * worker group. After updating, it responds to each sender with the new + * Param values. It may generate a sync message to the server group that + * maintains the global version of the updated Param (slice). + * + * Note: there is no counter for each worker group on the number of received + * update requests. Hence it is possible that the server would conduct the + * update when it receives x requests from group a and y requests from group + * b where x + y = group size. To avoid this problem, we can + * -# maintain a request list for each group for each Param at the server side + * -# not span a worker group across multiple nodes; then the updates from + * the same group would be locally aggregated on the worker node, and the + * server would conduct the update immediately after receiving the aggregated + * request + * -# launch only one worker group. + * + * @return the original message or response message + */ + const std::vector<Msg*> HandleUpdate(Msg **msg); + /** + * Process PUT request. + * + * @return the original message or response message. If we don't want to + * acknowledge the put request, then return nullptr. + */ + Msg* HandlePut(Msg **msg); + /** + * Handle sync request from other server groups. + * + * It adds updates of Param (slice) from other server groups directly to + * the local Param (slice).
Currently, each Param (slice) has a master group, + * i.e., slice2group_[sliceid], which would receive such requests from all + * other server groups for the Param object. + * + * @param msg request msg containing the parameter updates + * @return response msg that contains the fresh parameter values. + */ + Msg* HandleSyncRequest(Msg** msg); + /** + * Handle sync response. + * + * The response msg includes the latest values of a Param object from the + * server group that maintains this Param object. + * The local Param values are replaced with the sum of the received Param + * values and the local updates applied since the sync request was sent. + * + * @param msg the response message + */ + void HandleSyncResponse(Msg** msg); + + protected: + int grp_id_ = -1; + int id_ = -1; + Updater* updater_ = nullptr; + //!< map from slice ID to ParamEntry; entries are deleted in the destructor + std::unordered_map<int, ParamEntry*> shard_; + std::vector<int> slice2group_, slice2server_; + //!< num of updates since the last sync with the master server group for a param/slice + std::vector<int> n_updates_; + //!< num of sync requests that have not been responded to + std::vector<int> n_pending_sync_; + std::vector<Blob<float>> last_sync_; + std::unordered_map<int, std::vector<Msg*>> buffer_requests_; +}; + +} // namespace singa + +#endif // SINGA_SERVER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/singa.h ---------------------------------------------------------------------- diff --git a/include/singa/singa.h b/include/singa/singa.h new file mode 100644 index 0000000..9bc5ba5 --- /dev/null +++ b/include/singa/singa.h @@ -0,0 +1,37 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_SINGA_H_ +#define SINGA_SINGA_H_ + +#include "singa/comm/socket.h" +#include "singa/io/store.h" +#include "singa/neuralnet/neuralnet.h" +#include "singa/neuralnet/layer.h" +#include "singa/proto/job.pb.h" +#include "singa/proto/singa.pb.h" +#include "singa/utils/common.h" +#include "singa/utils/param.h" +#include "singa/utils/singleton.h" +#include "singa/utils/factory.h" +#include "singa/driver.h" + +#endif // SINGA_SINGA_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/stub.h ---------------------------------------------------------------------- diff --git a/include/singa/stub.h b/include/singa/stub.h new file mode 100644 index 0000000..0ab6fd4 --- /dev/null +++ b/include/singa/stub.h @@ -0,0 +1,109 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. 
See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_STUB_H_ +#define SINGA_STUB_H_ + +#include <queue> +#include <unordered_map> +#include <vector> +#include <string> +#include "singa/comm/socket.h" +#include "singa/neuralnet/neuralnet.h" +#include "singa/proto/job.pb.h" +#include "singa/proto/singa.pb.h" +#include "singa/utils/factory.h" +#include "singa/utils/param.h" +#include "singa/utils/singleton.h" +#include "singa/server.h" +#include "singa/worker.h" + +namespace singa { + +class Stub { + public: + ~Stub(); + /** + * Find an endpoint to bind. + */ + void Setup(); + /** + * The Stub instance runs this function in the main thread to handle (e.g., + * forward) messages from workers and servers. + * + * @param[in] slice2server the k-th value is the ID of the server that is in + * charge of updating the Param slice with ID k. Large Param objects are + * sliced into subsets for load balancing. Different subsets are updated by + * different servers. + */ + void Run(const std::vector<int>& slice2server, + const std::vector<Worker*>& workers, + const std::vector<Server*>& servers); + + const std::string& endpoint() const { + return endpoint_; + } + + protected: + /** + * Create a socket for sending messages to the specified process. + * @param dst_procs the destination process's (logical) ID + * @return the newly created socket + */ + Dealer* CreateInterProcsDealer(int dst_procs); + /** + * Generate a request message to Get the parameter object. + */ + const std::vector<Msg*> HandleGetRequest(ParamEntry* entry, Msg** msg); + void HandleGetResponse(ParamEntry* entry, Msg** msg); + /** + * Generate a request message to Update the parameter object. + */ + const std::vector<Msg*> HandleUpdateRequest(ParamEntry* entry, Msg** msg); + /** + * Handle response msgs from servers for the update requests. + */ + void HandleUpdateResponse(ParamEntry* entry, Msg** msg); + /** + * Generate a request message to Put the parameter object. 
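 + * Since a large Param object is sliced across servers, one logical Put fans + * out into one message per slice, routed via the slice-to-server map. A + * minimal sketch of just the routing step (helper name and types are + * illustrative, not SINGA's API): + * @code + * std::vector<int> TargetServers(const std::vector<int>& slice_ids, + *                                const std::vector<int>& slice2server) { + *   std::vector<int> servers; + *   for (int sid : slice_ids) + *     servers.push_back(slice2server.at(sid)); + *   return servers; + * } + * @endcode 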
+ */ + const std::vector<Msg*> HandlePutRequest(ParamEntry* entry, Msg** msg); + /** + * Called by the HandlePut, HandleUpdate and HandleGet functions. + * @param type message type + * @param version param version + * @param entry the ParamEntry of the Param object concerned + * @param msg the request message being handled + * @param ret generated messages + */ + void GenMsgs(int type, int version, ParamEntry* entry, + Msg* msg, std::vector<Msg*> *ret); + + + protected: + Router *router_ = nullptr; + std::string endpoint_; + std::vector<int> slice2server_; +}; + +} // namespace singa + +#endif // SINGA_STUB_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/utils/blob.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/blob.h b/include/singa/utils/blob.h new file mode 100644 index 0000000..b6bcc3e --- /dev/null +++ b/include/singa/utils/blob.h @@ -0,0 +1,238 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +/** + * The code is adapted from that of Caffe whose license is attached. + * + * COPYRIGHT + * All contributions by the University of California: + * Copyright (c) 2014, The Regents of the University of California (Regents) + * All rights reserved. + * All other contributions: + * Copyright (c) 2014, the respective contributors + * All rights reserved. + * Caffe uses a shared copyright model: each contributor holds copyright over + * their contributions to Caffe. The project versioning records all such + * contribution and copyright details. If a contributor wants to further mark + * their specific copyright on a particular contribution, they should indicate + * their copyright solely in the commit message of the change when it is + * committed. + * LICENSE + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * 1. Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR + * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * CONTRIBUTION AGREEMENT + * By contributing to the BVLC/caffe repository through pull-request, comment, + * or otherwise, the contributor releases their content to the + * license and copyright terms herein. + * + */ +#ifndef SINGA_UTILS_BLOB_H_ +#define SINGA_UTILS_BLOB_H_ + +#include <glog/logging.h> +#include <memory> +#include <vector> +#include "singa/proto/common.pb.h" +#include "mshadow/tensor.h" +#include "mshadow/cxxnet_op.h" + +namespace singa { + +inline void MallocHost(void** ptr, size_t size) { + *ptr = malloc(size); +} + +inline void FreeHost(void* ptr) { + free(ptr); +} + +/** + * @brief Manages memory allocation and synchronization between the host (CPU) + * and device (GPU). + * + * TODO(dox): more thorough description. + */ +class SyncedMemory { + public: + enum SyncedHead { UNINITIALIZED, + HEAD_AT_CPU, + HEAD_AT_GPU, + SYNCED }; + + SyncedMemory() {} + explicit SyncedMemory(size_t size) : size_(size) {} + ~SyncedMemory(); + + const void* cpu_data(); + const void* gpu_data(); + void* mutable_cpu_data(); + void* mutable_gpu_data(); + void set_cpu_data(void* data); + inline SyncedHead head() { return head_; } + inline size_t size() { return size_; } + + private: + void to_cpu(); + void to_gpu(); + + void* cpu_ptr_ = nullptr; + void* gpu_ptr_ = nullptr; + size_t size_ = 0; + SyncedHead head_ = UNINITIALIZED; + bool own_cpu_data_ = false; +}; // class SyncedMemory + + +template <typename Dtype> +class Blob { + public: + Blob() {} + explicit Blob(const std::vector<int>& shape) { Reshape(shape); } + /** + * @brief Change the dimensions of the blob, allocating new memory if + * necessary. + * + * This function can be called both to create an initial allocation + * of memory, and to adjust the dimensions of a top blob during Layer::Reshape + * or Layer::Forward. When changing the size of a blob, memory will only be + * reallocated if sufficient memory does not already exist, and excess memory + * will never be freed. + * + * Note that reshaping an input blob and immediately calling Net::Backward is + * an error; either Net::Forward or Net::Reshape needs to be called to + * propagate the new input shape to higher layers. + */ + void Reshape(const std::vector<int>& shape); + void ReshapeLike(const Blob& other); + /** + * @brief Copy from a source Blob. + * + * @param source the Blob to copy from + * @param reshape if false, require this Blob to be pre-shaped to the shape + * of other (and die otherwise); if true, Reshape this Blob to other's + * shape if necessary + */ + void CopyFrom(const Blob<Dtype>& source); + void CopyFrom(const Blob<Dtype>& source, bool reshape); + void FromProto(const singa::BlobProto& proto); + void ToProto(singa::BlobProto* proto) const; + /** + * @brief Set the data_ shared_ptr to point to the SyncedMemory holding the + * data_ of Blob other -- useful in Layers which simply perform a copy + * in their Forward pass. 
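 + * The sharing semantics in miniature, as a standalone sketch (not SINGA + * code; blob.h already includes <memory>): + * @code + * auto mem = std::make_shared<SyncedMemory>(64);  // one shared buffer + * std::shared_ptr<SyncedMemory> a = mem;          // first owner + * std::shared_ptr<SyncedMemory> b = mem;          // second owner + * a.reset();  // drops a's reference; the memory lives on while b holds it + * @endcode 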
+ * + * This deallocates the SyncedMemory holding this Blob's data_, as + * shared_ptr calls its destructor when reset with the "=" operator. + */ + void ShareData(const Blob& other); + void Swap(Blob& other); + inline const std::vector<int>& shape() const { return shape_; } + inline int count() const { return count_; } + inline int version() const { return version_; } + inline void set_version(int v) { version_ = v; } + inline const Dtype* cpu_data() const { + CHECK(data_); + return static_cast<const Dtype*>(data_->cpu_data()); + } + inline void set_cpu_data(Dtype* data) { + CHECK(data); + data_->set_cpu_data(data); + } + inline const Dtype* gpu_data() const { + CHECK(data_); + return static_cast<const Dtype*>(data_->gpu_data()); + } + inline Dtype* mutable_cpu_data() { + CHECK(data_); + return static_cast<Dtype*>(data_->mutable_cpu_data()); + } + inline Dtype* mutable_gpu_data() { + CHECK(data_); + return static_cast<Dtype*>(data_->mutable_gpu_data()); + } + /// @brief Compute the sum of absolute values (L1 norm) of the data. + Dtype asum_data() const; + Dtype sum_data() const; + + protected: + std::shared_ptr<SyncedMemory> data_ = nullptr; + std::vector<int> shape_; + int count_ = 0; + int capacity_ = 0; + int version_ = -1; +}; // class Blob + +using namespace mshadow; +using mshadow::cpu; + +using mshadow::Shape; +using mshadow::Shape1; +using mshadow::Shape2; +using mshadow::Shape3; +using mshadow::Shape4; +using mshadow::Tensor; + +using std::vector; + +inline Tensor<cpu, 4> Tensor4(Blob<float>* blob) { + const vector<int>& shape = blob->shape(); + Tensor<cpu, 4> tensor(blob->mutable_cpu_data(), + Shape4(shape[0], shape[1], shape[2], shape[3])); + return tensor; +} + +inline Tensor<cpu, 3> Tensor3(Blob<float>* blob) { + const vector<int>& shape = blob->shape(); + Tensor<cpu, 3> tensor(blob->mutable_cpu_data(), + Shape3(shape[0], shape[1], blob->count() / shape[0] / shape[1])); + return tensor; +} + +inline Tensor<cpu, 2> Tensor2(Blob<float>* blob) { + const vector<int>& shape = blob->shape(); + Tensor<cpu, 2> tensor(blob->mutable_cpu_data(), + Shape2(shape[0], blob->count() / shape[0])); + return tensor; +} + +inline Tensor<cpu, 1> Tensor1(Blob<float>* blob) { + Tensor<cpu, 1> tensor(blob->mutable_cpu_data(), Shape1(blob->count())); + return tensor; +} + +} // namespace singa + +#endif // SINGA_UTILS_BLOB_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/utils/cluster.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/cluster.h b/include/singa/utils/cluster.h new file mode 100644 index 0000000..c1dc93b --- /dev/null +++ b/include/singa/utils/cluster.h @@ -0,0 +1,163 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. 
See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_UTILS_CLUSTER_H_ +#define SINGA_UTILS_CLUSTER_H_ + +#include <glog/logging.h> +#include <string> +#include <unordered_map> +#include <memory> +#include <vector> +#include "singa/proto/job.pb.h" +#include "singa/proto/singa.pb.h" +#include "singa/utils/cluster_rt.h" +#include "singa/utils/common.h" +#include "singa/utils/singleton.h" + +namespace singa { + +/** + * Cluster is a singleton object, which provides cluster configurations, + * e.g., the topology of the cluster. + * All IDs start from 0. + */ +class Cluster { + public: + // Cluster is a global singleton in a process + static Cluster* Setup(int job_id, const SingaProto& singaConf, + const ClusterProto& clusterConf); + static Cluster* Get(); + + inline int nserver_groups() const { return cluster_.nserver_groups(); } + inline int nworker_groups() const { return cluster_.nworker_groups(); } + inline int nworkers_per_group() const { return cluster_.nworkers_per_group(); } + inline int nservers_per_group() const { return cluster_.nservers_per_group(); } + inline int nworkers_per_procs() const { return cluster_.nworkers_per_procs(); } + inline int nservers_per_procs() const { return cluster_.nservers_per_procs(); } + inline int nworker_groups_per_server_group() const { + if (nserver_groups() == 0 || nservers_per_group() == 0) + return 1; + else + return cluster_.nworker_groups() / cluster_.nserver_groups(); + } + /** + * @return true if the calling process has server threads, otherwise false + */ + inline bool has_server() const { + if (server_worker_separate()) { + CHECK_LT(procs_id_, nprocs_); + return procs_id_ >= nworker_procs(); + } else { + return procs_id_ < nserver_procs(); + } + } + /** + * @return true if the calling process has worker threads. + */ + inline bool has_worker() const { + return procs_id_ < nworker_procs(); + } + /** + * @return the global process ID, which starts from 0. 
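 + * As a worked example under a hypothetical config with 4 worker groups, + * 2 workers per group and 2 workers per process: nworker_procs() = 4 * 2 / 2 + * = 4, so processes 0-3 run workers, and in server_worker_separate() mode a + * process with id >= 4 runs server threads (see has_server() above). 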
+ */ + inline int procs_id() const { return procs_id_; } + inline void set_procs_id(int procs_id) { procs_id_ = procs_id; } + inline bool server_worker_separate() const { + return cluster_.server_worker_separate(); + } + inline int nworker_procs() const { + return nworker_groups() * nworkers_per_group() / nworkers_per_procs(); + } + inline int nserver_procs() const { + return nserver_groups() * nservers_per_group() / nservers_per_procs(); + } + inline int nprocs() const { return nprocs_; } + /** + * @return endpoint of the router of the process with the specified id + */ + inline std::string endpoint(int procs_id) const { + CHECK_LT(procs_id, nprocs()); + CHECK_GE(procs_id, 0); + return cluster_rt_->GetProcHost(procs_id); + } + inline std::string workspace() const { return cluster_.workspace(); } + inline std::string vis_folder() const { + return cluster_.workspace() + "/visualization"; + } + inline std::string checkpoint_folder() const { + return cluster_.workspace() + "/checkpoint"; + } + /* + const int stub_timeout() const { return cluster_.stub_timeout(); } + const int worker_timeout() const { return cluster_.worker_timeout(); } + const int server_timeout() const { return cluster_.server_timeout(); } + */ + inline bool share_memory() const { return cluster_.share_memory(); } + inline int sync_freq() const { return cluster_.sync_freq(); } + inline int poll_time() const { return cluster_.poll_time(); } + ClusterRuntime* runtime() const { return cluster_rt_; } + + /** + * @return logical process ID + */ + inline int ProcsIDOf(int group_id, int id, int flag) { + return procs_ids_.at(Hash(group_id, id, flag)); + } + inline std::string hostip() const { return hostip_; } + + /** + * @param pid process ID + * @param group_size number of executors in a group + * @param procs_size number of executors in a process + * + * @return a vector with 4 integers: + * [group start, group end), [start executor, end executor) + */ + const std::vector<int> ExecutorRng(int pid, int group_size, int procs_size); + /** + * Register this process. + * + * @param pid physical process id obtained from the OS; all other procs IDs + * refer to logical process IDs. + * @param endpoint unique string for other processes to connect to + */ + void Register(int pid, const std::string& endpoint); + + private: + void Init(int job, const SingaProto& singaConf, + const ClusterProto& clusterConf); + void SetupFolders(const ClusterProto &cluster); + int Hash(int gid, int id, int flag); + + int procs_id_ = -1; + int nprocs_ = 0; + std::string hostip_ = ""; + // cluster config proto + ClusterProto cluster_; + SingaProto singa_; + ClusterRuntime* cluster_rt_ = nullptr; + std::unordered_map<int, int> procs_ids_; +}; + +} // namespace singa + +#endif // SINGA_UTILS_CLUSTER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/239ed217/include/singa/utils/cluster_rt.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/cluster_rt.h b/include/singa/utils/cluster_rt.h new file mode 100644 index 0000000..5de6c16 --- /dev/null +++ b/include/singa/utils/cluster_rt.h @@ -0,0 +1,190 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#ifndef SINGA_UTILS_CLUSTER_RT_H_ +#define SINGA_UTILS_CLUSTER_RT_H_ + +#include <zookeeper/zookeeper.h> +#include <string> +#include <vector> + +namespace singa { + +typedef void (*rt_callback)(void *context); + +const int kZKBufSize = 100; +// following paths are global +const std::string kZKPathSinga = "/singa"; +const std::string kZKPathSys = "/singa/sys"; +const std::string kZKPathJLock = "/singa/sys/job-lock"; +const std::string kZKPathHostIdx = "/singa/sys/host-idx"; +const std::string kZKPathApp = "/singa/app"; +const std::string kZKPathJob = "/singa/app/job-"; +// following paths are local under /singa/app/job-X +const std::string kZKPathJobGroup = "/group"; +const std::string kZKPathJobProc = "/proc"; +const std::string kZKPathJobPLock = "/proc-lock"; + +inline std::string GetZKJobWorkspace(int job_id) { + char buf[kZKBufSize]; + snprintf(buf, kZKBufSize, "%010d", job_id); + return kZKPathJob + buf; +} + +struct RTCallback { + rt_callback fn; + void* ctx; +}; + +struct JobInfo { + int id; + int procs; + std::string name; +}; + +/* + * A wrapper for the zookeeper service which handles error codes and + * reconnections + */ +class ZKService { + public: + static void ChildChanges(zhandle_t* zh, int type, int state, + const char *path, void* watcherCtx); + + ~ZKService(); + bool Init(const std::string& host, int timeout); + bool CreateNode(const char* path, const char* val, int flag, char* output); + bool DeleteNode(const char* path); + bool Exist(const char* path); + bool UpdateNode(const char* path, const char* val); + bool GetNode(const char* path, char* output); + bool GetChild(const char* path, std::vector<std::string>* vt); + bool WGetChild(const char* path, std::vector<std::string>* vt, + RTCallback *cb); + + private: + const int kNumRetry = 5; + const int kSleepSec = 1; + + static void WatcherGlobal(zhandle_t* zh, int type, int state, + const char *path, void* watcherCtx); + + zhandle_t* zkhandle_ = nullptr; +}; + +/** + * ClusterRuntime is a runtime service that manages dynamic configuration + * and status of the whole cluster. 
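 + * A typical worker-side call sequence might look like the sketch below + * (hypothetical ZooKeeper endpoint and IDs; error handling omitted): + * @code + * int job_id = 0, gid = 0, wid = 0, s_group = 0; + * ClusterRuntime rt("localhost:2181", job_id); + * rt.Init(); + * rt.JoinSGroup(gid, wid, s_group);   // start reading/updating these servers + * // ... do work against the server group ... + * rt.LeaveSGroup(gid, wid, s_group);  // all work done + * @endcode 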
It mainly provides the following services: + * 1) provide the running status of each server/worker + * 2) translate a process id to (hostname:port) + */ +class ClusterRuntime { + public: + ClusterRuntime(const std::string& host, int job_id); + ClusterRuntime(const std::string& host, int job_id, int timeout); + ~ClusterRuntime(); + + /** + * Initialize the runtime instance + */ + bool Init(); + /** + * register the process, and get a unique process id + * + * \return the process id, or -1 on failure + */ + int RegistProc(const std::string& host_addr, int pid); + /** + * translate the process id to a host address + * + * \return the host and port, or "" if there is no such proc id + */ + std::string GetProcHost(int proc_id); + /** + * Server: watch all workers in a server group, + * will be notified when all workers have left + */ + bool WatchSGroup(int gid, int sid, rt_callback fn, void* ctx); + /** + * Worker: join a server group (i.e., start to read/update these servers) + */ + bool JoinSGroup(int gid, int wid, int s_group); + /** + * Worker: leave a server group (i.e., it has finished all its work) + */ + bool LeaveSGroup(int gid, int wid, int s_group); + + private: + inline std::string groupPath(int gid) { + return group_path_ + "/sg" + std::to_string(gid); + } + inline std::string workerPath(int gid, int wid) { + return "/g" + std::to_string(gid) + "_w" + std::to_string(wid); + } + + int timeout_ = 30000; + std::string host_ = ""; + ZKService zk_; + std::string workspace_ = ""; + std::string group_path_ = ""; + std::string proc_path_ = ""; + std::string proc_lock_path_ = ""; + std::vector<RTCallback*> cb_vec_; +}; + +class JobManager { + public: + // host is a comma-separated list of host:port pairs, each corresponding to + // a zk server, e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002" + explicit JobManager(const std::string& host); + JobManager(const std::string& host, int timeout); + + // NOTICE: Init must be called once, before using the other functions + bool Init(); + // generate a unique job id + bool GenerateJobID(int* id); + // generate a list of hosts for a job conf + bool GenerateHostList(const char* host_file, const char* job_file, + std::vector<std::string>* list); + // list all jobs recorded in zk + bool ListJobs(std::vector<JobInfo>* jobs); + // list running processes for a job + bool ListJobProcs(int job, std::vector<std::string>* procs); + // remove a job path in zk + bool Remove(int job); + // remove all job paths in zk + bool RemoveAllJobs(); + // remove all singa related paths in zk + bool CleanUp(); + + private: + const int kJobsNotRemoved = 10; + + bool CleanPath(const std::string& path, bool remove); + std::string ExtractClusterConf(const char* job_file); + + int timeout_ = 30000; + std::string host_ = ""; + ZKService zk_; +}; + +} // namespace singa + +#endif // SINGA_UTILS_CLUSTER_RT_H_
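As a worked example of the job-path convention in cluster_rt.h above, GetZKJobWorkspace() zero-pads the job id to ten digits via snprintf("%010d", job_id), so job 37 maps to the znode "/singa/app/job-0000000037". A minimal standalone check of that arithmetic (illustrative only, independent of the header):

#include <cassert>
#include <cstdio>
#include <string>

int main() {
  char buf[100];  // mirrors kZKBufSize from the header above
  std::snprintf(buf, sizeof(buf), "%010d", 37);
  std::string path = std::string("/singa/app/job-") + buf;  // kZKPathJob + buf
  assert(path == "/singa/app/job-0000000037");
  return 0;
}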
