http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/input_layer.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/input_layer.h b/include/singa/neuralnet/input_layer.h deleted file mode 100644 index 0499c4b..0000000 --- a/include/singa/neuralnet/input_layer.h +++ /dev/null @@ -1,336 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_NEURALNET_INPUT_LAYER_H_ -#define SINGA_NEURALNET_INPUT_LAYER_H_ - -#include <string> -#include <vector> -#include <thread> -#include "singa/io/store.h" -#include "singa/io/kvfile.h" -#include "singa/neuralnet/layer.h" - -namespace singa { - -/** - * Base class for loading data from Store. - */ -class StoreInputLayer : virtual public InputLayer { - public: - ~StoreInputLayer(); - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - - protected: - /** - * Helper method for doing the prefetching, basically read (key,value) pairs - * to buf_keys and buf_vals_ vector of size batchsize_. - */ - void fetch_data(); - /** - * Parsing the (key, val) tuple to get feature (and label). - * Subclasses must implment this function. - * @param[in] k parse this tuple as the k-th instance of one mini-batch. - * @param[in] flag used to guide the parsing, e.g., kDeploy phase should not - * parse labels from the tuple. - * @param[in] key - * @param[in] val - */ - virtual bool Parse(int k, int flag, const string& key, const string& val) = 0; - - protected: - int batchsize_ = 1; - int random_skip_ = 0; - io::Store* store_ = nullptr; - vector<std::string> buf_keys_, buf_vals_; - std::thread *thread_ = nullptr; // prefetching thread -}; - -/** - * Base layer for parsing a key-value tuple as a feature vector with fixed - * length. The feature shape is indicated by users in the configuration. - * Each tuple may has a label. - */ -class SingleLabelRecordLayer : public StoreInputLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - - protected: - /** - * Load a single record (tuple), e.g., the mean or standard variance vector. 
- */ - virtual void LoadRecord(const string& backend, const string& path, - Blob<float>* to) = 0; - - protected: - /** - * Feature standardization by processing each feature dimension via - * @f$ y = (x - mu)/ std @f$ - * <a href= "http://ufldl.stanford.edu/wiki/index.php/Data_Preprocessing"> - * UFLDL</a> - */ - Blob<float> mean_, std_; -}; -/** - * Specific layer that parses the value string loaded by Store as a line from - * a CSV file. - * - * It assumes the first column is the label except that has_label_ is configured - * to false. Or the data is used in deploy mode. - */ -class CSVInputLayer : public SingleLabelRecordLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - - protected: - bool Parse(int k, int flag, const string& key, const string& val) override; - void LoadRecord(const string& backend, - const string& path, - Blob<float>* to) override; - - private: - std::string sep_; - bool has_label_; -}; - - -/** - * Specific layer that parses the value string loaded by Store into a - * RecordProto. - */ -class RecordInputLayer : public SingleLabelRecordLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - - protected: - /** - * Parse key as instance ID and val into RecordProto. - * @copydetails StoreInputLayer::Parse() - */ - bool Parse(int k, int flag, const string& key, const string& val) override; - void LoadRecord(const string& backend, - const string& path, - Blob<float>* to) override; - - private: - // TODO(wangwei) decode the image - bool encoded_; -}; - -/** - * Do preprocessing for images, including cropping, mirroring, resizing. - */ -class ImagePreprocessLayer : public InputLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers); - - private: - bool mirror_ = false; - int cropsize_ = 0; - int resize_ = 0; - float scale_ = 1; -}; - -class OneHotLayer : public InputLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers); - - private: - int batchsize_, dim_; -}; - -/** - * * Read the ASCII file as a large string used for RNN model where each character - * * is a single input to the unrolled RNN layer. - * * max string length is string::max_size(); - * */ -class CharRNNInputLayer : public InputLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers); - - private: - int batchsize_ = 0, unroll_len_ = 1; - unsigned offset_ = 0; - string path_, vocab_path_; - string buf_; - vector<int> start_; - std::unordered_map<char, int> char2index_; -}; - -/** - * Label layer for fetching labels from the src input layer for RNN models. - * The i-th unrolled layer fetch label from the input layer via data(i+1). - * Particularly, it shares data_ Blob with data(i+1) of its src layer. - */ -class RNNLabelLayer : public InputLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers); - void ComputeFeature(int flag, const vector<Layer*>& srclayers); -}; - - -/****************Deprecated layers******************/ -/** - * @deprecated please use the StoreInputLayer. - * - * Base layer for reading ::Record from local Shard, HDFS, lmdb, etc. 
- */ -class DataLayer: virtual public InputLayer { - public: - Blob<float>* mutable_data(const Layer* layer) override { return nullptr; } - ConnectionType dst_layer_connection() const override { - return kOneToMany; - } - - inline int batchsize() const { return batchsize_; } - virtual const Record& sample() const { - return sample_; - } - /** - * @return the loaded records - */ - virtual const std::vector<Record>& records() const { - return records_; - } - - protected: - int random_skip_; - int batchsize_; - Record sample_; - std::vector<Record> records_; -}; -/** - * @deprecated Please use the subclasses of StoreInputLayer. - * - * Layer for loading Record from DataShard. - */ -class ShardDataLayer : public DataLayer { - public: - ~ShardDataLayer(); - - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - - private: - DataShard* shard_; -}; -/** - * @deprecated please use the subclasses of StoreInputLayer. - * - * Layer for loading Record from LMDB. - */ -#ifdef USE_LMDB -#include <lmdb.h> -class LMDBDataLayer : public DataLayer { - public: - ~LMDBDataLayer(); - - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void OpenLMDB(const std::string& path); - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ConvertCaffeDatumToRecord(const CaffeDatum& datum, - SingleLabelImageRecord* record); - - private: - MDB_env* mdb_env_; - MDB_dbi mdb_dbi_; - MDB_txn* mdb_txn_; - MDB_cursor* mdb_cursor_; - MDB_val mdb_key_, mdb_value_; -}; -#endif - -/******************Parser layers***************/ -/** - * @deprecated Please use the subclasses of StoreInputLayer which load and parse - * data in a single layer. - * - * Base layer for parsing the input records into Blobs. - */ -class ParserLayer : public InputLayer { - public: - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {} - ConnectionType dst_layer_connection() const override { - return kOneToMany; - } - /** - * Parse records from DataLayer into blob. - */ - virtual void ParseRecords(int flag, const std::vector<Record>& records, - Blob<float>* blob) = 0; -}; -/** - * - * @deprecated Please use the SingleLabelRecordLayer which parses both feature - * and label for each record. Its aux_data() function returns the parsed labels. - * - * Derived from ParserLayer to parse label in SingaleLabelImageRecord loaded by - * ShardDataLayer. - */ -class LabelLayer : public ParserLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ParseRecords(int flag, const std::vector<Record>& records, - Blob<float>* blob) override; -}; - -/** - * @deprecated Please use the subclasses of StoreInputLayer. - * - * Derived from ParserLayer to parse MNIST feature from SingaleLabelImageRecord. - */ -class MnistLayer : public ParserLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ParseRecords(int flag, const std::vector<Record>& records, - Blob<float>* blob) override; - - protected: - float norm_a_, norm_b_; -}; -/** - * @deprecated please use the ImagePreprocessLayer which preprocess image - * feature from data Blob of source layers. - * - * Derived from ParserLayer to parse RGB image feature from - * SingaleLabelImageRecord. 
- */ -class RGBImageLayer : public ParserLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ParseRecords(int flag, const std::vector<Record>& records, - Blob<float>* blob) override; - - private: - float scale_; - int cropsize_; - bool mirror_; - Blob<float> mean_; -}; -} // namespace singa - -#endif // SINGA_NEURALNET_INPUT_LAYER_H_
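For illustration of the CSVInputLayer declared above: each value string loaded from the Store is one CSV line, and the first column is read as the label unless has_label_ is false. A minimal standalone sketch of that parsing step, using a plain std::vector instead of the Blob the real layer fills, with the function name ParseCsvRecord chosen only for this example:

    #include <sstream>
    #include <string>
    #include <vector>

    // Split one CSV value string into an optional leading label and the features.
    // Only the first character of the configured separator is used here.
    bool ParseCsvRecord(const std::string& val, const std::string& sep,
                        bool has_label, int* label, std::vector<float>* feature) {
      std::stringstream ss(val);
      std::string token;
      if (has_label) {
        if (!std::getline(ss, token, sep[0])) return false;
        *label = std::stoi(token);
      }
      feature->clear();
      while (std::getline(ss, token, sep[0]))
        feature->push_back(std::stof(token));
      return !feature->empty();
    }

For example, with sep = "," and has_label = true, the line "3,0.1,0.7" yields label 3 and features {0.1, 0.7}.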
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/layer.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/layer.h b/include/singa/neuralnet/layer.h deleted file mode 100644 index c8ea3fc..0000000 --- a/include/singa/neuralnet/layer.h +++ /dev/null @@ -1,376 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_NEURALNET_LAYER_H_ -#define SINGA_NEURALNET_LAYER_H_ - -#include <string> -#include <vector> -#include "singa/proto/common.pb.h" -#include "singa/proto/job.pb.h" -#include "singa/utils/common.h" -#include "singa/utils/blob.h" -#include "singa/utils/param.h" - -namespace singa { -using std::vector; -using std::string; - -// TODO(wangwei) make AuxType a template argument for Layer. -using AuxType = int; - -inline const string AddUnrollingPrefix(int unroll_idx, const string& name) { - return std::to_string(unroll_idx) + "#" + name; -} -inline const string AddPartitionSuffix(int partition_idx, const string& name) { - return name + "@" + std::to_string(partition_idx); -} - - -inline const string AddPrefixSuffix(int unroll_idx, int partition_idx, - const string& name) { - return std::to_string(unroll_idx) + "#" + name + "@" + - std::to_string(partition_idx); -} -/** - * Base layer class. - * - * Subclasses should implement at least - * Layer::ComputeFeature() and Layer::ComputGradient() - * functions in accordance with the NeuralNet::TrainOneBatch function. - */ - -class Layer { - public: - /** - * Create a sub-layer instance based on proto.type(); - * - * @param proto configuration of the layer instance. - * @return pointer to the newly created layer instance. - */ - static Layer* Create(const LayerProto& proto); - - Layer() {} - virtual ~Layer() {} - - /** - * Create for python binding, production test mode - * - */ - static Layer* CreateLayer(const string str); - static void SetupLayer(Layer* layer, const string str, const vector<Layer*>& srclayers); - - /** - * Setup layer properties. - * - * Setup members e.g., shapes of Param objects based on the layer - * configuration and connected layers. - * It should check the partition setting when setup the properties. - * - * @param conf layer configuration. - * @param srclayers source layers that connect to this layer. - */ - virtual void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) { - layer_conf_ = conf; - datavec_.push_back(&data_); - gradvec_.push_back(&grad_); - } - - - /** - * Compute features of this layer based on connected layers. 
- * - * @param[in] flag set by the TrainOneBatch function, e.g., to indicate the - * running phase (kForward|kTrain, kForward|kTest, etc). - * @param[in] srclayers source layers that connect to this layer. - */ - virtual void ComputeFeature(int flag, const vector<Layer*>& srclayers) = 0; - /** - * Compute gradients for parameters associated with this layer. - * It may also compute the gradients of the loss w.r.t the source layers. - * - * \copydetails ComputeFeature(). - */ - virtual void ComputeGradient(int flag, const vector<Layer*>& srclayers) = 0; - /** - * Layers that have paramters must override this function to return all Param - * objects associated with this layer. - * - * @return parameters associated with this layer. - */ - virtual const std::vector<Param*> GetParams() const { - return std::vector<Param*> {}; - } - virtual void SetParams(std::vector<Param*>) {} - /** - * Return the connection type between one neuron of this layer and its source - * layer. - * - * Currently support two connection types: kOneToOne, and kOneToAll. - * - kOneToOne indicates the neuron depends on only one neuron from src layer. - * - kOneToAll indicates the neuron depends on all neurons from src layer. - * TODO(wangwei) support kOneToMany. - * - * @param[in] k index of source layer, current only support k = 0. - * @return connection type. - */ - virtual ConnectionType src_neuron_connection(int k) const { - // CHECK_LT(k, srclayers_.size()); - return kOneToOne; - } - /** - * Return the connection type of this layer and all dst layers. - * - * Currently support two connection types: kOneToOne, and kOneToMany. - * - kOneToOne indicates the users implement the ComputeFeature and - * ComputeGradient function considering only one dst layer. In this case, - * a SplitLayer will be added automatically to connect this layer with all - * dest layer. - * - kOneToMany indicates this layer has already considered multiple dst - * layers in the implementation. - * - * @return connection type default is kOneToOne. - */ - virtual ConnectionType dst_layer_connection() const { - return kOneToOne; - } - /** - * To display layer info, e.g., aggreated loss/accuracy, or norm of feature - * vector and norm of parameters. - * - * @param[in] debug whether print the debug info - * @param[in] flag used to get the calling phase, e.g., forward of training - * (kForward | kTrain). - * @return info string about this layer, which is printed into the log. - */ - virtual const std::string ToString(bool debug, int flag); - /** - * @return partition dimension of this layer, - * - -1 for no partition. - * - 0 for partition on the data dimension, i.e., partitioning the mini-batch - * into sub-mini-batches. - * - 1 for partition this layer on feature dimension, i.e., the feature - * vector of each instance is partitioned into sub-vectors. - */ - inline int partition_dim() const { - CHECK_LE(layer_conf_.partition_dim(), 1); - return layer_conf_.partition_dim(); - } - /** - * @return the partition ID (i.e., the worker ID to whom is layer is - * dispatched) of this layer, which is a sublayer partitioned from the - * original layer. - */ - inline int partition_id() const { return layer_conf_.partition_id(); } - /** - * @return total number of partitions (i.e., sub-layers) of the original - * layer of this layer. - */ - inline int num_partitions() const { return layer_conf_.num_partitions(); } - /** - * @return the type of this layer, only valid for built-in layer (types). 
- */ - inline LayerType type() const { return layer_conf_.type(); } - /** - * @return user-defined layer type. - */ - inline const std::string& user_type() const { - return layer_conf_.user_type(); - } - /** - * Return name of this layer - */ - inline const std::string& name() const { return layer_conf_.name(); } - /** - * Return the index of the unrolled layer within the unrolling group, which - * should be [0, max_unrolling_length) - */ - inline const int unroll_index() const { return layer_conf_.unroll_index(); } - - /** - * @return a const ref for Blob vector storing feature values of this layer. - */ - virtual const vector<Blob<float>*>& data() { - return datavec_; - } - - /** - * @param[in] from pointer to one of the dst layer. For some layers, they have - * more than one data Blob. In this case, this argument identifies the layer - * that is requesting the data Blob. - * @return a const ref for Blob storing feature values of this layer. - * @deprecated {This function will be deleted, use - * virtual const vector<Blob<float>>& data() const or - * virtual const Blob<float>& data(int k) const instead}. - */ - virtual const Blob<float>& data(const Layer* from) { - return data_; - } - /** - * @return a const ref for the kth Blob. - * TODO(wangwei) if make this function const, there will be a warning - * indicating that data(const Layer*) and this function are ambiguous for - * data(0). - */ - virtual const Blob<float>& data(int k) { - return *datavec_.at(k); - } - - /** - * @see data(). - * @return the pointer to the Blob storing feature values of this layer. - * @deprecated {This function will be deleted, use - * virtual Blob<float>* mutable_data(int k) instead}. - */ - virtual Blob<float>* mutable_data(const Layer* from) { - return &data_; - } - /** - * @return the pointer to the kth Blob. - */ - virtual Blob<float>* mutable_data(int k) { - return datavec_.at(k); - } - /** - * @return auxiliary data, e.g., image label. - */ - virtual const vector<AuxType>& aux_data(const Layer* from = nullptr) { - return aux_data_; - } - /** - * @see data(). - * @return the const ref of the Blob for the gradient of this layer, mainly - * used in BP algorithm. - * @deprecated {This function will be deleted, use - * virtual const vector<Blob<float>>& grad() const or - * virtual const Blob<float>& grad(int k) const instead}. - */ - virtual const Blob<float>& grad(const Layer* from) { - return grad_; - } - /** - * @see data(). - * @return the const ref of the Blob vector for the gradient of this layer. - */ - virtual const vector<Blob<float>*>& grad() const { - return gradvec_; - } - /** - * @return the const ref of the kth Blob for the gradient of this layer. - */ - virtual const Blob<float>& grad(int k) const { - return *gradvec_.at(k); - } - /** - * @see data(). - * @return a pointer to the Blob storing gradients of this layer, mainly - * used in BP algorithm. - */ - virtual Blob<float>* mutable_grad(const Layer* from) { - return &grad_; - } - /** - * @see data(). - * @return a pointer to the kth Blob storing gradients of this layer, mainly - * used in BP algorithm. - */ - virtual Blob<float>* mutable_grad(int k) { - return gradvec_.at(k); - } - - protected: - LayerProto layer_conf_; - Blob<float> data_, grad_; - vector<AuxType> aux_data_; - vector<Blob<float>*> datavec_, gradvec_; -}; -/**************** Layer categories *****************/ -/** - * Base layer for connecting layers when neural net is partitioned. 
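To show how the Layer interface above is typically filled in, here is a hypothetical pass-through layer; the class name is invented for this sketch and the method bodies are left as comments rather than guessing at the exact Blob API:

    #include "singa/neuralnet/layer.h"

    namespace singa {
    // Hypothetical identity layer: forwards features unchanged and passes
    // gradients straight back to its single source layer.
    class PassThroughLayer : public Layer {
     public:
      void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override {
        Layer::Setup(conf, srclayers);  // stores conf, registers data_/grad_ in datavec_/gradvec_
        // reshape data_ and grad_ to match srclayers[0]->data(this) here
      }
      void ComputeFeature(int flag, const vector<Layer*>& srclayers) override {
        // copy srclayers[0]->data(this) into data_
      }
      void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {
        // copy grad_ into *srclayers[0]->mutable_grad(this)
      }
    };
    }  // namespace singa

When the net is unrolled or partitioned, instances of such a layer are renamed with the AddUnrollingPrefix/AddPartitionSuffix helpers above, e.g. "2#conv1@0" for unroll index 2 and partition 0.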
- */ -class ConnectionLayer : virtual public Layer { - // defined as a layer category -}; - - -/** - * Base layer for getting input data. May include layers for loading records, - * parsing records. - */ -class InputLayer : virtual public Layer { - public: - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {} - ConnectionType dst_layer_connection() const override { return kOneToMany; } - Blob<float>* mutable_grad(const Layer* layer) override { - return nullptr; - // LOG(FATAL) << "Input layer has no gradient blob"; - } - const Blob<float>& grad(const Layer* from) override { - return grad_; - // LOG(FATAL) << "Input layer has no gradient blob"; - } -}; - -using SingleLabelImageRecord = RecordProto; - -/** - * Base layer for feature transformation, e.g., ConvolutionLayer, PoolingLayer, - * etc. - */ -class NeuronLayer : virtual public Layer { - // defined as a layer category -}; - - -/** - * Base layer for calculating loss and doing BackPropagation. - */ -class LossLayer : virtual public Layer { - public: - Blob<float>* mutable_grad(const Layer* layer) override { - return nullptr; - // LOG(FATAL) << "Loss layer has no gradient blob"; - } - const Blob<float>& grad(const Layer* from) override { - return grad_; - // LOG(FATAL) << "Loss layer has no gradient blob"; - } -}; - -/** - * Base layer for collecting features into disk file, HTTP stream, etc. - */ -class OutputLayer : virtual public Layer { - public: - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override {} - Blob<float>* mutable_grad(const Layer* layer) override { - return nullptr; - // LOG(FATAL) << "Output layer has no gradient blob"; - } - const Blob<float>& grad(const Layer* from) override { - return grad_; - // LOG(FATAL) << "Output layer has no gradient blob"; - } -}; - - -} // namespace singa -#endif // SINGA_NEURALNET_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/loss_layer.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/loss_layer.h b/include/singa/neuralnet/loss_layer.h deleted file mode 100644 index 53ddc82..0000000 --- a/include/singa/neuralnet/loss_layer.h +++ /dev/null @@ -1,83 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_NEURALNET_LOSS_LAYER_H_ -#define SINGA_NEURALNET_LOSS_LAYER_H_ - -#include <vector> -#include <string> -#include "singa/neuralnet/layer.h" -#include "singa/neuralnet/neuron_layer.h" - -namespace singa { -using std::vector; -/** - * Squared Euclidean loss as @f$0.5 ||p - t||^2@f$, where p is prediction - * result, t is the ground truth. 
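The squared Euclidean loss just described can be written down in a few lines; this standalone helper works on plain arrays rather than the source layers' Blobs:

    #include <cstddef>

    // 0.5 * ||p - t||^2 for a single instance of dimension dim.
    float EuclideanLoss(const float* p, const float* t, std::size_t dim) {
      float sum = 0.0f;
      for (std::size_t i = 0; i < dim; ++i) {
        float d = p[i] - t[i];
        sum += d * d;
      }
      return 0.5f * sum;
    }
    // e.g. p = {0.2, 0.8}, t = {0, 1}  ->  0.5 * (0.04 + 0.04) = 0.04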
- */ -class EuclideanLossLayer : public LossLayer { - public: - void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - const std::string ToString(bool debug, int flag) override; - - private: - int counter_ = 0; - float loss_ = 0.0f; -}; -/** - * Cross-entropy loss applied to the probabilities computed from Softmax. - * @f$ L_i = -log P_{t_i}, t_i\in [0, C] @f$ is the label for the i-th object, - * C is the total number of classes. - */ -class SoftmaxLossLayer : public LossLayer { - public: - void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - const std::string ToString(bool debug, int flag) override; - - private: - int batchsize_, topk_, dim_, counter_ = 0; - float scale_; - float loss_ = 0.0f, accuracy_ = 0.0f; -}; - -#ifdef USE_CUDNN -class CudnnSoftmaxLossLayer : public LossLayer{ - public: - void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - const std::string ToString(bool debug, int flag) override; - - private: - int batchsize_, dim_; - int counter_ = 0; - float loss_ = 0.0f; - - CudnnSoftmaxLayer softmax_; -}; -#endif -} // namespace singa - -#endif // SINGA_NEURALNET_LOSS_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/neuralnet.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuralnet.h b/include/singa/neuralnet/neuralnet.h deleted file mode 100644 index 33ad38c..0000000 --- a/include/singa/neuralnet/neuralnet.h +++ /dev/null @@ -1,173 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_NEURALNET_NEURALNET_H_ -#define SINGA_NEURALNET_NEURALNET_H_ - -#include <string> -#include <vector> -#include <unordered_map> - -#include "singa/neuralnet/layer.h" -#include "singa/proto/job.pb.h" -#include "singa/utils/factory.h" -#include "singa/utils/graph.h" - -namespace singa { -using std::unordered_map; -using std::string; -using std::vector; -/** - * The neural network is constructed from user configurations in NetProto. - * - * Some layers, e.g., SplitLayer and BridgeSrcLayer/BridgeDstLayer - * will be added implicitly to partition the neural network. 
- * TODO(wangwei) create wrappers for popular models, e.g., MLP, CNN. - */ -class NeuralNet { - public: - /** - * Create the neural network for training, test or validation. - * - * Parameters for test/validation net can share those from training after - * setup (done outside of this funcion). - * - * @param net_conf proto for the neural network - * @param phase test/training/validation - * @param npartitions num of partitions, do partitioning if num > 1 - * @return pointer to a neural net - */ - static NeuralNet* Create(const NetProto& net_conf, Phase phase, - int npartitions); - - static const NetProto Unrolling(const NetProto& net_conf); - /** - * construct the net structure from protocol buffer. - * @param netproto neural net config - * @param npartitions num of partitions. 1 for no partitioning. - */ - NeuralNet(NetProto net_conf, int num_partitions); - ~NeuralNet(); - /** - * Load net params from checkpoint fiels. - * @param path checkpoint files - */ - void Load(const vector<string>& path); - /** - * load specified Param objects from from checkpoint files. - * - * Param objects and blobs are matched based on name. - * The param from previous checkpoint files will be overwritten by - * the param with the same name in later checkpoint files. - * - * @param[in] path - * @param[in,out] params load Blobs with the same name as the Params in this - * this dictionary. The Param values are copied into the corresponding Param - * objects. - */ - static void Load(const vector<string>& path, - const unordered_map<string, Param*>& params); - /** - * To display the adjacency layers - std::string ToAdjacency(); - */ - /** - * Share memory of parameter values from other neuralnet - * @param[in] other the neural net from which to share the Params - * @param[in] cpu_only if true only share cpu memory; else, share both cpu - * and gpu memory. - */ - void ShareParamsFrom(NeuralNet* other, bool cpu_only); - inline const std::vector<Layer*>& layers() const { return layers_; } - inline const std::vector<Param*>& params() const { return params_; } - inline Layer* name2layer(std::string name) const { - if (name2layer_.find(name) == name2layer_.end()) - return nullptr; - else - return name2layer_.at(name); - } - inline const std::vector<Layer*>& srclayers(const Layer* layer) const { - CHECK(src_map_.find(layer) != src_map_.end()) - << "layer (" << layer->name() << " ) has no source layers"; - return src_map_.at(layer); - } - Layer* last_unroll_layer(const Layer* layer) const { - auto pos = layer->name().find("#"); - if (pos == std::string::npos) - return nullptr; - string last_name = std::to_string(unroll_len_) + layer->name().substr(pos); - CHECK(name2layer_.find(last_name) != name2layer_.end()) - << "layer name = " << last_name << " has no unroll layers"; - return name2layer_.at(last_name); - } - inline Param* paramid2param(int id) const { return paramid2param_.at(id); } - - /** - * Conver the neural net into graph representation. - * Each layer is converted into a node. - * @param include_shape if true label the node with shape info - */ - const Graph ToGraph(bool include_shape) const; - - protected: - /** - * Create a neural net graph, one node for each layer. - * - * Partition the graph if npartitions > 1, each layer is sliced according to - * its own partition setting. - * @param netproto - * @npartitions - * @return neural net graph - */ - Graph* CreateGraph(const NetProto& netproto, int num_partitions); - /** - * Create neural net from graph, one layer per node. 
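As a rough usage sketch of the public NeuralNet interface above (kTrain is assumed to be the Phase constant for training, as the comments suggest; the real training loop with ComputeFeature/ComputeGradient calls lives in the Worker):

    #include <iostream>
    #include "singa/neuralnet/neuralnet.h"

    // Build the unpartitioned training net from a parsed NetProto and list its layers.
    void DumpNetLayers(const singa::NetProto& net_conf) {
      singa::NeuralNet* net = singa::NeuralNet::Create(net_conf, singa::kTrain, 1);
      for (singa::Layer* layer : net->layers())
        std::cout << layer->name() << std::endl;
      delete net;
    }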
- */ - void CreateNetFromGraph(Graph* graph); - /** - * prepare data structures, e.g., params_, layers_, etc. - */ - void PrepareDataStructures(); - void PrepareDataStructures(const NetProto& proto); - /** - * add split layers, due to connections to multiple dst-layers - */ - NetProto AddModelSplitLayers(const NetProto& netproto); - /** - * add connection layers, due to partition of the whole nerualnet - * this should be done after AddModelSplitLayers() - */ - NetProto AddPartitionConnectionLayers(const NetProto& netproto, - int npartitions); - - protected: - int unroll_len_ = 1; - std::vector<Layer*> layers_; - std::vector<Param*> params_; - - unordered_map<std::string, Layer*> name2layer_; - unordered_map<int, Param*> paramid2param_; - unordered_map<const Layer*, std::vector<Layer*>> src_map_; -}; - -} // namespace singa - -#endif // SINGA_NEURALNET_NEURALNET_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/neuron_layer.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h deleted file mode 100644 index e6f0fd5..0000000 --- a/include/singa/neuralnet/neuron_layer.h +++ /dev/null @@ -1,560 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_NEURALNET_NEURON_LAYER_H_ -#define SINGA_NEURALNET_NEURON_LAYER_H_ - -#include <vector> -#include <string> -#include "singa/neuralnet/layer.h" -#include "singa/proto/job.pb.h" -#include "singa/utils/context.h" -#include "singa/utils/singleton.h" - -#ifdef USE_CUDNN -#include <cudnn.h> -#endif - -namespace singa { - -/* Activation layer applies following activations, - * - "relu", @f$ f(x) = max(0, x)@f$ - * - "sigmoid", @f$ f(x)=1/(1+exp(-x)) @f$ - * - "tanh", @f$ f(x) = tanh(x) @f$ - * - "stanh", scaled tanh @f$f(x)=1.7159047 * tanh(0.66666667 * x)@f$, valid - * only for CPU training. - * It may share data and grad with its (single) source layer depending on - * the share_srclayer_blob configuration field. - */ -class ActivationLayer : public NeuronLayer { - public: - void Setup(const LayerProto& conf, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - protected: - bool share_with_srclayer = false; - std::string method_; -}; - -/** - * Convolution layer. - * Currently using Mshadow to do convolution operations. TODO(wangwei) remove - * dependency on Mshadow and using im2col from Caffe to implement this for CPU - * version. 
For GPU version, there is class CudnnConvLayer. - */ -class ConvolutionLayer : public NeuronLayer { - public: - ~ConvolutionLayer(); - - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - const std::vector<Param*> GetParams() const override { - std::vector<Param*> params{weight_, bias_}; - return params; - } - ConnectionType src_neuron_connection(int k) const override { - // CHECK_LT(k, srclayers_.size()); - return kOneToAll; - } - - protected: - int kernel_x_, pad_x_, stride_x_; - int kernel_y_, pad_y_, stride_y_; - int batchsize_, channels_, height_, width_; - int col_height_, col_width_, conv_height_, conv_width_, num_filters_; - Param* weight_ = nullptr, *bias_ = nullptr; - Blob<float> col_data_, col_grad_; -}; - -/** - * Implement convolution operations using im2col from Caffe. - */ -class CConvolutionLayer : public ConvolutionLayer { - public: - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; -}; - -/** - * Layer that drops out some neurons randomly according to a user defined drop - * ratio (default is 0.5). It helps reduce overfitting. - */ -class DropoutLayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - protected: - // drop probability - float pdrop_; - /* record which neuron is dropped, required for back propagating gradients, - * if mask[i]=0, then the i-th neuron is dropped. - */ - Blob<float> mask_; -}; -/** - * This layer is dummy and do no real work. - * It is used for testing purpose only. - * - * Use it as input layer, it will generate random data; - * Use it as output layer, it will generate random grad; - * Use it as neuron layer, it will replicates data and grad. - */ -class DummyLayer: public NeuronLayer { - public: - void Setup(const std::string str, const vector<Layer*>& srclayers); - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - void Feed(int batchsize, vector<float>& data, vector<int>& aux_data); - Layer* ToLayer() { return this;} - - private: - bool input_ = false; // use as input layer - bool output_ = false; // use as output layer - int batchsize_ = 1; // use for input layer -}; - -/** - * Embedding layer that converts an array of index ID into a matrix. - * - * Each index ID corresponds to a word (or feature) vector in the vocabulary - * matrix maintained by the embedding layer. - * The index ID ranges within [0, |D|), where |D| is the size of the vocabulary, - * i.e., the number of rows of the vocabulary matrix. - * If the index is -1, which means it is a padding word. A feature vector with - * all values 0 will be constructed and inserted into the feature Blob. - * Users handle special words by themseleves. For example, the index 0 could be - * the starting word/symbol of a sentence, the index 1 could be the ending - * word/symbol of a sentence. 
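The lookup the EmbeddingLayer comment describes amounts to selecting one row of the vocabulary matrix per index, with -1 mapping to an all-zero vector; a standalone sketch with plain vectors in place of the layer's Param/Blob types:

    #include <vector>

    // vocab holds |D| rows of feature_dim values, stored row-major.
    std::vector<float> LookupRow(const std::vector<float>& vocab, int feature_dim,
                                 int index) {
      std::vector<float> row(feature_dim, 0.0f);  // index -1 (padding) stays all zero
      if (index >= 0)
        for (int j = 0; j < feature_dim; ++j)
          row[j] = vocab[index * feature_dim + j];
      return row;
    }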
- */ -class EmbeddingLayer : public NeuronLayer { - public: - ~EmbeddingLayer() { - delete vocab_; - } - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - const std::vector<Param*> GetParams() const override { - std::vector<Param*> params; - params.push_back(vocab_); - return params; - } - - private: - int vocab_size_, feature_dim_, batchsize_; - //!< the vocabulary matrix to be learned - Param *vocab_; -}; - -class GRULayer : public NeuronLayer { - public: - ~GRULayer(); - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - ConnectionType dst_layer_connection() const override { - return kOneToMany; - } - Blob<float>* mutable_grad(const Layer* from) override { - if (typeid(*from) == typeid(GRULayer)) - return gradvec_[1]; - else - return gradvec_[0]; - } - const Blob<float>& grad(const Layer* from) override { - if (typeid(*from) == typeid(GRULayer)) - return *gradvec_[1]; - else - return *gradvec_[0]; - } - const std::vector<Param*> GetParams() const override { - std::vector<Param*> params{weight_z_hx_, weight_r_hx_, weight_c_hx_, - weight_z_hh_, weight_r_hh_, weight_c_hh_}; - - if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) { - params.push_back(bias_z_); - params.push_back(bias_r_); - params.push_back(bias_c_); - } - return params; - } - - private: - int batchsize_; // batch size - int vdim_, hdim_; // dimensions - Blob<float> *update_gate_, *reset_gate_, *new_memory_; - Param *weight_z_hx_, *weight_z_hh_, *bias_z_; // update gate - Param *weight_r_hx_, *weight_r_hh_, *bias_r_; // reset gate - Param *weight_c_hx_, *weight_c_hh_, *bias_c_; // new memory -}; - -/** - * Layer that applys linear transformations as - * @f$ h = v*W+b @f$, where W and b are weight matrix and bias vector. - */ -class InnerProductLayer : public NeuronLayer { - public: - ~InnerProductLayer(); - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - ConnectionType src_neuron_connection(int k) const override { - return kOneToAll; - } - const std::vector<Param*> GetParams() const override { - std::vector<Param*> params{weight_, bias_}; - return params; - } - - void SetParams(std::vector<Param*> params) { - weight_ = params.at(0); - bias_ = params.at(1); - } - - private: - int batchsize_; - int vdim_, hdim_; - bool transpose_; - Param *weight_, *bias_; -}; - -/** - * Local Response Normalization edge - * - * @f$ b_i=a_i/x_i^beta @f$ - * @f$x_i=knorm+alpha*\sum_{j=max(0,i-n/2)}^{min(N,i+n/2)}(a_j)^2 @f$ - * n is size of local response area. - * @f$a_i@f$, the activation (after ReLU) of a neuron convolved with the i-th kernel. 
- * @f$b_i@f$, the neuron after normalization, N is the total num of kernels - */ -class LRNLayer : public NeuronLayer { - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - protected: - //!< shape of the feature blob of the src layer - int batchsize_, channels_, height_, width_; - //!< size local response (neighbor) area - int lsize_; - //!< hyper-parameter - float alpha_, beta_, knorm_; - Blob<float> norm_; -}; - -/** - * Layer that applies the pooling operation. - * TODO(wangwei) remove dependenices on mshadow - */ -class PoolingLayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - protected: - int kernel_x_, pad_x_, stride_x_; - int kernel_y_, pad_y_, stride_y_; - int batchsize_, channels_, height_, width_, pooled_height_, pooled_width_; - PoolingProto_PoolMethod pool_; -}; -/** - * Use book-keeping for BP following Caffe's pooling implementation - */ -class CPoolingLayer : public PoolingLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers); - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - private: - Blob<float> mask_; -}; - -/** - * @deprecated {please use ActivationLayer} - */ -class ReLULayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; -}; - -/** - * Softmax layer applies softmax transformation to features from source layers. - * The feature blob of this layer is of shape (batchsize, - * num_softmax_per_instance, count_per_softmax), where num_softmax_per_instance - * is controled by users (default is 1), - * @f$ count_per_softmax = count / batchsize / num_softmax_per_instance @f$. - * The softmax is conducted over count_per_softmax elements each time. - */ -class SoftmaxLayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - /** - * This layer is not recommendeded for partition because it requires the whole - * src layer for normalization. - */ - ConnectionType src_neuron_connection(int k) const override { - // CHECK_LT(k, srclayers_.size()); - return kOneToAll; - } - protected: - int batchsize_, dim_; - //!< set by users (default is 1) - // int num_softmax_per_instance_; - //!< size of the softmax area/length - // int count_per_softmax_; -}; -/** - * @deprecated {please use ActivationLayer} - * - * This layer apply Sigmoid function to neuron activations. 
- * f(x)=1/(1+exp(-x)) - * f'(x)=f(x)*(1-f(x)) - */ -class SigmoidLayer: public Layer { - public: - using Layer::ComputeFeature; - using Layer::ComputeGradient; - - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; -}; - -/** - * @deprecated {please use ActivationLayer} - * This layer apply scaled Tanh function to neuron activations. - * f(x)=1.7159047 tanh(0.66666667 x) - */ -class STanhLayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; -}; - - -class BMLayer : public NeuronLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - protected: - Param *bnScale_, *bnBias_; - Param *resultRunningMean_, *resultRunningInvVariance_; - int batchsize_, channels_, height_, width_; -}; - -/*************** Layers implemented using cudnn v3 ***************/ -#ifdef USE_CUDNN -#define CHECK_CUDNN(x) CHECK_EQ(x, CUDNN_STATUS_SUCCESS) - -class CudnnBase : virtual public NeuronLayer { - public: - ~CudnnBase() { - if (src_desc_ != nullptr) - CHECK_CUDNN(cudnnDestroyTensorDescriptor(src_desc_)); - if (my_desc_ != nullptr) - CHECK_CUDNN(cudnnDestroyTensorDescriptor(my_desc_)); - } - void virtual InitCudnn() { - CHECK(!has_init_cudnn_); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&src_desc_)); - CHECK_CUDNN(cudnnCreateTensorDescriptor(&my_desc_)); - handle_ = Singleton<Context>::Instance()->cudnn_handle(); - has_init_cudnn_ = true; - } - protected: - bool has_init_cudnn_ = false; - cudnnHandle_t handle_ = nullptr; - cudnnTensorDescriptor_t src_desc_ = nullptr, my_desc_ = nullptr; -}; - -/** - * Activation layer implemented using cudnn v3. - * Activation methods including - * - SIGMOID - * - TANH - * - RELU - */ -class CudnnActivationLayer : public ActivationLayer, public CudnnBase { - public: - void InitCudnn() override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - protected: - cudnnActivationMode_t mode_; -}; - -/** - * Convolution layer implemeneted using cudnn (v3 version backward functions). 
- */ -class CudnnConvLayer : public ConvolutionLayer, public CudnnBase { - public: - ~CudnnConvLayer(); - void InitCudnn() override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - protected: - cudnnTensorDescriptor_t bias_desc_; - cudnnFilterDescriptor_t filter_desc_; - cudnnConvolutionDescriptor_t conv_desc_; - cudnnConvolutionFwdAlgo_t fp_alg_; - cudnnConvolutionBwdFilterAlgo_t bp_filter_alg_; - cudnnConvolutionBwdDataAlgo_t bp_data_alg_; - size_t workspace_byte_limit_, workspace_count_; -}; - -class CudnnLRNLayer : public LRNLayer, public CudnnBase { - public: - ~CudnnLRNLayer(); - void InitCudnn() override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - protected: - cudnnLRNMode_t mode_; - cudnnLRNDescriptor_t norm_desc_; -}; -/** - * Pooling layer implemented using cudnn. - */ -class CudnnPoolLayer : public PoolingLayer, public CudnnBase { - public: - ~CudnnPoolLayer(); - void InitCudnn() override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - protected: - cudnnPoolingDescriptor_t pool_desc_; -}; - -/** - * Cudnn Softmax layer. - */ -class CudnnSoftmaxLayer : public SoftmaxLayer, public CudnnBase { - public: - void InitCudnn() override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; -}; - - -#if CUDNN_MAJOR == 4 -/** - * Cudnn Batch Normalization layer -- supported by cudnn_v4 - */ -class CudnnBMLayer : public BMLayer, public CudnnBase { - public: - ~CudnnBMLayer(); - void InitCudnn() override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - const std::vector<Param*> GetParams() const override { - std::vector<Param*> params{bnScale_, bnBias_, - resultRunningMean_, resultRunningInvVariance_}; - return params; - } - protected: - cudnnBatchNormMode_t mode_; - cudnnTensorDescriptor_t bnScaleBiasMeanVar_desc_; - cudnnTensorDescriptor_t bnScaleBiasDiff_desc_; - Blob<float> resultSaveMean_; - Blob<float> resultSaveInvVariance_; -}; -#endif -#endif // USE_CUDNN - -/******************** RBM layers *****************/ -/** - * Base layer for RBM models. - */ -class RBMLayer: virtual public Layer { - public: - virtual ~RBMLayer() {} - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - const std::vector<Param*> GetParams() const override { - std::vector<Param*> params{weight_, bias_}; - return params; - } - virtual Blob<float>* Sample(int flat); - - protected: - //! if ture, sampling according to guassian distribution - bool gaussian_; - //! dimension of the hidden layer - int hdim_; - //! 
dimension of the visible layer - int vdim_; - int batchsize_; - bool first_gibbs_; - Param* weight_, *bias_; - Blob<float> pos_data_; - Blob<float> neg_data_; - Blob<float> neg_sample_; - Blob<float> pos_sample_; -}; - -/** - * RBM visible layer - */ -class RBMVisLayer: public RBMLayer, public LossLayer { - public: - ~RBMVisLayer(); - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - const std::string ToString(bool debug, int flag) override; - - private: - RBMLayer* hid_layer_; - Layer* input_layer_; - float error_ = 0.0f; - int counter_ = 0; -}; -/** - * RBM hidden layer - */ -class RBMHidLayer: public RBMLayer { - public: - ~RBMHidLayer(); - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - void ComputeGradient(int flag, const vector<Layer*>& srclayers) override; - - private: - RBMLayer *vis_layer_; -}; - -} // namespace singa -#endif // SINGA_NEURALNET_NEURON_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/neuralnet/output_layer.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/output_layer.h b/include/singa/neuralnet/output_layer.h deleted file mode 100644 index 9071f33..0000000 --- a/include/singa/neuralnet/output_layer.h +++ /dev/null @@ -1,99 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_NEURALNET_OUTPUT_LAYER_H_ -#define SINGA_NEURALNET_OUTPUT_LAYER_H_ - -#include <vector> -#include <string> -#include "singa/neuralnet/layer.h" -#include "singa/io/store.h" - -namespace singa { -/** - * ArgSort layer used to get topk prediction labels. - * - * It sort the labels based on its score (e.g., probability) from large to - * small. Topk labels will be kepted in the data field. It should not be called - * during training because this layer does not implement ComputeGradient() - * function. 
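The top-k selection the ArgSortLayer performs can be sketched with standard algorithms: rank the label indices by score, descending, and keep the first topk (plain vectors here instead of the layer's data Blob; assumes 0 < topk <= number of labels):

    #include <algorithm>
    #include <numeric>
    #include <vector>

    std::vector<int> TopKLabels(const std::vector<float>& scores, int topk) {
      std::vector<int> idx(scores.size());
      std::iota(idx.begin(), idx.end(), 0);  // 0, 1, ..., num_labels-1
      std::partial_sort(idx.begin(), idx.begin() + topk, idx.end(),
                        [&scores](int a, int b) { return scores[a] > scores[b]; });
      idx.resize(topk);  // keep only the topk best-scoring labels
      return idx;
    }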
- */ -class ArgSortLayer : public OutputLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - - protected: - int batchsize_, dim_; - int topk_; -}; - -class AccuracyLayer : public ArgSortLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - const std::string ToString(bool debug, int flag) override; - - private: - int counter_ = 0; - float accuracy_ = 0.0f; -}; -/** - * Output data (and label) for its source layer. - */ -class CSVOutputLayer : public OutputLayer { - public: - ~CSVOutputLayer() { delete store_; } - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - - private: - int inst_ = 0; - io::Store* store_ = nullptr; -}; - -class RecordOutputLayer : public OutputLayer { - public: - ~RecordOutputLayer() { delete store_; } - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - - private: - int inst_ = 0; //!< instance No. - io::Store* store_ = nullptr; -}; - -/** - * Output layer for char rnn model, which convert sample id back to char and - * dump to stdout. - */ -class CharRNNOutputLayer : public OutputLayer { - public: - void Setup(const LayerProto& proto, const vector<Layer*>& srclayers) override; - - void ComputeFeature(int flag, const vector<Layer*>& srclayers) override; - - private: - string vocab_; -}; - -} // namespace singa -#endif // SINGA_NEURALNET_OUTPUT_LAYER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/server.h ---------------------------------------------------------------------- diff --git a/include/singa/server.h b/include/singa/server.h deleted file mode 100644 index d95862d..0000000 --- a/include/singa/server.h +++ /dev/null @@ -1,135 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_SERVER_H_ -#define SINGA_SERVER_H_ - -#include <unordered_map> -#include <vector> -#include "singa/comm/socket.h" -#include "singa/proto/job.pb.h" -#include "singa/utils/param.h" -#include "singa/utils/updater.h" - -namespace singa { - - /* Repsond to worker's get/put/udpate request, and periodically syncing with - * other servers. - * - * Normally, the Server creates a response message for each request which - * will be sent back to the one who issued the request. 
However, if the request - * are not processed successfully, the original message will be returned. The - * sever does not know the returned message is a response or the original - * message. It just sends it to the router. The router will decided to - * re-send the request to the server or send it to the worker. - */ -class Server { - public: - ~Server(); - Server(int group_id, int server_id, - const JobProto& job_conf, - const std::vector<int>& slice2group, - const std::vector<int>& slice2server); - void Run(); - inline int grp_id() const { return grp_id_; } - inline int id() const { return id_; } - - protected: - /** - * Process GET request. - * - * @return the orignal message or a response message which contains the values - * of the Param with the request version. - */ - Msg* HandleGet(Msg** msg); - /** - * Process Update request. - * - * It waits until received the gradients from all workers from the same worker - * group. After updating, it responses to each sender with the new Param - * values. It may generate a sync message to the server group that maintains - * the global version of the updated Param (slice). - * - * Note: there is no counter for each worker group on the number of received - * update requests. Hence it is possible that the server would conduct the - * update when it receives x requests from group a and y requests from group - * b where x + y = group size. To avoid this problem, we can - * -# maintain request list for each group for each Param at the server side - * -# do not span a worker group among multiple nodes. then the updates from - * the same group would be locally aggregated on the worker node. And the - * server would conduct the update immediately after receiving the aggregated - * request. - * -# launch only one worker group. - * - * @return the orignal message or response message - */ - const std::vector<Msg*> HandleUpdate(Msg **msg); - /** - * Process PUT request. - * - * @return the original message or response message. If we don't want to - * acknowledge the put request, then return nullptr. - */ - Msg* HandlePut(Msg **msg); - /** - * Handle sync request from other server groups. - * - * It adds updates of Param (slice) from other server groups directly to - * local Param (slice). Currently, each Param (slice) has a master group, - * i.e., slice2group_[sliceid], which would receive such requests from all - * other server groups for the Param object. - * - * @param msg request msg containing the parameter updates - * @return response msg that contains the fresh parameter values. - */ - Msg* HandleSyncRequest(Msg** msg); - /** - * Handle sync response. - * - * The response msg includes the latest values of a Param object from the - * server group that maintainers this Param object. - * The local Param values are replaced with the addition result of local - * udpates since the sync request was sent and the received Param values. 
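One possible reading of that reconciliation, per parameter element, where the local update accumulated since the request is taken as the difference against the snapshot kept at request time (the last_sync_ blobs below); this is an interpretation of the comment, not code from the original server:

    // local value after the sync response = master value + local updates since the request
    float ReconcileElement(float received_master_value, float current_local_value,
                           float value_when_request_sent) {
      float local_updates = current_local_value - value_when_request_sent;
      return received_master_value + local_updates;
    }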
- * - * @param response message - */ - void HandleSyncResponse(Msg** msg); - - protected: - int grp_id_ = -1; - int id_ = -1; - Updater* updater_ = nullptr; - //!< map from slice ID to slice and deleted in the destructor - std::unordered_map<int, ParamEntry*> shard_; - std::vector<int> slice2group_, slice2server_; - //!< num of updates from last sync with master server group for a param/slice - std::vector<int> n_updates_; - //!< num of sync requests that have not been responded - std::vector<int> n_pending_sync_; - std::vector<Blob<float>> last_sync_; - std::unordered_map<int, std::vector<Msg*>> buffer_requests_; - - Dealer* dealer_; -}; - -} // namespace singa - -#endif // SINGA_SERVER_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/singa.h ---------------------------------------------------------------------- diff --git a/include/singa/singa.h b/include/singa/singa.h deleted file mode 100644 index 9bc5ba5..0000000 --- a/include/singa/singa.h +++ /dev/null @@ -1,37 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_SINGA_H_ -#define SINGA_SINGA_H_ - -#include "singa/comm/socket.h" -#include "singa/io/store.h" -#include "singa/neuralnet/neuralnet.h" -#include "singa/neuralnet/layer.h" -#include "singa/proto/job.pb.h" -#include "singa/proto/singa.pb.h" -#include "singa/utils/common.h" -#include "singa/utils/param.h" -#include "singa/utils/singleton.h" -#include "singa/utils/factory.h" -#include "singa/driver.h" - -#endif // SINGA_SINGA_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/stub.h ---------------------------------------------------------------------- diff --git a/include/singa/stub.h b/include/singa/stub.h deleted file mode 100644 index 4802535..0000000 --- a/include/singa/stub.h +++ /dev/null @@ -1,108 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. 
See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_STUB_H_ -#define SINGA_STUB_H_ - -#include <queue> -#include <unordered_map> -#include <vector> -#include <string> -#include "singa/comm/socket.h" -#include "singa/neuralnet/neuralnet.h" -#include "singa/proto/job.pb.h" -#include "singa/proto/singa.pb.h" -#include "singa/utils/factory.h" -#include "singa/utils/param.h" -#include "singa/utils/singleton.h" -#include "singa/server.h" -#include "singa/worker.h" - -namespace singa { - -class Stub { - public: - ~Stub(); - /** - * Find an endpoint to bind. - */ - void Setup(); - /** - * The Stub instance runs this function in the main thread to handle (e.g., - * forward) messages from workers and servers. - * - * @param[in] slice2server the k-th value is the ID of the server that is in - * charge of updating the Param slice with ID k. Large Param objects are - * sliced into subsets for load-balance. Different subsets are updated by - * different servers. - */ - void Run(const vector<int>& slice2server, - const std::vector<Worker*>& workers, - const std::vector<Server*>& servers); - - void set_router(Router* router) { - router_ = router; - } - - protected: - /** - * Create a socket to send msg to the specified process - * @param dst_procs the dst process (logical) ID - * @return the newly created socket - */ - Dealer* CreateInterProcsDealer(int dst_procs); - /** - * Generate a request message to Get the parameter object. - */ - const std::vector<Msg*> HandleGetRequest(ParamEntry* entry, Msg** msg); - void HandleGetResponse(ParamEntry* entry, Msg** msg); - /** - * Generate a request message to Update the parameter object. - */ - const std::vector<Msg*> HandleUpdateRequest(ParamEntry* entry, Msg** msg); - /** - * Handle response msg from servers for the update requests. - */ - void HandleUpdateResponse(ParamEntry* entry, Msg** msg); - /** - * Generate a request message to Put the parameter object. - */ - const std::vector<Msg*> HandlePutRequest(ParamEntry* entry, Msg** msg); - /** - * Called by HandlePut, HandleUpdate and HandleGet functions - * @param type message type - * @param version param version - * @param entry - * @param msg - * @param ret generated messages - */ - void GenMsgs(int type, int version, ParamEntry* entry, - Msg* msg, std::vector<Msg*> *ret); - - - protected: - Router *router_ = nullptr; - std::vector<int> slice2server_; -}; - -} // namespace singa - -#endif // SINGA_STUB_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/blob.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/blob.h b/include/singa/utils/blob.h deleted file mode 100644 index 1a0a592..0000000 --- a/include/singa/utils/blob.h +++ /dev/null @@ -1,414 +0,0 @@ -/************************************************************** -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. 
You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -/** - * The code is adapted from that of Caffe which is under BSD 2 Clause License. - * COPYRIGHT - * All contributions by the University of California: - * Copyright (c) 2014, The Regents of the University of California (Regents) - * All rights reserved. - * All other contributions: - * Copyright (c) 2014, the respective contributors - * All rights reserved. - */ -#ifndef SINGA_UTILS_BLOB_H_ -#define SINGA_UTILS_BLOB_H_ - -#include <glog/logging.h> -#include <memory> -#include <vector> -#include "singa/proto/common.pb.h" -#include "mshadow/tensor.h" -#include "mshadow/cxxnet_op.h" - -namespace singa { - -// TODO(wangwei) use cudaMallocHost depending on Context::device. -inline void MallocHost(void** ptr, size_t size) { - *ptr = malloc(size); - // cudaMallocHost(ptr, size); -} - -inline void FreeHost(void* ptr) { - free(ptr); - // cudaFreeHost(ptr); -} - -/** - * @brief Manages memory allocation and synchronization between the host (CPU) - * and device (GPU). - * - * TODO(dox): more thorough description. - */ -class SyncedMemory { - public: - enum SyncedHead { UNINITIALIZED, - HEAD_AT_CPU, - HEAD_AT_GPU, - SYNCED }; - - SyncedMemory() {} - explicit SyncedMemory(size_t size) : size_(size) {} - ~SyncedMemory(); - - const void* cpu_data(); - const void* gpu_data(); - void* mutable_cpu_data(); - void* mutable_gpu_data(); - void set_cpu_data(void* data); - inline SyncedHead head() { return head_; } - inline size_t size() { return size_; } - - private: - void to_cpu(); - void to_gpu(); - - void* cpu_ptr_ = nullptr; - void* gpu_ptr_ = nullptr; - size_t size_ = 0; - SyncedHead head_ = UNINITIALIZED; - bool own_cpu_data_ = false; -}; // class SyncedMemory - - -template <typename Dtype> -class Blob { - public: - Blob() {} - /** - * Blob constructor with given shape. - * @param shape specifies the size of each dimension, shape[0] is the highest - * dimension, i.e., stride[0] = shape[1] * shape[2] * ... - */ - explicit Blob(const std::vector<int>& shape) { Reshape(shape); } - /** - * Blob constructor with given shape. - * @param[in] dim0 total num of elements. - */ - explicit Blob(int dim0) { Reshape(dim0); } - /** - * Blob constructor with given shape. - * @param[in] dim0 size of the highest dimension - * @param[in] dim1 size of the second highest dimension - */ - explicit Blob(int dim0, int dim1) { Reshape(dim0, dim1); } - /** - * Blob constructor with given shape. - * @param[in] dim0 size of the highest dimension - * @param[in] dim1 - * @param[in] dim2 - */ - explicit Blob(int dim0, int dim1, int dim2) { Reshape(dim0, dim1, dim2); } - /** - * Blob constructor with given shape. - * @param[in] dim0 size of the highest dimension - * @param[in] dim1 - * @param[in] dim2 - * @param[in] dim3 - */ - explicit Blob(int dim0, int dim1, int dim2, int dim3) { - Reshape(dim0, dim1, dim2, dim3); - } - /** - * Change the shape of the blob, re-allocate memory if Blob size() changes. - * - * @param[in] shape specifies the size of each dimension, shape[0] is the - * highest * dimension, i.e., stride[0] = shape[1] * shape[2] * ... 
- */ - void Reshape(const std::vector<int>& shape); - /** - * Helper for Reshape(const std::vector<int>& shape) with shape.size() = 1. - * - * @see Reshape(const std::vector<int>&). - * @param[in] dim0 total num of elements. - */ - void Reshape(int dim0) { - Reshape(std::vector<int>{dim0}); - } - /** - * Helper for Reshape(const std::vector<int>& shape) with shape.size() = 2. - * - * @param dim0 the highest dimension size, i.e., dim0 = shape[0]. E.g., dim0 - * could be the batchsize. - * @param[in] dim1, dim1 = shape[1], e.g., dim1 could be the length of the - * feature vector. - */ - void Reshape(int dim0, int dim1) { - Reshape(std::vector<int>{dim0, dim1}); - } - /** - * Helper for Reshape(const std::vector<int>& shape) with shape.size() = 3. - * - * @param[in] dim0, dim0 = shape[0] - * @param[in] dim1, dim1 = shape[1] - * @param[in] dim2, dim2 = shape[2] - */ - void Reshape(int dim0, int dim1, int dim2) { - Reshape(std::vector<int>{dim0, dim1, dim2}); - } - /** - * Helper for Reshape(const std::vector<int>& shape) with shape.size() = 4. - * - * @param[in] dim0, dim0 = shape[0] - * @param[in] dim1, dim1 = shape[1] - * @param[in] dim2, dim2 = shape[2] - * @param[in] dim3, dim3 = shape[3] - */ - void Reshape(int dim0, int dim1, int dim2, int dim3) { - Reshape(std::vector<int>{dim0, dim1, dim2, dim3}); - } - /** - * Reshape to the shape of the *other* Blob. - * @param[in] other - */ - void ReshapeLike(const Blob& other); - /** - * @brief Copy from a source Blob. - * - * @param source the Blob to copy from - * @param reshape if false, require this Blob to be pre-shaped to the shape - * of other (and die otherwise); if true, Reshape this Blob to other's - * shape if necessary - */ - void CopyFrom(const Blob<Dtype>& source, bool reshape); - /** - * call CopyFrom(const Blob<Dtype>& source, bool reshape) with reshape = false - */ - void CopyFrom(const Blob<Dtype>& source); - - void FromProto(const singa::BlobProto& proto); - void ToProto(singa::BlobProto* proto) const; - /** - * Set each element to be v - */ - void SetValue(Dtype v); - /** - * Compute the sum of absolute values (L1 norm) of the data. - Dtype AsumData() const; - */ - /** - * Sum all elements - Dtype SumData() const; - */ - /** - * Share data with the other Blob. - * Set the data_ shared_ptr to point to the SyncedMemory holding the data_ - * of Blob other. - * - * It may deallocate the SyncedMemory holding this Blob's data_, as - * shared_ptr calls its destructor when reset with the "=" operator. - * @param other the Blob that owns the data - * @param cpu_only if true, only share the cpu data; if false, share the whole - * data_ field. For training with multi-gpu cards, cpu_only must be true, - * because gpu memory cannot be shared among different devices. - */ - void ShareData(Blob* other, bool cpu_only = true); - - /* - void Swap(Blob& other); - */ - /** - * @return the shape vector. - */ - inline const std::vector<int>& shape() const { return shape_; } - /** - * @return the size of the k-th dimension.
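To illustrate the Reshape semantics documented above (the element count is the product of the shape dimensions, and memory is re-allocated only when the count grows past the current capacity), here is a minimal self-contained sketch. MiniBlob is a hypothetical stand-in, not the real singa::Blob; it omits SyncedMemory, versioning, and the proto serialization.

#include <cassert>
#include <cstdio>
#include <memory>
#include <vector>

template <typename Dtype>
class MiniBlob {
 public:
  void Reshape(const std::vector<int>& shape) {
    shape_ = shape;
    int count = 1;
    for (int d : shape) count *= d;   // count_ = product of all dimensions
    count_ = count;
    if (count_ > capacity_) {         // re-allocate only when the blob grows
      capacity_ = count_;
      data_.reset(new Dtype[capacity_]);
    }
  }
  int count() const { return count_; }
  int shape(int k) const {
    assert(k < static_cast<int>(shape_.size()));
    return shape_[k];
  }

 private:
  std::vector<int> shape_;
  int count_ = 0, capacity_ = 0;
  std::unique_ptr<Dtype[]> data_;
};

int main() {
  MiniBlob<float> b;
  b.Reshape({32, 784});               // e.g., batchsize x feature length
  std::printf("count=%d shape0=%d\n", b.count(), b.shape(0));
  b.Reshape({32, 28, 28});            // same element count: no re-allocation
  return 0;
}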
- */ - inline int shape(int k) const { - CHECK_LT(k, shape_.size()); - return shape_.at(k); - } - inline int count() const { - return count_; - } - inline int version() const { - return version_; - } - inline void set_version(int v) { - version_ = v; - } - inline const Dtype* cpu_data() const { - CHECK(data_); - return static_cast<const Dtype*>(data_->cpu_data()); - } - inline void set_cpu_data(Dtype* data) { - CHECK(data); - data_->set_cpu_data(data); - } - inline const Dtype* gpu_data() const { - CHECK(data_); - return static_cast<const Dtype*>(data_->gpu_data()); - } - inline Dtype* mutable_cpu_data() { - CHECK(data_); - return static_cast<Dtype*>(data_->mutable_cpu_data()); - } - inline Dtype* mutable_gpu_data() { - CHECK(data_); - return static_cast<Dtype*>(data_->mutable_gpu_data()); - } - inline void set_transpose(bool val) { - transpose_ = val; - } - inline bool transpose() const { - return transpose_; - } - inline const Blob<Dtype> T() const { - Blob<Dtype> ret(*this); - ret.transpose_ = !transpose_; - return ret; - } - // to check if two blob has the exact same content - bool check_equal(Blob* other) const { - if (transpose() != other->transpose()) return false; - if (count() != other->count()) return false; - if (shape().size() != other->shape().size()) return false; - for (unsigned int i = 0; i < shape().size(); i++) { - if (shape(i) != other->shape(i)) return false; - } - const Dtype * a = cpu_data(); - const Dtype * b = other->cpu_data(); - for (int i = 0; i < count(); i++) { - if (a[i] != b[i]) return false; - } - return true; - } - - protected: - std::shared_ptr<SyncedMemory> data_ = nullptr; - std::vector<int> shape_; - int count_ = 0; - int capacity_ = 0; - int version_ = -1; - bool transpose_ = false; -}; // class Blob - -/** - * Reshape a Blob. - * @return a new Blob with the given shape, it shares the internal data_ with - * the original Blob, i.e., no memory copy and allocation. - */ -template <typename Dtype> -Blob<Dtype>* Reshape(const Blob<Dtype> & A, const std::vector<int>& shape) { - Blob<Dtype>* res = new Blob<Dtype>(A); - res->Reshape(shape); - return res; -} - -/** - * Helper of Reshape(const Blob<Dtype>, const std::vector<int>*). - */ -template <typename Dtype> -Blob<Dtype>* Reshape(const Blob<Dtype> & A, int count) { - std::vector<int> tmpshape; - tmpshape.push_back(count); - return Reshape(A, tmpshape); -} -/** - * Helper of Reshape(const Blob<Dtype>, const std::vector<int>*). - */ -template <typename Dtype> -Blob<Dtype>* Reshape(const Blob<Dtype> & A, int dim0, int dim1) { - std::vector<int> tmpshape; - tmpshape.push_back(dim0); - tmpshape.push_back(dim1);; - return Reshape(A, tmpshape); -} -/** - * Helper of Reshape(const Blob<Dtype>, const std::vector<int>*). - */ -template <typename Dtype> -Blob<Dtype>* Reshape(const Blob<Dtype> & A, int dim0, int dim1, int dim2) { - std::vector<int> tmpshape; - tmpshape.push_back(dim0); - tmpshape.push_back(dim1); - tmpshape.push_back(dim2); - return Reshape(A, tmpshape); -} -/** - * Helper of Reshape(const Blob<Dtype>, const std::vector<int>*). - */ -template <typename Dtype> -Blob<Dtype>* Reshape(const Blob<Dtype> & A, int dim0, int dim1, int dim2, - int dim3) { - std::vector<int> tmpshape; - tmpshape.push_back(dim0); - tmpshape.push_back(dim1); - tmpshape.push_back(dim2); - tmpshape.push_back(dim3); - return Reshape(A, tmpshape); -} - -/** - * @return a new Blob which share all internal members with the input Blob - * except that the transpose_ field is set to the opposite value. 
- */ -template <typename Dtype> -Blob<Dtype>* Transpose(const Blob<Dtype> & A) { - Blob<Dtype>* res = new Blob<Dtype>(A); - bool origin = A.transpose(); - res->set_transpose(!origin); - return res; -} - -// TODO(wangwei) remove mshadow functions. -using namespace mshadow; -using mshadow::cpu; - -using mshadow::Shape; -using mshadow::Shape1; -using mshadow::Shape2; -using mshadow::Shape3; -using mshadow::Shape4; -using mshadow::Tensor; - -using std::vector; - -inline Tensor<cpu, 4> Tensor4(Blob<float>* blob) { - const vector<int>& shape = blob->shape(); - Tensor<cpu, 4> tensor(blob->mutable_cpu_data(), - Shape4(shape[0], shape[1], shape[2], shape[3])); - return tensor; -} - -inline Tensor<cpu, 3> Tensor3(Blob<float>* blob) { - const vector<int>& shape = blob->shape(); - Tensor<cpu, 3> tensor(blob->mutable_cpu_data(), - Shape3(shape[0], shape[1], blob->count() / shape[0] / shape[1])); - return tensor; -} - -inline Tensor<cpu, 2> Tensor2(Blob<float>* blob) { - const vector<int>& shape = blob->shape(); - Tensor<cpu, 2> tensor(blob->mutable_cpu_data(), - Shape2(shape[0], blob->count() / shape[0])); - return tensor; -} - -inline Tensor<cpu, 1> Tensor1(Blob<float>* blob) { - Tensor<cpu, 1> tensor(blob->mutable_cpu_data(), Shape1(blob->count())); - return tensor; -} - - -} // namespace singa - -#endif // SINGA_UTILS_BLOB_H_ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/dd1e4afa/include/singa/utils/cluster.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/cluster.h b/include/singa/utils/cluster.h deleted file mode 100644 index 9e36cf8..0000000 --- a/include/singa/utils/cluster.h +++ /dev/null @@ -1,161 +0,0 @@ -/************************************************************ -* -* Licensed to the Apache Software Foundation (ASF) under one -* or more contributor license agreements. See the NOTICE file -* distributed with this work for additional information -* regarding copyright ownership. The ASF licenses this file -* to you under the Apache License, Version 2.0 (the -* "License"); you may not use this file except in compliance -* with the License. You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, -* software distributed under the License is distributed on an -* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -* KIND, either express or implied. See the License for the -* specific language governing permissions and limitations -* under the License. -* -*************************************************************/ - -#ifndef SINGA_UTILS_CLUSTER_H_ -#define SINGA_UTILS_CLUSTER_H_ - -#include <glog/logging.h> -#include <string> -#include <unordered_map> -#include <memory> -#include <vector> -#include "singa/proto/job.pb.h" -#include "singa/proto/singa.pb.h" -#include "singa/utils/cluster_rt.h" -#include "singa/utils/common.h" -#include "singa/utils/singleton.h" - -namespace singa { - -/** - * Cluster is a singleton object, which provides cluster configuations, - * e.g., the topology of the cluster. - * All IDs start from 0. 
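The Tensor1/Tensor2/Tensor3/Tensor4 helpers near the end of blob.h above wrap a Blob's CPU memory in mshadow tensors by folding the trailing dimensions into the last axis. The sketch below reproduces only that folding arithmetic with hypothetical FoldTo2D/FoldTo3D helpers; it does not use mshadow and is not the SINGA implementation.

#include <cstdio>
#include <vector>

static int Count(const std::vector<int>& shape) {
  int c = 1;
  for (int d : shape) c *= d;
  return c;
}

// 2-D view: rows = shape[0], cols = count / shape[0], mirroring Tensor2 above.
static std::vector<int> FoldTo2D(const std::vector<int>& shape) {
  int c = Count(shape);
  return {shape[0], c / shape[0]};
}

// 3-D view: keeps shape[0] and shape[1], folds the rest into the last axis,
// mirroring Tensor3 above.
static std::vector<int> FoldTo3D(const std::vector<int>& shape) {
  int c = Count(shape);
  return {shape[0], shape[1], c / shape[0] / shape[1]};
}

int main() {
  std::vector<int> shape{8, 3, 32, 32};  // e.g., batch x channels x height x width
  auto v2 = FoldTo2D(shape);
  auto v3 = FoldTo3D(shape);
  std::printf("2-D view: %d x %d\n", v2[0], v2[1]);               // 8 x 3072
  std::printf("3-D view: %d x %d x %d\n", v3[0], v3[1], v3[2]);   // 8 x 3 x 1024
  return 0;
}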
- */ -class Cluster { - public: - // Cluster is a global singleton in a process - static Cluster* Setup(int job_id, const SingaProto& singaConf, - const ClusterProto& clusterConf); - static Cluster* Get(); - - inline int nserver_groups() const { return cluster_.nserver_groups(); } - inline int nworker_groups() const { return cluster_.nworker_groups(); } - inline int nworkers_per_group() const { return cluster_.nworkers_per_group();} - inline int nservers_per_group() const { return cluster_.nservers_per_group();} - inline int nworkers_per_procs() const { return cluster_.nworkers_per_procs();} - inline int nservers_per_procs() const { return cluster_.nservers_per_procs();} - inline int nworker_groups_per_server_group() const { - if (nserver_groups() == 0 || nservers_per_group() == 0) - return 1; - else - return cluster_.nworker_groups() / cluster_.nserver_groups(); - } - /** - * @return true if the calling process has server threads, otherwise false - */ - inline bool has_server() const { - if (server_worker_separate()) { - CHECK_LT(procs_id_, nprocs_); - return procs_id_ >= nworker_procs(); - } else { - return procs_id_ < nserver_procs(); - } - } - /** - * @return true if the calling process has worker threads. - */ - inline bool has_worker() const { - return procs_id_ < nworker_procs(); - } - /** - * @return global procs ID, which starts from 0. - */ - inline int procs_id() const { return procs_id_; } - inline void set_procs_id(int procs_id) { procs_id_ = procs_id; } - inline bool server_worker_separate() const { - return cluster_.server_worker_separate(); - } - inline int nworker_procs() const { - return nworker_groups() * nworkers_per_group() / nworkers_per_procs(); - } - inline int nserver_procs() const { - return nserver_groups() * nservers_per_group() / nservers_per_procs(); - } - inline int nprocs() const { return nprocs_; } - /** - * @return endpoint of the router of the process with the specified ID - */ - inline std::string endpoint(int procs_id) const { - CHECK_LT(procs_id, nprocs()); - CHECK_GE(procs_id, 0); - return cluster_rt_->GetProcHost(procs_id); - } - inline std::string workspace() const { return cluster_.workspace(); } - inline std::string vis_folder() const { - return cluster_.workspace() + "/visualization"; - } - inline std::string checkpoint_folder() const { - return cluster_.workspace() + "/checkpoint"; - } - /* - const int stub_timeout() const { return cluster_.stub_timeout(); } - const int worker_timeout() const { return cluster_.worker_timeout(); } - const int server_timeout() const { return cluster_.server_timeout(); } - */ - inline bool share_memory() const { return cluster_.share_memory(); } - inline int sync_freq() const { return cluster_.sync_freq(); } - inline int poll_time() const { return cluster_.poll_time(); } - ClusterRuntime* runtime() const { return cluster_rt_; } - - /** - * @return logical procs ID - */ - inline int ProcsIDOf(int group_id, int id, int flag) { - return procs_ids_.at(Hash(group_id, id, flag)); - } - - /** - * @param pid, process ID - * @param group_size, num of executors in a group - * @param procs_size, num of executors in a process - * - * @return a vector with 4 integers: - * [group start, group end), [start executor, end executor) - */ - const std::vector<int> ExecutorRng(int pid, int group_size, int procs_size); - /** - * Register this process. - * - * @param pid physical process ID obtained from the OS; all other procs IDs refer to - * logical process IDs.
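The role-derivation logic in the accessors above (worker processes occupy the low procs IDs; when server_worker_separate is set, server processes follow them) can be traced with a small self-contained sketch. MiniCluster is a hypothetical illustration, not singa::Cluster, and it hard-codes a tiny topology.

#include <cstdio>

struct MiniCluster {
  int nworker_groups, nworkers_per_group, nworkers_per_procs;
  int nserver_groups, nservers_per_group, nservers_per_procs;
  bool server_worker_separate;
  int procs_id;

  // Same arithmetic as nworker_procs()/nserver_procs() above.
  int nworker_procs() const {
    return nworker_groups * nworkers_per_group / nworkers_per_procs;
  }
  int nserver_procs() const {
    return nserver_groups * nservers_per_group / nservers_per_procs;
  }
  bool has_worker() const { return procs_id < nworker_procs(); }
  bool has_server() const {
    return server_worker_separate ? procs_id >= nworker_procs()
                                  : procs_id < nserver_procs();
  }
};

int main() {
  // 2 worker groups x 4 workers with 4 workers per process -> 2 worker processes,
  // plus one separate server process holding the single server group.
  MiniCluster c{2, 4, 4, 1, 1, 1, true, 2};
  std::printf("procs %d: worker=%d server=%d\n", c.procs_id,
              c.has_worker(), c.has_server());
  return 0;
}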
- * @param endpoint unique string for other procs to connect - */ - void Register(int pid, const std::string& endpoint); - - private: - void Init(int job, const SingaProto& singaConf, - const ClusterProto& clusterConf); - void SetupFolders(const ClusterProto &cluster); - int Hash(int gid, int id, int flag); - - int procs_id_ = -1; - int nprocs_ = 0; - // cluster config proto - ClusterProto cluster_; - SingaProto singa_; - ClusterRuntime* cluster_rt_ = nullptr; - std::unordered_map<int, int> procs_ids_; -}; - -} // namespace singa - -#endif // SINGA_UTILS_CLUSTER_H_
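cluster.h above states that Cluster is a global per-process singleton created via Setup() and fetched later via Get(). The sketch below shows only that Setup-then-Get pattern with hypothetical MiniConfig/MiniSingleton types; the real Cluster is configured from SingaProto and ClusterProto and tracks the cluster runtime, none of which is reproduced here.

#include <cassert>
#include <cstdio>
#include <string>

// Hypothetical configuration record standing in for the cluster/job protos.
struct MiniConfig {
  int nworker_groups;
  std::string workspace;
};

class MiniSingleton {
 public:
  // Create the unique instance from the job configuration; call once at startup.
  static MiniSingleton* Setup(const MiniConfig& conf) {
    assert(instance_ == nullptr);
    instance_ = new MiniSingleton(conf);
    return instance_;
  }
  // Later callers fetch the already-initialized instance.
  static MiniSingleton* Get() {
    assert(instance_ != nullptr && "Setup() must be called first");
    return instance_;
  }
  const MiniConfig& conf() const { return conf_; }

 private:
  explicit MiniSingleton(const MiniConfig& conf) : conf_(conf) {}
  MiniConfig conf_;
  static MiniSingleton* instance_;
};

MiniSingleton* MiniSingleton::instance_ = nullptr;

int main() {
  MiniSingleton::Setup(MiniConfig{2, "/data/workspace"});
  std::printf("nworker_groups=%d workspace=%s\n",
              MiniSingleton::Get()->conf().nworker_groups,
              MiniSingleton::Get()->conf().workspace.c_str());
  return 0;
}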
