Repository: incubator-singa Updated Branches: refs/heads/dev dd08f4130 -> 4db968c2e
SINGA-204 Support the training of feed-forward neural nets

Draft FeedForwardNet, including functions for
1. training, and training over one mini-batch
2. evaluation, and evaluation over one mini-batch

Draft the Alexnet model for the Cifar10 dataset,
1. Cifar10 class for reading the dataset
2. alexnet.cc for creating the FeedForwardNet and conducting the training.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/d826b2e8
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/d826b2e8
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/d826b2e8

Branch: refs/heads/dev
Commit: d826b2e856b778b02b74be1659e990820538cffb
Parents: dd08f41
Author: Wei Wang <[email protected]>
Authored: Wed Jun 22 18:05:46 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Sat Jun 25 16:15:59 2016 +0800

----------------------------------------------------------------------
 examples/cifar10/alexnet.cc            | 144 +++++++++++++++
 examples/cifar10/cifar10.cc            |  98 +++++++++++
 examples/cifar10/download_data.py      |  30 ++++
 include/singa/core/tensor.h            |   9 +-
 include/singa/model/feed_forward_net.h | 133 ++++++++++++++
 include/singa/model/layer.h            |   3 +
 include/singa/utils/singleton.h        |  22 ++-
 include/singa/utils/string.h           |   9 +
 src/core/tensor/tensor.cc              |  18 +-
 src/model/feed_forward_net.cc          | 263 ++++++++++++++++++++++++++++
 10 files changed, 708 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/examples/cifar10/alexnet.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc
new file mode 100644
index 0000000..2917dd2
--- /dev/null
+++ b/examples/cifar10/alexnet.cc
@@ -0,0 +1,144 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include "./cifar10.cc"
+#include "singa/model/feed_forward_net.h"
+#include "singa/model/optimizer.h"
+#include "singa/model/initializer.h"
+#include "singa/utils/channel.h"
+#include "singa/utils/string.h"
+
+namespace singa {
+
+LayerConf GenConvConf(string name, int nb_filter, int kernel, int stride,
+                      int pad) {
+  LayerConf conf;
+  conf.set_name(name);
+  conf.set_type("CudnnConvolution");
+  ConvolutionConf *conv = conf.mutable_convolution_conf();
+  conv->set_num_output(nb_filter);
+  conv->set_kernel_size(kernel);
+  conv->set_stride(stride);
+  conv->set_pad(pad);
+
+  FillerConf *weight = conv->mutable_weight_filler();
+  weight->set_type("Xavier");
+  return conf;
+}
+
+LayerConf GenPoolingConf(string name, bool max_pool, int kernel, int stride,
+                         int pad) {
+  LayerConf conf;
+  conf.set_name(name);
+  conf.set_type("CudnnPooling");
+  PoolingConf *pool = conf.mutable_pooling_conf();
+  pool->set_kernel_size(kernel);
+  pool->set_stride(stride);
+  pool->set_pad(pad);
+  if (!max_pool) pool->set_pool(PoolingConf_PoolMethod_AVE);
+  return conf;
+}
+
+LayerConf GenReLUConf(string name) {
+  LayerConf conf;
+  conf.set_name(name);
+  conf.set_type("RELU");
+  return conf;
+}
+
+LayerConf GenDenseConf(string name, int num_output) {
+  LayerConf conf;
+  conf.set_name(name);
+  conf.set_type("Dense");
+  DenseConf *dense = conf.mutable_dense_conf();
+  dense->set_num_output(num_output);
+  FillerConf *weight = dense->mutable_weight_filler();
+  weight->set_type("Xavier");
+  return conf;
+}
+
+LayerConf GenSoftmaxConf(string name) {
+  LayerConf conf;
+  conf.set_name(name);
+  conf.set_type("CudnnSoftmax");
+  return conf;
+}
+
+FeedForwardNet CreateNet(Optimizer* opt, Loss* loss, Metric* metric) {
+  FeedForwardNet net;
+  Shape s{3, 32, 32};
+  net.Add(GenConvConf("conv1", 32, 5, 1, 2), &s);
+  net.Add(GenReLUConf("relu1"));
+  net.Add(GenPoolingConf("pool1", true, 3, 2, 0));
+  net.Add(GenConvConf("conv2", 32, 5, 1, 2));
+  net.Add(GenReLUConf("relu2"));
+  net.Add(GenPoolingConf("pool2", false, 3, 2, 0));
+  net.Add(GenConvConf("conv3", 64, 5, 1, 2));
+  net.Add(GenReLUConf("relu3"));
+  net.Add(GenPoolingConf("pool3", false, 3, 2, 0));
+  net.Add(GenDenseConf("ip1", 10));
+  net.Add(GenSoftmaxConf("softmax"));
+
+  OptimizerConf opt_conf;
+  opt_conf.set_momentum(0.9);
+  opt->Setup(opt_conf);
+  net.Compile(true, opt, loss, metric);
+  return net;
+}
+
+void Train(float lr, int num_epoch, string data_dir) {
+  SoftmaxCrossEntropy loss;
+  Accuracy acc;
+  SGD sgd;
+  sgd.SetLearningRate([lr](int step) { return lr; });
+  auto net = CreateNet(&sgd, &loss, &acc);
+  Cifar10 data(data_dir);
+  Tensor train_x, train_y, test_x, test_y;
+  {
+    auto train = data.ReadTrainData();
+    const auto mean = Average(train.first, 0);
+    train_x = SubRow(train.first, mean);
+    auto test = data.ReadTestData();
+    test_x = SubRow(test.first, mean);
+    train_y = train.second;
+    test_y = test.second;
+  }
+  net.Train(100, num_epoch, train_x, train_y, test_x, test_y);
+}
+}  // namespace singa
+
+int main(int argc, char** argv) {
+  singa::InitChannel();
+  int pos = singa::ArgPos(argc, argv, "-epoch");
+  int nEpoch = 5;
+  if (pos != -1) nEpoch = atoi(argv[pos + 1]);
+  pos = singa::ArgPos(argc, argv, "-lr");
+  float lr = 0.01;
+  if (pos != -1) lr = atof(argv[pos + 1]);
+  pos = singa::ArgPos(argc, argv, "-data");
+  std::string data = "cifar-10-batches-bin";
+  if (pos != -1) data = argv[pos + 1];
+
+  LOG(INFO) << "Start training";
+  singa::Train(lr, nEpoch, data);
+  LOG(INFO) << "End training";
+}


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/examples/cifar10/cifar10.cc
----------------------------------------------------------------------
diff --git a/examples/cifar10/cifar10.cc b/examples/cifar10/cifar10.cc
new file mode 100644
index 0000000..7efc18f
--- /dev/null
+++ b/examples/cifar10/cifar10.cc
@@ -0,0 +1,98 @@
+/************************************************************
+*
+* Licensed to the Apache Software Foundation (ASF) under one
+* or more contributor license agreements. See the NOTICE file
+* distributed with this work for additional information
+* regarding copyright ownership. The ASF licenses this file
+* to you under the Apache License, Version 2.0 (the
+* "License"); you may not use this file except in compliance
+* with the License. You may obtain a copy of the License at
+*
+* http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing,
+* software distributed under the License is distributed on an
+* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+* KIND, either express or implied. See the License for the
+* specific language governing permissions and limitations
+* under the License.
+*
+*************************************************************/
+#include <fstream>
+#include <string>
+#include <cstdint>
+#include <iostream>
+#include "singa/core/tensor.h"
+#include "singa/utils/logging.h"
+
+using std::string;
+namespace singa {
+
+static const size_t kImageSize = 32;
+static const size_t kImageVol = 3072;
+static const size_t kBatchSize = 10000;
+static const size_t kTrainFiles = 5;
+
+/// For reading cifar10 binary data as tensors.
+class Cifar10 {
+ public:
+  /// 'dir_path': path to the folder including the *.bin files
+  Cifar10(string dir_path, bool normalize = true)
+      : dir_path_(dir_path), normalize_(normalize) {}
+
+  /// read all training data into an image Tensor and a label Tensor
+  const std::pair<Tensor, Tensor> ReadTrainData(bool shuffle = false);
+  /// read all test data into an image Tensor and a label Tensor
+  const std::pair<Tensor, Tensor> ReadTestData();
+  /// read data from one file into an image Tensor and a label Tensor
+  const std::pair<Tensor, Tensor> ReadFile(string file, bool shuffle = false);
+
+ private:
+  string dir_path_;
+  bool normalize_;
+};
+
+void read_image(std::ifstream* file, int* label, char* buffer) {
+  char label_char;
+  file->read(&label_char, 1);
+  *label = label_char;
+  file->read(buffer, kImageVol);
+  return;
+}
+
+const std::pair<Tensor, Tensor> Cifar10::ReadFile(string file, bool shuffle) {
+  Tensor images(Shape{kBatchSize, 3, kImageSize, kImageSize});
+  Tensor labels(Shape{kBatchSize}, kInt);
+  if (dir_path_.back() != '/') dir_path_.push_back('/');
+  LOG(INFO) << "Reading file " << dir_path_ + file;
+  std::ifstream data_file((dir_path_ + file).c_str(),
+                          std::ios::in | std::ios::binary);
+  CHECK(data_file.is_open()) << "Unable to open file " << file;
+  int label;
+  char image[kImageVol];
+  float float_image[kImageVol];
+  int tmplabels[kBatchSize];
+  for (size_t itemid = 0; itemid < kBatchSize; ++itemid) {
+    read_image(&data_file, &label, image);
+    for (size_t i = 0; i < kImageVol; i++)
+      float_image[i] = static_cast<float>(static_cast<int>(image[i]));
+    images.CopyDataFromHostPtr(float_image, kImageVol, itemid * kImageVol);
+    tmplabels[itemid] = label;
+  }
+  labels.CopyDataFromHostPtr(tmplabels, kBatchSize);
+  return std::make_pair(images, labels);
+}
+
+const std::pair<Tensor, Tensor> Cifar10::ReadTrainData(bool shuffle) {
+  Tensor images(Shape{kBatchSize * kTrainFiles, 3, kImageSize, kImageSize});
+  Tensor labels(Shape{kBatchSize * kTrainFiles}, kInt);
+  for (size_t fileid = 0; fileid < kTrainFiles; ++fileid) {
+    string file = "data_batch_" + std::to_string(fileid + 1) + ".bin";
+    const auto ret = ReadFile(file);
+    CopyDataToFrom(&images, ret.first, ret.first.Size(),
+                   fileid * ret.first.Size());
+    CopyDataToFrom(&labels, ret.second, kBatchSize, fileid * kBatchSize);
+  }
+  return std::make_pair(images, labels);
+}
+
+const std::pair<Tensor, Tensor> Cifar10::ReadTestData() {
+  return ReadFile("test_batch.bin");
+}
+}  // namespace singa


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/examples/cifar10/download_data.py
----------------------------------------------------------------------
diff --git a/examples/cifar10/download_data.py b/examples/cifar10/download_data.py
new file mode 100644
index 0000000..ffb9724
--- /dev/null
+++ b/examples/cifar10/download_data.py
@@ -0,0 +1,30 @@
+#!/usr/bin/env python
+import urllib
+import tarfile
+import os
+import sys
+
+
+def extract_tarfile(filepath):
+    with tarfile.open(filepath, 'r') as f:
+        f.extractall('.')
+
+
+dirpath = 'cifar-10-batches-bin'
+gzfile = 'cifar-10-binary' + '.tar.gz'
+if os.path.exists(dirpath):
+    print 'Directory %s already exists. To re-download the files, '\
+        'remove the existing directory and %s' % (dirpath, gzfile)
+    sys.exit(0)
+
+if os.path.exists(gzfile):
+    print 'The tar file already exists. Extracting it now..'
+    extract_tarfile(gzfile)
+    print 'Finished!'
+    sys.exit(0)
+
+url = 'http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz'
+print 'Downloading CIFAR10 from %s' % (url)
+urllib.urlretrieve(url, gzfile)
+extract_tarfile(gzfile)
+print 'Finished!'


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index a4f42db..6de5c0c 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -21,6 +21,7 @@
 #include <vector>
 #include <tuple>
+#include <memory>
 #include "singa/core/common.h"
 #include "singa/core/device.h"
@@ -127,14 +128,16 @@ class Tensor {
   /// For init the tensor values, copy 'num' elements.
   template <typename SType>
-  void CopyDataFromHostPtr(const SType *src, const size_t num);
+  void CopyDataFromHostPtr(const SType *src, const size_t num,
+                           const size_t offset = 0);
   /// Copy data from another Tensor which may be on a diff device.
   /// Meta data would not be copied!
   void CopyData(const Tensor &other);
-  /// return an exactly the same Tensor with data been deep copied.
-  Tensor Clone() const;
+  /// return an exact copy of this Tensor, with data deep copied to the given
+  /// device. If 'device' is nullptr, then clone it on the current device.
+  Tensor Clone(std::shared_ptr<Device> device = nullptr) const;
 
   // Tensor operations


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/include/singa/model/feed_forward_net.h
----------------------------------------------------------------------
diff --git a/include/singa/model/feed_forward_net.h b/include/singa/model/feed_forward_net.h
new file mode 100644
index 0000000..173600b
--- /dev/null
+++ b/include/singa/model/feed_forward_net.h
@@ -0,0 +1,133 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef SINGA_MODEL_FEED_FORWARD_NET_H_
+#define SINGA_MODEL_FEED_FORWARD_NET_H_
+#include "singa/model/layer.h"
+
+namespace singa {
+
+/// The feed-forward neural net.
+/// It provides functions for constructing the layers, accessing layer
+/// parameters, and conducting training, evaluation and prediction.
+class FeedForwardNet {
+ public:
+  FeedForwardNet() = default;
+  ~FeedForwardNet();
+
+  /// Add a layer with the assumption that
+  /// 1. this function is called in correct order, i.e., the layers are added
+  ///    following the topological order.
+  /// 2. this layer has already been setup (Setup function is called outside).
+  Layer* Add(Layer *layer);
+
+  // TODO(wangwei) add ConcatenateLayer and SliceLayer
+  // AddConcatenateLayer(vector<Layer*> src, Layer *dst);
+  // AddSliceLayer(Layer* layer, vector<Layer*> dst);
+
+  /// Add a layer by providing its configuration, and setup it.
+  /// Assume the layer is added in correct order.
+  /// For the first layer, 'sample_shape' (the input sample shape) is necessary
+  /// for calling Setup().
+  Layer* Add(const LayerConf &conf, const Shape *sample_shape = nullptr);
+
+  /// Set some fields used for training and evaluating the neural net.
+  /// If the neural net is constructed for evaluation only, then 'opt' is not
+  /// necessary; but for training, both 'opt' and 'loss' are necessary.
+  /// 'shuffle' indicates whether to shuffle the training samples within one
+  /// epoch; it is only valid when using Train().
+  void Compile(bool shuffle, Optimizer *opt, Loss *loss, Metric *metric);
+
+  /// Conduct the training given the training data 'x' and label 'y'.
+  /// Due to memory limit, 'x' and 'y' could not be very large. Hence, it is
+  /// typically used for small training datasets, e.g., cifar10 and MNIST which
+  /// can be stored in main memory.
+  void Train(int batchsize, int nb_epoch, Tensor x, Tensor y);
+  /// Conduct the training given the training data 'x' and label 'y'.
+  /// 'val_split' is the fraction of the training data that is held out for
+  /// validation. Validation is performed before every epoch.
+  /// Due to memory limit, 'x' and 'y' could not be very large. Hence, it is
+  /// typically used for small training datasets, e.g., cifar10 and MNIST which
+  /// can be stored in main memory.
+  void Train(int batchsize, int nb_epoch, float val_split, Tensor x, Tensor y);
+  /// Conduct the training given the training and validation data.
+  /// Validation is performed before every epoch.
+  /// Due to memory limit, 'x' and 'y' could not be very large. Hence, it is
+  /// typically used for small training datasets, e.g., cifar10 and MNIST which
+  /// can be stored in main memory.
+  void Train(int batchsize, int nb_epoch, Tensor x, Tensor y, Tensor val_x,
+             Tensor val_y);
+  /// Train the neural net over one batch of training data; returns the loss
+  /// and metric values for this batch.
+  const std::pair<float, float> TrainOnBatch(Tensor x, Tensor y);
+
+  /// Evaluate the neural net with given data.
+  /// Returns one tensor for loss values and one tensor for metric values;
+  /// each sample would have a loss value and a metric value (if 'metric' is
+  /// set in Compile()). 'batchsize' is used for controlling the memory
+  /// footprint. It should be smaller than the total number of samples.
+  /// Due to memory limit, 'x' and 'y' could not be very large. Hence, it is
+  /// typically used for small datasets, e.g., cifar10 and MNIST which can be
+  /// stored in main memory.
+  std::pair<Tensor, Tensor> Evaluate(Tensor x, Tensor y, int batchsize = 128);
+  /// Evaluate the neural net for one batch of data.
+  std::pair<Tensor, Tensor> EvaluateOnBatch(Tensor x, Tensor y);
+
+  /// Predict the probability distribution over candidate classes for each
+  /// data sample. 'batchsize' is used for controlling the memory footprint.
+  /// It should be smaller than the total number of samples.
+  /// Due to memory limit, 'x' could not be very large. Hence, it is typically
+  /// used for small datasets, e.g., cifar10 and MNIST which can be stored in
+  /// main memory.
+  Tensor Predict(const Tensor &x, int batchsize = 128);
+  /// Predict for one batch of data.
+  Tensor PredictOnBatch(const Tensor &x);
+
+  /// Forward layers one by one using the data batch 'x'.
+  /// 'flag' is either kTrain or kEval.
+  /// Returns the prediction results (from the last layer).
+  const Tensor Forward(int flag, const Tensor &x);
+  /// Backward layers one by one using the gradient batch 'grad'.
+  /// Returns the parameter gradients.
+  const vector<Tensor> Backward(int flag, const Tensor &grad);
+
+  /// Clone the neural net by cloning every layer to the given device.
+  /// If 'device' is nullptr, then clone it on the current device.
+  FeedForwardNet Clone(std::shared_ptr<Device> device = nullptr);
+  /// Move the layer data to the given device.
+  void ToDevice(std::shared_ptr<Device> device);
+  /// Set the data type of each layer.
+  void AsType(DataType dtype);
+
+  const vector<Layer *> layers() const { return layers_; }
+  const vector<string> GetParamNames() const;
+  const vector<ParamSpec *> GetParamSpecs() const;
+  const vector<Tensor *> GetParamValues() const;
+  const vector<Tensor *> GetParamGrads() const;
+
+ protected:
+  vector<Layer *> layers_;
+  Optimizer *opt_;
+  Loss *loss_;
+  Metric *metric_;
+
+  bool shuffle_ = true;
+  Device* device_ = nullptr;
+  DataType dtype_ = kFloat32;
+};
+
+}  // namespace singa
+
+#endif  // SINGA_MODEL_FEED_FORWARD_NET_H_


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/include/singa/model/layer.h
----------------------------------------------------------------------
diff --git a/include/singa/model/layer.h b/include/singa/model/layer.h
index 0570b04..79eb069 100644
--- a/include/singa/model/layer.h
+++ b/include/singa/model/layer.h
@@ -149,6 +149,9 @@ class Layer {
     return std::make_pair(input_grad, param_grad);
   }
 
+  /// Clone the layer to the given device. Layer data (e.g., parameters) are
+  /// deep copied. If 'device' is nullptr, then clone it on the current device.
+  virtual Layer* Clone(std::shared_ptr<Device> device);
   /// Move the layer (including its parameters and other internal Tensor) onto
   /// the given device
   virtual void ToDevice(std::shared_ptr<Device> device) {


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/include/singa/utils/singleton.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/singleton.h b/include/singa/utils/singleton.h
index 4cf487e..de831c4 100644
--- a/include/singa/utils/singleton.h
+++ b/include/singa/utils/singleton.h
@@ -7,9 +7,9 @@
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
-*
+*
 * http://www.apache.org/licenses/LICENSE-2.0
-*
+*
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
@@ -22,10 +22,8 @@
 #ifndef SINGA_UTILS_SINGLETON_H_
 #define SINGA_UTILS_SINGLETON_H_
 
-/**
- * Thread-safe implementation for C++11 according to
- * http://stackoverflow.com/questions/2576022/efficient-thread-safe-singleton-in-c
- */
+/// Thread-safe implementation for C++11 according to
+// http://stackoverflow.com/questions/2576022/efficient-thread-safe-singleton-in-c
 template<typename T>
 class Singleton {
  public:
@@ -35,18 +33,18 @@ class Singleton {
   }
 };
 
-/**
- * Thread Specific Singleton
- *
- * Each thread will have its own data_ storage.
- */
+/// Thread Specific Singleton
+/// Each thread will have its own data_ storage.
+/*
 template<typename T>
 class TSingleton {
  public:
   static T* Instance() {
-    static thread_local T data_;
+    static thread_local T data_;  // thread_local is not available in some
+                                  // compilers
     return &data_;
   }
 };
+*/
 
 #endif  // SINGA_UTILS_SINGLETON_H_


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/include/singa/utils/string.h
----------------------------------------------------------------------
diff --git a/include/singa/utils/string.h b/include/singa/utils/string.h
index b739afc..cbfb28b 100644
--- a/include/singa/utils/string.h
+++ b/include/singa/utils/string.h
@@ -42,6 +42,15 @@ inline string ToLowerCase(const string& input) {
   return out;
 }
 
+inline int ArgPos(int argc, char** arglist, const char* arg) {
+  for (int i = 0; i < argc; i++) {
+    if (strcmp(arglist[i], arg) == 0) {
+      return i;
+    }
+  }
+  return -1;
+}
+
 /**
  * Tokenize a string.
  *


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --git a/src/core/tensor/tensor.cc b/src/core/tensor/tensor.cc
index b852a54..ec59aaa 100644
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@ -130,20 +130,24 @@ void Tensor::ToDevice(std::shared_ptr<Device> dst) {
 void Tensor::ToHost() { ToDevice(device_->host()); }
 
 template <typename DType>
-void Tensor::CopyDataFromHostPtr(const DType *src, const size_t num) {
+void Tensor::CopyDataFromHostPtr(const DType *src, const size_t num,
+                                 const size_t offset) {
   CHECK_EQ(sizeof(DType), SizeOf(data_type_))
       << "data_type is " << DataType_Name(data_type_)
       << " user given type is of size " << sizeof(DType);
   if (src != nullptr) {
-    device_->CopyDataFromHostPtr(block(), src, sizeof(DType) * num, 0);
+    device_->CopyDataFromHostPtr(block(), src, sizeof(DType) * num,
+                                 sizeof(DType) * offset);
   } else {
     LOG(WARNING) << "Copy data from null host ptr";
   }
 }
 template void Tensor::CopyDataFromHostPtr(const unsigned char *src,
-                                          const size_t num);
-template void Tensor::CopyDataFromHostPtr(const float *src, const size_t num);
-template void Tensor::CopyDataFromHostPtr(const int *src, const size_t num);
+                                          const size_t num,
+                                          const size_t offset);
+template void Tensor::CopyDataFromHostPtr(const float *src, const size_t num,
+                                          const size_t offset);
+template void Tensor::CopyDataFromHostPtr(const int *src, const size_t num,
+                                          const size_t offset);
 
 void Tensor::CopyData(const Tensor &src) {
   CHECK_EQ(Size(), src.Size());
@@ -154,7 +158,9 @@ void Tensor::CopyData(const Tensor &src) {
   }
 }
 
-Tensor Tensor::Clone() const {
-  Tensor t(shape_, device_, data_type_);
+Tensor Tensor::Clone(std::shared_ptr<Device> device) const {
+  if (device == nullptr)
+    device = device_;
+  Tensor t(shape_, device, data_type_);
   t.transpose_ = transpose_;
   t.CopyData(*this);


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/d826b2e8/src/model/feed_forward_net.cc
----------------------------------------------------------------------
diff --git a/src/model/feed_forward_net.cc b/src/model/feed_forward_net.cc
new file mode 100644
index 0000000..f9e6480
--- /dev/null
+++ b/src/model/feed_forward_net.cc
@@ -0,0 +1,263 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "singa/model/feed_forward_net.h"
+#include "singa/utils/logging.h"
+#include "singa/utils/channel.h"
+namespace singa {
+
+FeedForwardNet::~FeedForwardNet() {
+  for (auto layer : layers_)
+    delete layer;
+}
+Layer* FeedForwardNet::Add(Layer* layer) {
+  layers_.push_back(layer);
+  return layer;
+}
+
+Layer* FeedForwardNet::Add(const LayerConf& conf, const Shape* sample_shape) {
+  CHECK(sample_shape != nullptr || layers_.size())
+      << "Must provide the input sample shape for the first layer";
+  Layer* layer = CreateLayer(conf.type());
+  if (sample_shape == nullptr)
+    layer->Setup(layers_.back()->GetOutputSampleShape(), conf);
+  else
+    layer->Setup(*sample_shape, conf);
+  Add(layer);
+  return layer;
+}
+
+const vector<string> FeedForwardNet::GetParamNames() const {
+  vector<string> names;
+  for (auto layer : layers_)
+    for (const auto name : layer->param_names())
+      names.push_back(name);
+  return names;
+}
+const vector<Tensor *> FeedForwardNet::GetParamValues() const {
+  vector<Tensor *> values;
+  for (auto layer : layers_)
+    for (const auto value : layer->param_values())
+      values.push_back(value);
+  return values;
+}
+
+const vector<ParamSpec *> FeedForwardNet::GetParamSpecs() const {
+  vector<ParamSpec *> specs;
+  for (auto layer : layers_)
+    for (const auto spec : layer->param_specs())
+      specs.push_back(spec);
+  return specs;
+}
+
+void FeedForwardNet::Compile(bool shuffle, Optimizer* opt, Loss* loss,
+                             Metric* metric) {
+  shuffle_ = shuffle;
+  bool train = (opt != nullptr) && (loss != nullptr);
+  bool test = metric != nullptr;
+  CHECK(train || test) << "Must set opt and loss, or set metric";
+  opt_ = opt;
+  loss_ = loss;
+  metric_ = metric;
+}
+
+void FeedForwardNet::ToDevice(std::shared_ptr<Device> device) {
+  for (auto layer : layers_)
+    layer->ToDevice(device);
+  if (opt_ != nullptr) opt_->ToDevice(device);
+  if (loss_ != nullptr) loss_->ToDevice(device);
+  if (metric_ != nullptr) metric_->ToDevice(device);
+}
+
+FeedForwardNet FeedForwardNet::Clone(std::shared_ptr<Device> device) {
+  FeedForwardNet net;
+  LOG(FATAL) << "FeedForwardNet::Clone not implemented";
+  /*
+  for (auto layer : layers_)
+    net.layers_.push_back(layer->CloneTo(device));
+  if (opt_ != nullptr)
+    net.opt_ = opt_->CloneTo(device);
+  if (loss_ != nullptr)
+    net.loss_ = loss_->CloneTo(device);
+  if (metric_ != nullptr)
+    net.metric_ = metric_->CloneTo(device);
+  net.shuffle_ = shuffle_;
+  net.device_ = device;
+  net.dtype_ = dtype_;
+  */
+  return net;
+}
+
+void FeedForwardNet::AsType(DataType dtype) {
+  LOG(FATAL) << "FeedForwardNet::AsType not implemented";
+}
+
+void FeedForwardNet::Train(int batchsize, int nb_epoch, Tensor x, Tensor y) {
+  CHECK_EQ(x.shape(0), y.shape(0)) << "Diff num of samples in x and y";
+  int num_extra_samples = x.shape(0) % batchsize;
+  if (num_extra_samples != 0)
+    LOG(WARNING) << "The last " << num_extra_samples << " would not be used";
+  Channel *ch = GetChannel("perf");
+  for (int epoch = 0; epoch < nb_epoch; epoch++) {
+    float loss = 0.0f, metric = 0.0f;
+    int batch = 0;
+    for (; batch < x.shape(0) / batchsize; batch++) {
+      Tensor bx = CopyRows(x, batch * batchsize, batch * batchsize + batchsize);
+      Tensor by = CopyRows(y, batch * batchsize, batch * batchsize + batchsize);
+      const auto ret = TrainOnBatch(bx, by);
+      loss += ret.first;
+      metric += ret.second;
+    }
+    loss /= batch;
+    metric /= batch;
+    ch->Send("Epoch " + std::to_string(epoch) + ", training loss = " +
+             std::to_string(loss) + ", accuracy = " + std::to_string(metric));
+  }
+}
+
+void FeedForwardNet::Train(int batchsize, int nb_epoch, float val_split,
+                           Tensor x, Tensor y) {
+  CHECK_EQ(x.shape(0), y.shape(0)) << "Diff num of samples in x and y";
+  size_t num_train = x.shape(0) * (1.0f - val_split);
+  const Tensor train_x = CopyRows(x, 0, num_train);
+  const Tensor train_y = CopyRows(y, 0, num_train);
+  const Tensor val_x = CopyRows(x, num_train, x.shape(0));
+  const Tensor val_y = CopyRows(y, num_train, x.shape(0));
+  Train(batchsize, nb_epoch, train_x, train_y, val_x, val_y);
+}
+
+void FeedForwardNet::Train(int batchsize, int nb_epoch, Tensor x, Tensor y,
+                           Tensor val_x, Tensor val_y) {
+  CHECK_EQ(x.shape(0), y.shape(0)) << "Diff num of samples in x and y";
+  int num_extra_samples = x.shape(0) % batchsize;
+  if (num_extra_samples != 0)
+    LOG(WARNING) << "The last " << num_extra_samples << " would not be used";
+  Channel *train_ch = GetChannel("train_perf");
+  Channel *test_ch = GetChannel("test_perf");
+  for (int epoch = 0; epoch < nb_epoch; epoch++) {
+    float loss = 0.0f, metric = 0.0f;
+    int b = 0;
+    for (; b < x.shape(0) / batchsize; b++) {
+      Tensor bx = CopyRows(x, b * batchsize, b * batchsize + batchsize);
+      Tensor by = CopyRows(y, b * batchsize, b * batchsize + batchsize);
+      const auto ret = TrainOnBatch(bx, by);
+      loss += ret.first;
+      metric += ret.second;
+    }
+    loss /= b;
+    metric /= b;
+    train_ch->Send("Epoch " + std::to_string(epoch) + ", training loss = " +
+                   std::to_string(loss) + ", accuracy = " +
+                   std::to_string(metric));
+    const auto val_perf = Evaluate(val_x, val_y, batchsize);
+    test_ch->Send("Epoch " + std::to_string(epoch) + ", test loss = " +
+                  std::to_string(Average(val_perf.first)) + ", metric = " +
+                  std::to_string(Average(val_perf.second)));
+  }
+}
+
+const std::pair<float, float> FeedForwardNet::TrainOnBatch(Tensor x, Tensor y) {
+  const Tensor fea = Forward(kTrain, x);
+  float loss = loss_->Evaluate(fea, y);
+  float metric = metric_->Evaluate(fea, y);
+  const Tensor grad = loss_->Backward();
+  Backward(kTrain, grad);
+  return std::make_pair(loss, metric);
+}
+
+const Tensor FeedForwardNet::Forward(int flag, const Tensor& data) {
+  Tensor tmp = data;
+  for (auto layer : layers_) {
+    tmp = layer->Forward(flag, tmp);
+  }
+  return tmp;
+}
+
+const vector<Tensor> FeedForwardNet::Backward(int flag, const Tensor& grad) {
+  vector<Tensor> param_grads;
+  Tensor tmp = grad;
+  for (int i = layers_.size() - 1; i >= 0; i--) {
+    auto ret = layers_.at(i)->Backward(flag, tmp);
+    tmp = ret.first;
+    if (ret.second.size())
+      for (const auto x : ret.second)
+        param_grads.push_back(x);
+  }
+  return param_grads;
+}
+
+std::pair<Tensor, Tensor> FeedForwardNet::Evaluate(Tensor x, Tensor y,
+                                                   int batchsize) {
+  CHECK_EQ(x.shape(0), y.shape(0)) << "Diff num of samples in x and y";
+  CHECK_GE(x.shape(0), batchsize);
+  int num_extra_samples = x.shape(0) % batchsize;
+  int b = 0;
+  Tensor loss(Shape{x.shape(0)}), metric(Shape{x.shape(0)});
+  for (; b < x.shape(0) / batchsize; b++) {
+    int start = b * batchsize, end = start + batchsize;
+    const Tensor bx = CopyRows(x, start, end);
+    const Tensor by = CopyRows(y, start, end);
+    const auto ret = EvaluateOnBatch(bx, by);
+    CopyDataToFrom(&loss, ret.first, batchsize, start, 0);
+    CopyDataToFrom(&metric, ret.second, batchsize, start, 0);
+  }
+  if (num_extra_samples > 0) {
+    int start = x.shape(0) - batchsize, end = x.shape(0);
+    const Tensor bx = CopyRows(x, start, end);
+    const Tensor by = CopyRows(y, start, end);
+    const auto ret = EvaluateOnBatch(bx, by);
+    int dst_offset = x.shape(0) - num_extra_samples;
+    int src_offset = batchsize - num_extra_samples;
+    CopyDataToFrom(&loss, ret.first, num_extra_samples, dst_offset, src_offset);
+    CopyDataToFrom(&metric, ret.second, num_extra_samples, dst_offset,
+                   src_offset);
+  }
+  return std::make_pair(loss, metric);
+}
+
+std::pair<Tensor, Tensor> FeedForwardNet::EvaluateOnBatch(Tensor x, Tensor y) {
+  const Tensor fea = Forward(kEval, x);
+  const Tensor l = loss_->Forward(fea, y);
+  const Tensor m = metric_->Forward(fea, y);
+  return std::make_pair(l, m);
+}
+
+Tensor FeedForwardNet::Predict(const Tensor& x, int batchsize) {
+  CHECK_GE(x.shape(0), batchsize);
+  int num_extra_samples = x.shape(0) % batchsize;
+  const auto outshape = layers_.back()->GetOutputSampleShape();
+  Tensor y(Shape{x.shape(0), Product(outshape)}, x.device());
+  for (int b = 0; b < x.shape(0) / batchsize; b++) {
+    int start = b * batchsize, end = start + batchsize;
+    const Tensor bx = CopyRows(x, start, end);
+    CopyDataToFrom(&y, PredictOnBatch(bx), batchsize * y.shape(1),
+                   start * y.shape(1), 0);
+  }
+  if (num_extra_samples > 0) {
+    int start = x.shape(0) - batchsize, end = x.shape(0);
+    const Tensor bx = CopyRows(x, start, end);
+    CopyDataToFrom(&y, PredictOnBatch(bx), num_extra_samples * y.shape(1),
+                   (x.shape(0) - num_extra_samples) * y.shape(1),
+                   (batchsize - num_extra_samples) * y.shape(1));
+  }
+  return y;
+}
+
+Tensor FeedForwardNet::PredictOnBatch(const Tensor& x) {
+  return Forward(kEval, x);
+}
+}  // namespace singa
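
----------------------------------------------------------------------

For reference, the sketch below illustrates how the FeedForwardNet API added
above is intended to be used end-to-end: build a small net from LayerConf
objects, Compile() it with an optimizer, loss and metric, Train() it on
in-memory tensors with a validation split, and then call Evaluate() and
Predict(). It is only a minimal usage sketch written against the headers in
this patch; the "Dense"/"RELU" layer type names and the SGD,
SoftmaxCrossEntropy and Accuracy classes are assumed to be available as they
are in examples/cifar10/alexnet.cc, the loss.h/metric.h include paths are
assumptions, and GenDense/GenReLU/ToyExample are hypothetical helper names.

#include <string>
#include <vector>

#include "singa/model/feed_forward_net.h"
#include "singa/model/optimizer.h"
#include "singa/model/loss.h"    // assumed location of SoftmaxCrossEntropy
#include "singa/model/metric.h"  // assumed location of Accuracy

namespace singa {

// Build a Dense layer configuration, mirroring GenDenseConf in alexnet.cc.
LayerConf GenDense(const std::string& name, int num_output) {
  LayerConf conf;
  conf.set_name(name);
  conf.set_type("Dense");
  conf.mutable_dense_conf()->set_num_output(num_output);
  return conf;
}

// Build a ReLU layer configuration.
LayerConf GenReLU(const std::string& name) {
  LayerConf conf;
  conf.set_name(name);
  conf.set_type("RELU");
  return conf;
}

void ToyExample() {
  FeedForwardNet net;
  Shape in{784};                        // each input sample has 784 features
  net.Add(GenDense("fc1", 128), &in);   // the first layer needs the sample shape
  net.Add(GenReLU("relu1"));
  net.Add(GenDense("fc2", 10));

  // Optimizer, loss and metric, configured as in alexnet.cc.
  SGD sgd;
  OptimizerConf opt_conf;
  opt_conf.set_momentum(0.9);
  sgd.Setup(opt_conf);
  sgd.SetLearningRate([](int step) { return 0.01f; });
  SoftmaxCrossEntropy loss;
  Accuracy acc;
  net.Compile(true, &sgd, &loss, &acc);

  // Placeholder in-memory data; a real program would load a dataset instead,
  // e.g. via the Cifar10 reader added in this commit.
  const size_t n = 1000, dim = 784;
  std::vector<float> fea(n * dim, 0.5f);
  std::vector<int> lab(n, 0);
  Tensor x(Shape{n, dim}), y(Shape{n}, kInt);
  x.CopyDataFromHostPtr(fea.data(), fea.size());
  y.CopyDataFromHostPtr(lab.data(), lab.size());

  // Hold out 10% of the samples for validation and train for 5 epochs with
  // mini-batches of 100 samples.
  net.Train(100, 5, 0.1f, x, y);

  // Per-sample loss/metric values and class probabilities.
  const auto perf = net.Evaluate(x, y, 100);
  const Tensor prob = net.Predict(x, 100);
}
}  // namespace singa

The sketch follows the same call sequence as examples/cifar10/alexnet.cc; only
the layer configurations and the in-memory placeholder data differ.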
