Repository: incubator-singa Updated Branches: refs/heads/master 654d733ba -> 48b8fea59
update example of training MLP on MNIST dataset (replacing lmdb with DataShard) Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/48b8fea5 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/48b8fea5 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/48b8fea5 Branch: refs/heads/master Commit: 48b8fea5949ff1ba0baee72deb455136c177525e Parents: 654d733 Author: wang wei <[email protected]> Authored: Sat May 9 22:50:29 2015 +0800 Committer: wang wei <[email protected]> Committed: Sat May 9 22:50:29 2015 +0800 ---------------------------------------------------------------------- examples/mnist/cluster.conf | 4 +- examples/mnist/create_shard.cc | 115 +++++++++++++++++++ examples/mnist/mlp-lmdb.conf | 223 ++++++++++++++++++++++++++++++++++++ examples/mnist/mlp.conf | 10 +- 4 files changed, 345 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/cluster.conf ---------------------------------------------------------------------- diff --git a/examples/mnist/cluster.conf b/examples/mnist/cluster.conf index 1953d1d..6b8a8e6 100644 --- a/examples/mnist/cluster.conf +++ b/examples/mnist/cluster.conf @@ -1,3 +1,5 @@ nworker_groups: 1 nserver_groups: 1 -workspace: "/data1/wangwei/singa/data/mnist" +nservers_per_group: 1 +nworkers_per_group: 1 +workspace: "examples/cifar10/" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/create_shard.cc ---------------------------------------------------------------------- diff --git a/examples/mnist/create_shard.cc b/examples/mnist/create_shard.cc new file mode 100644 index 0000000..f545b80 --- /dev/null +++ b/examples/mnist/create_shard.cc @@ -0,0 +1,115 @@ +// +// This code creates DataShard for MNIST dataset. +// It is adapted from the convert_mnist_data from Caffe +// +// Usage: +// create_shard.bin input_image_file input_label_file output_folder +// The MNIST dataset could be downloaded at +// http://yann.lecun.com/exdb/mnist/ + +#include <gflags/gflags.h> +#include <glog/logging.h> +#include <cstdint> +#include <iostream> + +#include <fstream> +#include <string> + +#include "utils/data_shard.h" +#include "utils/common.h" +#include "proto/model.pb.h" + +using singa::DataShard; +using singa::WriteProtoToBinaryFile; +using std::string; + +uint32_t swap_endian(uint32_t val) { + val = ((val << 8) & 0xFF00FF00) | ((val >> 8) & 0xFF00FF); + return (val << 16) | (val >> 16); +} + +void create_shard(const char* image_filename, const char* label_filename, + const char* output) { + // Open files + std::ifstream image_file(image_filename, std::ios::in | std::ios::binary); + std::ifstream label_file(label_filename, std::ios::in | std::ios::binary); + CHECK(image_file) << "Unable to open file " << image_filename; + CHECK(label_file) << "Unable to open file " << label_filename; + // Read the magic and the meta data + uint32_t magic; + uint32_t num_items; + uint32_t num_labels; + uint32_t rows; + uint32_t cols; + + image_file.read(reinterpret_cast<char*>(&magic), 4); + magic = swap_endian(magic); + CHECK_EQ(magic, 2051) << "Incorrect image file magic."; + label_file.read(reinterpret_cast<char*>(&magic), 4); + magic = swap_endian(magic); + CHECK_EQ(magic, 2049) << "Incorrect label file magic."; + image_file.read(reinterpret_cast<char*>(&num_items), 4); + num_items = swap_endian(num_items); + label_file.read(reinterpret_cast<char*>(&num_labels), 4); + num_labels = swap_endian(num_labels); + CHECK_EQ(num_items, num_labels); + image_file.read(reinterpret_cast<char*>(&rows), 4); + rows = swap_endian(rows); + image_file.read(reinterpret_cast<char*>(&cols), 4); + cols = swap_endian(cols); + + DataShard shard(output, DataShard::kCreate); + char label; + char* pixels = new char[rows * cols]; + int count = 0; + const int kMaxKeyLength = 10; + char key[kMaxKeyLength]; + string value; + + singa::Record record; + singa::SingleLabelImageRecord* image=record.mutable_image(); + image->add_shape(rows); + image->add_shape(cols); + LOG(INFO) << "A total of " << num_items << " items."; + LOG(INFO) << "Rows: " << rows << " Cols: " << cols; + for (int item_id = 0; item_id < num_items; ++item_id) { + image_file.read(pixels, rows * cols); + label_file.read(&label, 1); + image->set_pixel(pixels, rows*cols); + image->set_label(label); + snprintf(key, kMaxKeyLength, "%08d", item_id); + shard.Insert(string(key), record); + } + delete pixels; + shard.Flush(); +} + +int main(int argc, char** argv) { +/* +#ifndef GFLAGS_GFLAGS_H_ + namespace gflags = google; +#endif + gflags::SetUsageMessage("This program create a DataShard for a MNIST dataset\n" + "Usage:\n" + " create_shard.bin input_image_file input_label_file output_db_file\n" + "The MNIST dataset could be downloaded at\n" + " http://yann.lecun.com/exdb/mnist/\n" + "You should gunzip them after downloading."); + gflags::ParseCommandLineFlags(&argc, &argv, true); + gflags::ShowUsageWithFlagsRestrict(argv[0], + "examples/mnist/create_shard.bin"); +*/ + + if (argc != 4) { + std::cout<<"This program create a DataShard for a MNIST dataset\n" + "Usage:\n" + " create_shard.bin input_image_file input_label_file output_db_file\n" + "The MNIST dataset could be downloaded at\n" + " http://yann.lecun.com/exdb/mnist/\n" + "You should gunzip them after downloading."; + } else { + google::InitGoogleLogging(argv[0]); + create_shard(argv[1], argv[2], argv[3]); + } + return 0; +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/mlp-lmdb.conf ---------------------------------------------------------------------- diff --git a/examples/mnist/mlp-lmdb.conf b/examples/mnist/mlp-lmdb.conf new file mode 100644 index 0000000..d0ed08f --- /dev/null +++ b/examples/mnist/mlp-lmdb.conf @@ -0,0 +1,223 @@ +name: "deep-big-simple-mlp" +train_steps: 10000 +test_steps:10 +test_frequency:60 +display_frequency:30 +checkpoint_frequency:120 +updater{ + base_learning_rate: 0.001 + learning_rate_change_method: kStep + learning_rate_change_frequency: 60 + gamma: 0.997 + param_type: "Param" +} + +neuralnet { +layer { + name: "data" + type: "kLMDBData" + data_param { + path: "/home/wangwei/program/singa/examples/mnist/mnist_train_lmdb" + batchsize: 1000 + random_skip: 10000 + } + exclude: kTest +} + +layer { + name: "data" + type: "kLMDBData" + data_param { + path: "/home/wangwei/program/singa/examples/mnist/mnist_test_lmdb" + batchsize: 1000 + } + exclude: kTrain +} + +layer{ + name:"mnist" + type: "kMnistImage" + srclayers: "data" + mnist_param { +# sigma: 6 +# alpha: 38 +# gamma: 15 +# kernel: 21 +# elastic_freq:100 +# beta:15 +# resize: 29 + norm_a: 127.5 + norm_b: 1 + } +} + + +layer{ + name: "label" + type: "kLabel" + srclayers: "data" +} + +layer{ + name: "fc1" + type: "kInnerProduct" + srclayers:"mnist" + inner_product_param{ + num_output: 2500 + } + param{ + name: "weight" + init_method: kUniform + low:-0.05 + high:0.05 + } + param{ + name: "bias" + init_method: kUniform + low: -0.05 + high:0.05 + } +} + +layer{ + name: "tanh1" + type:"kTanh" + srclayers:"fc1" +} +layer{ + name: "fc2" + type: "kInnerProduct" + srclayers:"tanh1" + inner_product_param{ + num_output: 2000 + } + param{ + name: "weight" + init_method: kUniform + low:-0.05 + high:0.05 + } + param{ + name: "bias" + init_method: kUniform + low: -0.05 + high:0.05 + } +} + +layer{ + name: "tanh2" + type:"kTanh" + srclayers:"fc2" +} +layer{ + name: "fc3" + type: "kInnerProduct" + srclayers:"tanh2" + inner_product_param{ + num_output: 1500 + } + param{ + name: "weight" + init_method: kUniform + low:-0.05 + high:0.05 + } + param{ + name: "bias" + init_method: kUniform + low: -0.05 + high:0.05 + } + +} + +layer{ + name: "tanh3" + type:"kTanh" + srclayers:"fc3" +} +layer{ + name: "fc4" + type: "kInnerProduct" + srclayers:"tanh3" + inner_product_param{ + num_output: 1000 + } + param{ + name: "weight" + init_method: kUniform + low:-0.05 + high:0.05 + } + param{ + name: "bias" + init_method: kUniform + low: -0.05 + high:0.05 + } + +} + +layer{ + name: "tanh4" + type:"kTanh" + srclayers:"fc4" +} +layer{ + name: "fc5" + type: "kInnerProduct" + srclayers:"tanh4" + inner_product_param{ + num_output: 500 + } + param{ + name: "weight" + init_method: kUniform + low:-0.05 + high:0.05 + } + param{ + name: "bias" + init_method: kUniform + low: -0.05 + high:0.05 + } + +} + +layer{ + name: "tanh5" + type:"kTanh" + srclayers:"fc5" +} +layer{ + name: "fc6" + type: "kInnerProduct" + srclayers:"tanh5" + inner_product_param{ + num_output: 10 + } + param{ + name: "weight" + init_method: kUniform + low:-0.05 + high:0.05 + } + param{ + name: "bias" + init_method: kUniform + low: -0.05 + high:0.05 + } +} +layer{ + name: "loss" + type:"kSoftmaxLoss" + softmaxloss_param{ + topk:1 + } + srclayers:"fc6" + srclayers:"label" +} +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/48b8fea5/examples/mnist/mlp.conf ---------------------------------------------------------------------- diff --git a/examples/mnist/mlp.conf b/examples/mnist/mlp.conf index d0ed08f..9eeb1c6 100644 --- a/examples/mnist/mlp.conf +++ b/examples/mnist/mlp.conf @@ -3,7 +3,6 @@ train_steps: 10000 test_steps:10 test_frequency:60 display_frequency:30 -checkpoint_frequency:120 updater{ base_learning_rate: 0.001 learning_rate_change_method: kStep @@ -15,20 +14,19 @@ updater{ neuralnet { layer { name: "data" - type: "kLMDBData" + type: "kShardData" data_param { - path: "/home/wangwei/program/singa/examples/mnist/mnist_train_lmdb" + path: "examples/mnist/mnist_train_shard" batchsize: 1000 - random_skip: 10000 } exclude: kTest } layer { name: "data" - type: "kLMDBData" + type: "kShardData" data_param { - path: "/home/wangwei/program/singa/examples/mnist/mnist_test_lmdb" + path: "examples/mnist/mnist_test_shard" batchsize: 1000 } exclude: kTrain
