Repository: incubator-singa Updated Branches: refs/heads/master 86977fb5e -> 7993a7867
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/da3b5dd9/examples/cifar10/Makefile ---------------------------------------------------------------------- diff --git a/examples/cifar10/Makefile b/examples/cifar10/Makefile new file mode 100644 index 0000000..40fece6 --- /dev/null +++ b/examples/cifar10/Makefile @@ -0,0 +1,20 @@ +libs :=singa glog protobuf + +.PHONY: all download create + +download: cifar-10-binary-bin + +cifar-10-binary-bin: + wget http://www.cs.toronto.edu/~kriz/cifar-10-binary.tar.gz + tar xf cifar-10-binary.tar.gz + +create: + $(CXX) create_shard.cc -std=c++11 -lsinga -lglog -lprotobuf -I../../include \ + -L../../.libs/ -Wl,-unresolved-symbols=ignore-in-shared-libs -Wl,-rpath=../../.libs/ \ + -o create_shard.bin + mkdir cifar10_train_shard + mkdir cifar10_test_shard + ./create_shard.bin cifar-10-batches-bin . + + + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/da3b5dd9/examples/cifar10/cluster.conf ---------------------------------------------------------------------- diff --git a/examples/cifar10/cluster.conf b/examples/cifar10/cluster.conf index 1953d1d..6b8a8e6 100644 --- a/examples/cifar10/cluster.conf +++ b/examples/cifar10/cluster.conf @@ -1,3 +1,5 @@ nworker_groups: 1 nserver_groups: 1 -workspace: "/data1/wangwei/singa/data/mnist" +nservers_per_group: 1 +nworkers_per_group: 1 +workspace: "examples/cifar10/" http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/da3b5dd9/examples/cifar10/create_shard.cc ---------------------------------------------------------------------- diff --git a/examples/cifar10/create_shard.cc b/examples/cifar10/create_shard.cc new file mode 100644 index 0000000..75e5abe --- /dev/null +++ b/examples/cifar10/create_shard.cc @@ -0,0 +1,112 @@ +// +// This code creates training and test DataShard for CIFAR dataset. +// It is adapted from the convert_cifar_data from Caffe +// +// Usage: +// create_shard.bin input_folder output_folder +// +// The CIFAR dataset could be downloaded at +// http://www.cs.toronto.edu/~kriz/cifar.html +// + +#include <fstream> +#include <string> + +#include <glog/logging.h> +#include <cstdint> +#include <iostream> + +#include "utils/data_shard.h" +#include "utils/common.h" +#include "proto/model.pb.h" + +using std::string; + +using singa::DataShard; +using singa::WriteProtoToBinaryFile; + +const int kCIFARSize = 32; +const int kCIFARImageNBytes = 3072; +const int kCIFARBatchSize = 10000; +const int kCIFARTrainBatches = 5; + +void read_image(std::ifstream* file, int* label, char* buffer) { + char label_char; + file->read(&label_char, 1); + *label = label_char; + file->read(buffer, kCIFARImageNBytes); + return; +} + +void create_shard(const string& input_folder, const string& output_folder) { + int label; + // Data buffer + char str_buffer[kCIFARImageNBytes]; + singa::Record record; + singa::SingleLabelImageRecord* image=record.mutable_image();; + image->add_shape(3); + image->add_shape(kCIFARSize); + image->add_shape(kCIFARSize); + + singa::SingleLabelImageRecord mean; + mean.CopyFrom(*image); + for(int i=0;i<kCIFARImageNBytes;i++) + mean.add_data(0.); + + DataShard train_shard(output_folder+"/cifar10_train_shard",DataShard::kCreate); + LOG(INFO) << "Writing Training data"; + int count=0; + for (int fileid = 0; fileid < kCIFARTrainBatches; ++fileid) { + // Open files + LOG(INFO) << "Training Batch " << fileid + 1; + snprintf(str_buffer, kCIFARImageNBytes, "/data_batch_%d.bin", fileid + 1); + std::ifstream data_file((input_folder + str_buffer).c_str(), + std::ios::in | std::ios::binary); + CHECK(data_file) << "Unable to open train file #" << fileid + 1; + for (int itemid = 0; itemid < kCIFARBatchSize; ++itemid) { + read_image(&data_file, &label, str_buffer); + image->set_label(label); + image->set_pixel(str_buffer, kCIFARImageNBytes); + int length = snprintf(str_buffer, kCIFARImageNBytes, "%05d", + fileid * kCIFARBatchSize + itemid); + CHECK(train_shard.Insert(string(str_buffer, length), record)); + + const string& pixels=image->pixel(); + for(int i=0;i<kCIFARImageNBytes;i++) + mean.set_data(i, mean.data(i)+static_cast<uint8_t>(pixels[i])); + count+=1; + } + } + train_shard.Flush(); + for(int i=0;i<kCIFARImageNBytes;i++) + mean.set_data(i, mean.data(i)/count); + WriteProtoToBinaryFile(mean, (output_folder+"/image_mean.bin").c_str()); + + LOG(INFO) << "Writing Testing data"; + DataShard test_shard(output_folder+"/cifar10_test_shard",DataShard::kCreate); + // Open files + std::ifstream data_file((input_folder + "/test_batch.bin").c_str(), + std::ios::in | std::ios::binary); + CHECK(data_file) << "Unable to open test file."; + for (int itemid = 0; itemid < kCIFARBatchSize; ++itemid) { + read_image(&data_file, &label, str_buffer); + image->set_label(label); + image->set_pixel(str_buffer, kCIFARImageNBytes); + int length = snprintf(str_buffer, kCIFARImageNBytes, "%05d", itemid); + CHECK(test_shard.Insert(string(str_buffer, length), record)); + } + test_shard.Flush(); +} + +int main(int argc, char** argv) { + if (argc != 3) { + std::cout<<"Create train and test DataShard for Cifar dataset.\n" + <<"Usage:\n" + <<" create_shard.bin input_folder output_folder\n" + <<"Where the input folder should contain the binary batch files.\n"; + } else { + google::InitGoogleLogging(argv[0]); + create_shard(string(argv[1]), string(argv[2])); + } + return 0; +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/da3b5dd9/examples/cifar10/model-lmdb.conf ---------------------------------------------------------------------- diff --git a/examples/cifar10/model-lmdb.conf b/examples/cifar10/model-lmdb.conf new file mode 100644 index 0000000..ea22ccd --- /dev/null +++ b/examples/cifar10/model-lmdb.conf @@ -0,0 +1,218 @@ +name: "cifar10-convnet" +train_steps: 70000 +test_steps:100 +test_frequency:1000 +display_frequency:50 +updater{ + momentum:0.9 + weight_decay:0.004 + learning_rate_change_method:kFixedStep + step:0 + step:60000 + step:65000 + step_lr:0.001 + step_lr:0.0001 + step_lr:0.00001 +} +neuralnet { +layer { + name: "data" + type: "kLMDBData" + data_param { + path: "examples/cifar10/cifar10_train_lmdb" + batchsize: 100 + } + exclude: kTest +} + +layer { + name: "data" + type: "kLMDBData" + data_param { + path: "examples/cifar10/cifar10_test_lmdb" + batchsize: 100 + } + exclude: kTrain +} + +layer{ + name:"rgb" + type: "kRGBImage" + srclayers: "data" + rgbimage_param { + meanfile: "examples/cifar10/mean.binaryproto" + } +} + +layer{ + name: "label" + type: "kLabel" + srclayers: "data" +} +layer { + name: "conv1" + type: "kConvolution" + srclayers: "rgb" + convolution_param { + num_filters: 32 + kernel: 5 + stride: 1 + pad:2 + } + param{ + name: "weight" + init_method:kGaussian + std:0.0001 + learning_rate_multiplier:1.0 + } + param{ + name: "bias" + init_method: kConstant + learning_rate_multiplier:2.0 + value:0 + } +} +layer { + name: "pool1" + type: "kPooling" + srclayers: "conv1" + pooling_param { + pool: MAX + kernel: 3 + stride: 2 + } +} +layer { + name: "relu1" + type: "kReLU" + srclayers:"pool1" +} +layer { + name: "norm1" + type: "kLRN" + lrn_param { + norm_region: WITHIN_CHANNEL + local_size: 3 + alpha: 5e-05 + beta: 0.75 + } + srclayers:"relu1" +} +layer { + name: "conv2" + type: "kConvolution" + srclayers: "norm1" + convolution_param { + num_filters: 32 + kernel: 5 + stride: 1 + pad:2 + } + param{ + name: "weight" + init_method:kGaussian + std:0.01 + learning_rate_multiplier:1.0 + } + param{ + name: "bias" + init_method: kConstant + learning_rate_multiplier:2.0 + value:0 + } +} +layer { + name: "relu2" + type: "kReLU" + srclayers:"conv2" +} +layer { + name: "pool2" + type: "kPooling" + srclayers: "relu2" + pooling_param { + pool: MAX + kernel: 3 + stride: 2 + } +} +layer { + name: "norm2" + type: "kLRN" + lrn_param { + norm_region: WITHIN_CHANNEL + local_size: 3 + alpha: 5e-05 + beta: 0.75 + } + srclayers:"pool2" +} +layer { + name: "conv3" + type: "kConvolution" + srclayers: "norm2" + convolution_param { + num_filters: 64 + kernel: 5 + stride: 1 + pad:2 + } + param{ + name: "weight" + init_method:kGaussian + std:0.01 + } + param{ + name: "bias" + init_method: kConstant + value:0 + } +} +layer { + name: "relu3" + type: "kReLU" + srclayers:"conv3" +} +layer { + name: "pool3" + type: "kPooling" + srclayers: "relu3" + pooling_param { + pool: AVE + kernel: 3 + stride: 2 + } +} +layer { + name: "ip1" + type: "kInnerProduct" + srclayers:"pool3" + inner_product_param { + num_output: 10 + } + param{ + name: "weight" + init_method:kGaussian + std:0.01 + learning_rate_multiplier:1.0 + weight_decay_multiplier:250 + } + param{ + name: "bias" + init_method: kConstant + learning_rate_multiplier:2.0 + weight_decay_multiplier:0 + value:0 + } +} + +layer{ + name: "loss" + type:"kSoftmaxLoss" + softmaxloss_param{ + topk:1 + } + srclayers:"ip1" + srclayers:"label" +} +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/da3b5dd9/examples/cifar10/model.conf ---------------------------------------------------------------------- diff --git a/examples/cifar10/model.conf b/examples/cifar10/model.conf index 9979c3b..09e64aa 100644 --- a/examples/cifar10/model.conf +++ b/examples/cifar10/model.conf @@ -17,20 +17,19 @@ updater{ neuralnet { layer { name: "data" - type: "kLMDBData" + type: "kShardData" data_param { - path: "/home/wangwei/program/singa/examples/cifar10/cifar10_train_lmdb" + path: "examples/cifar10/cifar10_train_shard" batchsize: 100 - random_skip:10000 } exclude: kTest } layer { name: "data" - type: "kLMDBData" + type: "kShardData" data_param { - path: "/home/wangwei/program/singa/examples/cifar10/cifar10_test_lmdb" + path: "examples/cifar10/cifar10_test_shard" batchsize: 100 } exclude: kTrain @@ -41,7 +40,7 @@ layer{ type: "kRGBImage" srclayers: "data" rgbimage_param { - meanfile: "/home/wangwei/program/singa/examples/cifar10/mean.binaryproto" + meanfile: "examples/cifar10/image_mean.bin" } } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/da3b5dd9/src/neuralnet/layer.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/layer.cc b/src/neuralnet/layer.cc index d45bcc0..71c6f2a 100644 --- a/src/neuralnet/layer.cc +++ b/src/neuralnet/layer.cc @@ -648,10 +648,17 @@ void RGBImageLayer::Setup(const LayerProto& proto, data_.Reshape(shape); mean_.Reshape({shape[1],shape[2],shape[3]}); if(proto.rgbimage_param().has_meanfile()){ - BlobProto tmp; - ReadProtoFromBinaryFile(proto.rgbimage_param().meanfile().c_str(), &tmp); - CHECK_EQ(mean_.count(), tmp.data_size()); - memcpy(mean_.mutable_cpu_data(), tmp.data().data(), sizeof(float)*tmp.data_size()); + if(proto.rgbimage_param().meanfile().find("binaryproto")!=string::npos){ + BlobProto tmp; + ReadProtoFromBinaryFile(proto.rgbimage_param().meanfile().c_str(), &tmp); + CHECK_EQ(mean_.count(), tmp.data_size()); + memcpy(mean_.mutable_cpu_data(), tmp.data().data(), sizeof(float)*tmp.data_size()); + }else{ + SingleLabelImageRecord tmp; + ReadProtoFromBinaryFile(proto.rgbimage_param().meanfile().c_str(), &tmp); + CHECK_EQ(mean_.count(), tmp.data_size()); + memcpy(mean_.mutable_cpu_data(), tmp.data().data(), sizeof(float)*tmp.data_size()); + } }else{ memset(mean_.mutable_cpu_data(),0,sizeof(float)*mean_.count()); } @@ -671,7 +678,10 @@ void ShardDataLayer::ComputeFeature(bool training, const vector<SLayer>& srclaye } for(auto& record: records_){ string key; - shard_->Next(&key, &record); + if(!shard_->Next(&key, &record)){ + shard_->SeekToFirst(); + CHECK(shard_->Next(&key, &record)); + } } }
