Merge PR #232 for training AlexNet over ImageNet
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/53639b7c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/53639b7c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/53639b7c

Branch: refs/heads/dev
Commit: 53639b7ce8ddbde2e47473701ab414548575849b
Parents: 17bfb19 8051720
Author: Wei Wang <[email protected]>
Authored: Wed Aug 10 14:00:24 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Wed Aug 10 14:00:24 2016 +0800

----------------------------------------------------------------------
 CMakeLists.txt                   |   8 +-
 examples/CMakeLists.txt          |   1 +
 examples/imagenet/CMakeLists.txt |  16 ++
 examples/imagenet/README.md      |  58 +++++
 examples/imagenet/alexnet.cc     | 410 ++++++++++++++++++++++++++++++++++
 examples/imagenet/create_data.sh |   3 +
 examples/imagenet/ilsvrc12.cc    |  70 ++++++
 examples/imagenet/ilsvrc12.h     | 380 +++++++++++++++++++++++++++++++
 examples/imagenet/run.sh         |   3 +
 include/singa/io/snapshot.h      |   8 +-
 include/singa/utils/timer.h      |   6 +-
 src/core/tensor/tensor.cc        |   6 +-
 src/io/binfile_reader.cc         |   6 +-
 src/io/jpg_encoder.cc            |   2 +-
 src/io/snapshot.cc               |   8 +-
 15 files changed, 967 insertions(+), 18 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53639b7c/CMakeLists.txt
----------------------------------------------------------------------
diff --cc CMakeLists.txt
index c1d0521,8c6afad..9efadc0
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@@ -18,14 -18,13 +18,14 @@@ SET(SINGA_INCLUDE_DI
  "${CMAKE_SOURCE_DIR}/include;${CMAKE_SOURCE_DIR}/lib/cnmem/include;${PROJECT_BINARY_DIR}")
  INCLUDE_DIRECTORIES(${SINGA_INCLUDE_DIR})
  
- OPTION(USE_CBLAS "Use CBlas libs" OFF)
- OPTION(USE_CUDA "Use Cuda libs" OFF)
- OPTION(USE_CUDNN "Use Cudnn libs" OFF)
+ OPTION(USE_CBLAS "Use CBlas libs" ON)
+ OPTION(USE_CUDA "Use Cuda libs" ON)
+ OPTION(USE_CUDNN "Use Cudnn libs" ON)
  OPTION(USE_OPENCV "Use opencv" OFF)
  OPTION(USE_LMDB "Use LMDB libs" OFF)
- OPTION(USE_PYTHON "Generate py wrappers" OFF)
+ OPTION(USE_PYTHON "Generate py wrappers" ON)
  OPTION(USE_OPENCL "Use OpenCL" OFF)
 +OPTION(ENABLE_DIST "enable distributed training" OFF)
  #OPTION(BUILD_OPENCL_TESTS "Build OpenCL tests" OFF)
  
  INCLUDE("cmake/Dependencies.cmake")


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53639b7c/examples/imagenet/README.md
----------------------------------------------------------------------
diff --cc examples/imagenet/README.md
index 0000000,2e0389a..be6797c
mode 000000,100644..100644
--- a/examples/imagenet/README.md
+++ b/examples/imagenet/README.md
@@@ -1,0 -1,43 +1,58 @@@
-# Example of alexnet
++# Train AlexNet over ImageNet
++
++A convolutional neural network (CNN) is a type of feed-forward neural
++network widely used for image and video classification. In this example, we
++use a [deep CNN model](http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks)
++to do image classification over the ImageNet dataset.
++
++## Instructions
++
++### Compile SINGA
++
++Please compile SINGA with CUDA, CUDNN and OpenCV enabled. You can turn these
++options on manually in CMakeLists.txt or run `ccmake ..` in the build/ folder.
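++
++For example, a minimal out-of-source build with these options enabled could
++look as follows (a sketch only; the `build/` directory name and generator
++defaults are assumptions, so adapt them to your environment):
++
++    # enable the GPU and OpenCV options (build dir name is an assumption)
++    mkdir build && cd build
++    cmake -DUSE_CUDA=ON -DUSE_CUDNN=ON -DUSE_OPENCV=ON ..
++    make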
++
++We have tested CUDNN V4 and V5 (V5 requires CUDA 7.5).
+ 
+ ### Data download
+ * Please refer to steps 1-3 of the [Instructions to create ImageNet 2012 data](https://github.com/amd/OpenCL-caffe/wiki/Instructions-to-create-ImageNet-2012-data)
+   to download and decompress the data.
-* You can download the training and validation list by
-  [get_ilsvrc_aux.sh](https://github.com/BVLC/caffe/blob/master/data/ilsvrc12/get_ilsvrc_aux.sh)
++* You can download the training and validation lists via
++  [get_ilsvrc_aux.sh](https://github.com/BVLC/caffe/blob/master/data/ilsvrc12/get_ilsvrc_aux.sh)
+   or from [ImageNet](http://www.image-net.org/download-images).
+ 
+ ### Data preprocessing
-* Assuming you have downloaded the data and the list.
++* Assuming you have downloaded the data and the lists, you should now
+   transform the data into binary files. You can run:
++
+       sh create_data.sh
++
++  The script will generate a test file (`test.bin`), a mean file (`mean.bin`) and
+   several training files (`trainX.bin`) in the specified output folder.
+ * You can also change the parameters of `create_data.sh` (see the example
+   invocation at the end of this file):
+   + `-trainlist <file>`: the file listing the training images;
+   + `-trainfolder <folder>`: the folder of training images;
+   + `-testlist <file>`: the file listing the test images;
+   + `-testfolder <folder>`: the folder of test images;
-   + `-outdata <folder>`: the folder to save output files, including mean, training and test files.
++  + `-outdata <folder>`: the folder for saving the output files, i.e., the mean,
+     training and test files. The script will generate these files in the specified folder;
+   + `-filesize <int>`: number of training images stored in each binary file.
+ 
+ ### Training
+ * After preparing the data, you can run the following command to train the AlexNet model:
+ 
+       sh run.sh
++
+ * You may change the parameters of `run.sh` (an example invocation with all
+   flags spelled out is sketched below):
+   + `-epoch <int>`: number of epochs to train, default is 90;
-   + `-lr <float>`: base learning rate, the learning rate will decrease each 20 epochs,
++  + `-lr <float>`: base learning rate; the learning rate decreases every 20 epochs,
+     more specifically, `lr = lr * exp(0.1 * (epoch / 20))`;
+   + `-batchsize <int>`: batch size; it should be chosen according to your memory;
+   + `-filesize <int>`: number of training images stored in each binary file; it is
+     the same as the `filesize` used in data preprocessing;
+   + `-ntrain <int>`: number of training images;
+   + `-ntest <int>`: number of test images;
+   + `-data <folder>`: the folder storing the binary files; it is exactly the output
+     folder of the data preprocessing step;
+   + `-pfreq <int>`: the frequency (in batches) of printing the current model status (loss and accuracy);
-   + `-nthreads <int>`: the number of threads to load data which feed to the model.
++  + `-nthreads <int>`: the number of threads that load and feed data to the model.
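++
++For reference, a minimal end-to-end invocation is sketched below. The flag
++names are exactly those listed above; the concrete paths and values (list
++files, image folders, output folder, learning rate, batch size, thread count)
++are placeholder assumptions to adapt to your setup. 1281167 and 50000 are the
++image counts of the ILSVRC2012 training and validation sets.
++
++    # preprocess (placeholder paths/values): pack raw images into binary files
++    sh create_data.sh -trainlist train.txt -trainfolder ./train \
++        -testlist val.txt -testfolder ./val -outdata ./imagenet_data -filesize 1280
++
++    # train AlexNet for 90 epochs over the generated binary files
++    sh run.sh -epoch 90 -lr 0.01 -batchsize 256 -filesize 1280 \
++        -ntrain 1281167 -ntest 50000 -data ./imagenet_data -pfreq 100 -nthreads 12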
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/53639b7c/src/core/tensor/tensor.cc
----------------------------------------------------------------------
diff --cc src/core/tensor/tensor.cc
index d2fec53,2951aa9..e260f9e
--- a/src/core/tensor/tensor.cc
+++ b/src/core/tensor/tensor.cc
@@@ -34,17 -34,13 +34,15 @@@ Tensor::Tensor() { device_ = defaultDev
  
  Tensor::Tensor(const Shape &shape, DataType dtype)
      : data_type_(dtype), device_(defaultDevice), shape_(shape) {
-   device_ = defaultDevice;
-   //device_ = defaultDevice;
-   block_ = device_->NewBlock(Product(shape_) * SizeOf(data_type_));
+   size_t size = Product(shape_) * SizeOf(data_type_);
+   if (size)
+     block_ = device_->NewBlock(size);
  }
  
  Tensor::Tensor(Shape &&shape, DataType dtype)
      : data_type_(dtype), device_(defaultDevice), shape_(shape) {
-   device_ = defaultDevice;
-   //device_ = defaultDevice;
-   block_ = device_->NewBlock(Product(shape_) * SizeOf(data_type_));
+   size_t size = Product(shape_) * SizeOf(data_type_);
+   if (size)
+     block_ = device_->NewBlock(size);
  }
  
  Tensor::Tensor(const Shape &shape, std::shared_ptr<Device> device,
                 DataType dtype)
