Minor updates to pass tests and run examples
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/0a764257 Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/0a764257 Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/0a764257 Branch: refs/heads/dev Commit: 0a7642576cb0df87c6f08ff00227658c0e03f69f Parents: 72d736a Author: Wei Wang <[email protected]> Authored: Mon Aug 15 21:40:55 2016 +0800 Committer: Wei Wang <[email protected]> Committed: Tue Aug 16 00:12:27 2016 +0800 ---------------------------------------------------------------------- doc/Makefile | 7 +- doc/en/conf.py | 2 +- doc/en/docs.rst | 1 - doc/en/docs/device.rst | 4 +- doc/en/docs/index.rst | 9 ++- doc/en/docs/initializer.rst | 12 ++++ doc/en/docs/layer.rst | 14 ++++ doc/en/docs/loss.rst | 7 ++ doc/en/docs/metric.rst | 8 +++ doc/en/docs/optimizer.rst | 11 ++++ doc/en/docs/overview.rst | 99 ----------------------------- doc/en/docs/tensor.rst | 30 +-------- doc/en/docs/utils.rst | 6 ++ doc/en/downloads.md | 1 - doc/en/index.rst | 9 +-- doc/en/releases/RELEASE_NOTES_0.1.0.md | 99 +++++++++++++++++++++++++++++ doc/en/releases/RELEASE_NOTES_0.2.0.md | 84 ++++++++++++++++++++++++ doc/en/releases/RELEASE_NOTES_0.3.0.md | 37 +++++++++++ doc/zh/conf.py | 2 +- examples/char-rnn/train.py | 7 +- examples/cifar10/README.md | 69 ++++++++++++++++++++ examples/cifar10/alexnet.cc | 2 +- examples/cifar10/train.py | 2 +- examples/mnist/train.py | 1 - src/python/singa/optimizer.py | 2 + test/CMakeLists.txt | 2 +- 26 files changed, 373 insertions(+), 154 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/Makefile ---------------------------------------------------------------------- diff --git a/doc/Makefile b/doc/Makefile index b5282b7..f02595b 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -26,14 +26,9 @@ clean: .PHONY: html html: -<<<<<<< HEAD 
- cp -rf ../examples docs/ - $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html -======= cp -rf ../examples en/docs/ - $(SPHINXBUILD) -b html -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html + $(SPHINXBUILD) -b html -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) en $(BUILDDIR)/html/en $(SPHINXBUILD) -b html -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) zh $(BUILDDIR)/html/zh ->>>>>>> v1doc @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/conf.py ---------------------------------------------------------------------- diff --git a/doc/en/conf.py b/doc/en/conf.py index 36080d9..46a48f6 100755 --- a/doc/en/conf.py +++ b/doc/en/conf.py @@ -19,7 +19,7 @@ import os import sys sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(1, os.path.abspath('../build/python')) +sys.path.insert(1, os.path.abspath('../../build/python')) # -- General configuration ------------------------------------------------ from recommonmark.parser import CommonMarkParser http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs.rst b/doc/en/docs.rst index 400b12a..c1b143b 100644 --- a/doc/en/docs.rst +++ b/doc/en/docs.rst @@ -3,4 +3,3 @@ Documentation .. toctree:: docs/index - docs/zh/index http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/device.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/device.rst b/doc/en/docs/device.rst index e79d87a..53faf48 100644 --- a/doc/en/docs/device.rst +++ b/doc/en/docs/device.rst @@ -23,9 +23,7 @@ Python API :members: create_cuda_gpus, create_cuda_gpus_on, get_default_device -The following code provides examples of creating devices, - -.. 
code:: python +The following code provides examples of creating devices:: from singa import device cuda = device.create_cuda_gpu_on(0) # use GPU card of ID 0 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/index.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/index.rst b/doc/en/docs/index.rst index 93315de..a2ea540 100644 --- a/doc/en/docs/index.rst +++ b/doc/en/docs/index.rst @@ -1,5 +1,5 @@ -English -======= +Documentation +============= .. toctree:: @@ -7,4 +7,9 @@ English software_stack device tensor + layer + initializer + loss + metric + optimizer examples/index http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/initializer.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/initializer.rst b/doc/en/docs/initializer.rst new file mode 100644 index 0000000..f334497 --- /dev/null +++ b/doc/en/docs/initializer.rst @@ -0,0 +1,12 @@ +Initializer +=========== + +Python API +---------- + +.. automodule:: singa.initializer + :members: uniform, gaussian + :member-order: bysource + +CPP API +-------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/layer.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/layer.rst b/doc/en/docs/layer.rst new file mode 100644 index 0000000..62ef3c3 --- /dev/null +++ b/doc/en/docs/layer.rst @@ -0,0 +1,14 @@ +Layer +====== + +Python API +----------- +.. 
automodule:: singa.layer + :members: + :member-order: bysource + :show-inheritance: + :undoc-members: + + +CPP API +-------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/loss.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/loss.rst b/doc/en/docs/loss.rst new file mode 100644 index 0000000..27872dd --- /dev/null +++ b/doc/en/docs/loss.rst @@ -0,0 +1,7 @@ +Loss +========= + + +.. automodule:: singa.loss + :members: + :show-inheritance: http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/metric.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/metric.rst b/doc/en/docs/metric.rst new file mode 100644 index 0000000..35fa24e --- /dev/null +++ b/doc/en/docs/metric.rst @@ -0,0 +1,8 @@ +Metric +========= + + +.. automodule:: singa.metric + :members: + :show-inheritance: + :member-order: bysource http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/optimizer.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/optimizer.rst b/doc/en/docs/optimizer.rst new file mode 100644 index 0000000..486c01e --- /dev/null +++ b/doc/en/docs/optimizer.rst @@ -0,0 +1,11 @@ +Optimizer +========= + + +.. automodule:: singa.optimizer + :members: + :member-order: bysource + :show-inheritance: + :undoc-members: + + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/overview.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/overview.rst b/doc/en/docs/overview.rst deleted file mode 100644 index 18ad62b..0000000 --- a/doc/en/docs/overview.rst +++ /dev/null @@ -1,99 +0,0 @@ -Introduction -============== - - -SINGA is a general distributed deep learning platform for training big deep -learning models over large datasets. 
It is designed with an intuitive -programming model based on the layer abstraction. A variety -of popular deep learning models are supported, namely feed-forward models including -convolutional neural networks (CNN), energy models like restricted Boltzmann -machine (RBM), and recurrent neural networks (RNN). Many built-in layers are -provided for users. SINGA architecture is -sufficiently flexible to run synchronous, asynchronous and hybrid training -frameworks. SINGA -also supports different neural net partitioning schemes to parallelize the -training of large models, namely partitioning on batch dimension, feature -dimension or hybrid partitioning. - - -Goals ------ - -As a distributed system, the first goal of SINGA is to have good scalability. In other -words, SINGA is expected to reduce the total training time to achieve certain -accuracy with more computing resources (i.e., machines). - - -The second goal is to make SINGA easy to use. -It is non-trivial for programmers to develop and train models with deep and -complex model structures. Distributed training further increases the burden of -programmers, e.g., data and model partitioning, and network communication. Hence it is essential to -provide an easy to use programming model so that users can implement their deep -learning models/algorithms without much awareness of the underlying distributed -platform. - -Principles ----------- - -Scalability is a challenging research problem for distributed deep learning -training. SINGA provides a general architecture to exploit the scalability of -different training frameworks. Synchronous training frameworks improve the -efficiency of one training iteration, and -asynchronous training frameworks improve the convergence rate. Given a fixed budget -(e.g., cluster size), users can run a hybrid framework that maximizes the -scalability by trading off between efficiency and convergence rate. 
- -SINGA comes with a programming model designed based on the layer abstraction, which -is intuitive for deep learning models. A variety of -popular deep learning models can be expressed and trained using this programming model. - -System overview ---------------- - -.. figure:: /image/sgd.png - - Figure 1 - SGD flow - -Training a deep learning model is to find the optimal parameters involved in -the transformation functions that generate good features for specific tasks. -The goodness of a set of parameters is measured by a loss function, e.g., -`Cross-Entropy Loss <https://en.wikipedia.org/wiki/Cross_entropy>`_ . Since the -loss functions are usually non-linear and non-convex, it is difficult to get a -closed form solution. Typically, people use the stochastic gradient descent -(SGD) algorithm, which randomly -initializes the parameters and then iteratively updates them to reduce the loss -as shown in Figure 1. - -.. figure:: /image/overview.png - - Figure 2 - SINGA overview - -SGD is used in SINGA to train -parameters of deep learning models. The training workload is distributed over -worker and server units as shown in Figure 2. In each -iteration, every worker calls *TrainOneBatch* function to compute -parameter gradients. *TrainOneBatch* takes a *NeuralNet* object -representing the neural net, and visits layers of the *NeuralNet* in -certain order. The resultant gradients are sent to the local stub that -aggregates the requests and forwards them to corresponding servers for -updating. Servers reply to workers with the updated parameters for the next -iteration. - - -Job submission --------------- - -To submit a job in SINGA (i.e., training a deep learning model), -users pass the job configuration to SINGA driver in the -`main function <programming-guide.html>`_ . 
The job configuration -specifies the four major components in Figure 2, - - * a `NeuralNet <neural-net.html>`_ describing the neural net structure with the detailed layer setting and their connections; - * a `TrainOneBatch <train-one-batch.html>`_ algorithm which is tailored for different model categories; - * an `Updater <updater.html>`_ defining the protocol for updating parameters at the server side; - * a `Cluster Topology <distributed-training.html>`_ specifying the distributed architecture of workers and servers. - -This process is like the job submission in Hadoop, where users configure their -jobs in the main function to set the mapper, reducer, etc. -In Hadoop, users can configure their jobs with their own (or built-in) mapper and reducer; in SINGA, users -can configure their jobs with their own (or built-in) layer, updater, etc. http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/tensor.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/tensor.rst b/doc/en/docs/tensor.rst index 87d26ea..ff6142e 100644 --- a/doc/en/docs/tensor.rst +++ b/doc/en/docs/tensor.rst @@ -21,34 +21,10 @@ type of Device. Python API ---------- -There are two set of tensor functions, -1. Tensor member functions, which would change the internal state of the Tensor instance. -2. tensor module functions, which accepts Tensor instances as arguments and return -Tensor instances. - - -Create Tensor instances -~~~~~~~~~~~~~~~~~~~~~~~ - -.. autoclass:: singa.tensor.Tensor - - -Tensor instances can be constructed from Numpy array, .. 
automodule:: singa.tensor - :members: from_numpy - - -Set Tensor values -~~~~~~~~~~~~~~~~~ - - - - - - - - - + :members: +CPP API +--------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/docs/utils.rst ---------------------------------------------------------------------- diff --git a/doc/en/docs/utils.rst b/doc/en/docs/utils.rst new file mode 100644 index 0000000..5306719 --- /dev/null +++ b/doc/en/docs/utils.rst @@ -0,0 +1,6 @@ +Misc. +========= + + +.. automodule:: singa.utils + :members: http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/downloads.md ---------------------------------------------------------------------- diff --git a/doc/en/downloads.md b/doc/en/downloads.md index 31e7274..fe0c30a 100644 --- a/doc/en/downloads.md +++ b/doc/en/downloads.md @@ -1,5 +1,4 @@ ## Download SINGA ---- * Latest code: please clone the dev branch from [Github](https://github.com/apache/incubator-singa) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/index.rst ---------------------------------------------------------------------- diff --git a/doc/en/index.rst b/doc/en/index.rst index 50c65d7..1bbbe9a 100755 --- a/doc/en/index.rst +++ b/doc/en/index.rst @@ -2,7 +2,6 @@ sphinx-quickstart on Sat Jul 9 20:36:57 2016. You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. - Welcome to Apache Singa ======================= @@ -35,19 +34,17 @@ Recent News Getting Started --------------- -* The `Introduction <docs/overview.html>`_ page gives an overview of SINGA. +* The `Software stack <docs/software_stack.html>`_ page gives an overview of SINGA. * The `Installation <docs/installation.html>`_ guide describes details on downloading and installing SINGA. -* Please follow the `Quick Start <docs/quick-start.html>`_ guide to run simple applications on SINGA. 
+* Please follow the `Examples <docs/examples/index.html>`_ guide to run simple applications on SINGA. Documentation ------------- * Documentations are listed `here <docs.html>`_. -* Code API can be found `here <api/index.html>`_. - * Research publication list is available `here <http://www.comp.nus.edu.sg/~dbsystem/singa/research/publication/>`_. How to contribute @@ -74,7 +71,7 @@ Please cite the following two papers if you use SINGA in your research: :hidden: downloads - docs + docs/index .. toctree:: :hidden: http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.1.0.md ---------------------------------------------------------------------- diff --git a/doc/en/releases/RELEASE_NOTES_0.1.0.md b/doc/en/releases/RELEASE_NOTES_0.1.0.md new file mode 100644 index 0000000..2674d90 --- /dev/null +++ b/doc/en/releases/RELEASE_NOTES_0.1.0.md @@ -0,0 +1,99 @@ +#singa-incubating-0.1.0 Release Notes + +--- + +SINGA is a general distributed deep learning platform for training big deep learning models over large datasets. It is +designed with an intuitive programming model based on the layer abstraction. SINGA supports a wide variety of popular +deep learning models. 
+ +This release includes following features: + + * Job management + * [SINGA-3](https://issues.apache.org/jira/browse/SINGA-3) Use Zookeeper to check stopping (finish) time of the system + * [SINGA-16](https://issues.apache.org/jira/browse/SINGA-16) Runtime Process id Management + * [SINGA-25](https://issues.apache.org/jira/browse/SINGA-25) Setup glog output path + * [SINGA-26](https://issues.apache.org/jira/browse/SINGA-26) Run distributed training in a single command + * [SINGA-30](https://issues.apache.org/jira/browse/SINGA-30) Enhance easy-to-use feature and support concurrent jobs + * [SINGA-33](https://issues.apache.org/jira/browse/SINGA-33) Automatically launch a number of processes in the cluster + * [SINGA-34](https://issues.apache.org/jira/browse/SINGA-34) Support external zookeeper service + * [SINGA-38](https://issues.apache.org/jira/browse/SINGA-38) Support concurrent jobs + * [SINGA-39](https://issues.apache.org/jira/browse/SINGA-39) Avoid ssh in scripts for single node environment + * [SINGA-43](https://issues.apache.org/jira/browse/SINGA-43) Remove Job-related output from workspace + * [SINGA-56](https://issues.apache.org/jira/browse/SINGA-56) No automatic launching of zookeeper service + * [SINGA-73](https://issues.apache.org/jira/browse/SINGA-73) Refine the selection of available hosts from host list + + + * Installation with GNU Auto tool + * [SINGA-4](https://issues.apache.org/jira/browse/SINGA-4) Refine thirdparty-dependency installation + * [SINGA-13](https://issues.apache.org/jira/browse/SINGA-13) Separate intermediate files of compilation from source files + * [SINGA-17](https://issues.apache.org/jira/browse/SINGA-17) Add root permission within thirdparty/install. 
+ * [SINGA-27](https://issues.apache.org/jira/browse/SINGA-27) Generate python modules for proto objects + * [SINGA-53](https://issues.apache.org/jira/browse/SINGA-53) Add lmdb compiling options + * [SINGA-62](https://issues.apache.org/jira/browse/SINGA-62) Remove building scrips and auxiliary files + * [SINGA-67](https://issues.apache.org/jira/browse/SINGA-67) Add singatest into build targets + + + * Distributed training + * [SINGA-7](https://issues.apache.org/jira/browse/SINGA-7) Implement shared memory Hogwild algorithm + * [SINGA-8](https://issues.apache.org/jira/browse/SINGA-8) Implement distributed Hogwild + * [SINGA-19](https://issues.apache.org/jira/browse/SINGA-19) Slice large Param objects for load-balance + * [SINGA-29](https://issues.apache.org/jira/browse/SINGA-29) Update NeuralNet class to enable layer partition type customization + * [SINGA-24](https://issues.apache.org/jira/browse/SINGA-24) Implement Downpour training framework + * [SINGA-32](https://issues.apache.org/jira/browse/SINGA-32) Implement AllReduce training framework + * [SINGA-57](https://issues.apache.org/jira/browse/SINGA-57) Improve Distributed Hogwild + + + * Training algorithms for different model categories + * [SINGA-9](https://issues.apache.org/jira/browse/SINGA-9) Add Support for Restricted Boltzman Machine (RBM) model + * [SINGA-10](https://issues.apache.org/jira/browse/SINGA-10) Add Support for Recurrent Neural Networks (RNN) + + + * Checkpoint and restore + * [SINGA-12](https://issues.apache.org/jira/browse/SINGA-12) Support Checkpoint and Restore + + + * Unit test + * [SINGA-64](https://issues.apache.org/jira/browse/SINGA-64) Add the test module for utils/common + + + * Programming model + * [SINGA-36](https://issues.apache.org/jira/browse/SINGA-36) Refactor job configuration, driver program and scripts + * [SINGA-37](https://issues.apache.org/jira/browse/SINGA-37) Enable users to set parameter sharing in model configuration + * 
[SINGA-54](https://issues.apache.org/jira/browse/SINGA-54) Refactor job configuration to move fields in ModelProto out + * [SINGA-55](https://issues.apache.org/jira/browse/SINGA-55) Refactor main.cc and singa.h + * [SINGA-61](https://issues.apache.org/jira/browse/SINGA-61) Support user defined classes + * [SINGA-65](https://issues.apache.org/jira/browse/SINGA-65) Add an example of writing user-defined layers + + + * Other features + * [SINGA-6](https://issues.apache.org/jira/browse/SINGA-6) Implement thread-safe singleton + * [SINGA-18](https://issues.apache.org/jira/browse/SINGA-18) Update API for displaying performance metric + * [SINGA-77](https://issues.apache.org/jira/browse/SINGA-77) Integrate with Apache RAT + + +Some bugs are fixed during the development of this release + + * [SINGA-2](https://issues.apache.org/jira/browse/SINGA-2) Check failed: zsock_connect + * [SINGA-5](https://issues.apache.org/jira/browse/SINGA-5) Server early terminate when zookeeper singa folder is not initially empty + * [SINGA-15](https://issues.apache.org/jira/browse/SINGA-15) Fixg a bug from ConnectStub function which gets stuck for connecting layer_dealer_ + * [SINGA-22](https://issues.apache.org/jira/browse/SINGA-22) Cannot find openblas library when it is installed in default path + * [SINGA-23](https://issues.apache.org/jira/browse/SINGA-23) Libtool version mismatch error. 
+ * [SINGA-28](https://issues.apache.org/jira/browse/SINGA-28) Fix a bug from topology sort of Graph + * [SINGA-42](https://issues.apache.org/jira/browse/SINGA-42) Issue when loading checkpoints + * [SINGA-44](https://issues.apache.org/jira/browse/SINGA-44) A bug when reseting metric values + * [SINGA-46](https://issues.apache.org/jira/browse/SINGA-46) Fix a bug in updater.cc to scale the gradients + * [SINGA-47](https://issues.apache.org/jira/browse/SINGA-47) Fix a bug in data layers that leads to out-of-memory when group size is too large + * [SINGA-48](https://issues.apache.org/jira/browse/SINGA-48) Fix a bug in trainer.cc that assigns the same NeuralNet instance to workers from diff groups + * [SINGA-49](https://issues.apache.org/jira/browse/SINGA-49) Fix a bug in HandlePutMsg func that sets param fields to invalid values + * [SINGA-66](https://issues.apache.org/jira/browse/SINGA-66) Fix bugs in Worker::RunOneBatch function and ClusterProto + * [SINGA-79](https://issues.apache.org/jira/browse/SINGA-79) Fix bug in singatool that can not parse -conf flag + + +Features planned for the next release + + * [SINGA-11](https://issues.apache.org/jira/browse/SINGA-11) Start SINGA using Mesos + * [SINGA-31](https://issues.apache.org/jira/browse/SINGA-31) Extend Blob to support xpu (cpu or gpu) + * [SINGA-35](https://issues.apache.org/jira/browse/SINGA-35) Add random number generators + * [SINGA-40](https://issues.apache.org/jira/browse/SINGA-40) Support sparse Param update + * [SINGA-41](https://issues.apache.org/jira/browse/SINGA-41) Support single node single GPU training + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.2.0.md ---------------------------------------------------------------------- diff --git a/doc/en/releases/RELEASE_NOTES_0.2.0.md b/doc/en/releases/RELEASE_NOTES_0.2.0.md new file mode 100644 index 0000000..38f498a --- /dev/null +++ b/doc/en/releases/RELEASE_NOTES_0.2.0.md @@ -0,0 +1,84 @@ 
+#singa-incubating-0.2.0 Release Notes + +--- + +SINGA is a general distributed deep learning platform for training big deep +learning models over large datasets. It is designed with an intuitive +programming model based on the layer abstraction. SINGA supports a wide variety +of popular deep learning models. + +This release includes the following **major features**: + +* [Training on GPU](../docs/gpu.html) enables training of complex models on a single node with multiple GPU cards. +* [Hybrid neural net partitioning](../docs/hybrid.html) supports data and model parallelism at the same time. +* [Python wrapper](../docs/python.html) makes it easy to configure the job, including neural net and SGD algorithm. +* [RNN model and BPTT algorithm](../docs/general-rnn.html) are implemented to support applications based on RNN models, e.g., GRU. +* [Cloud software integration](../docs/distributed-training.md) includes Mesos, Docker and HDFS. + + +**More details** are listed as follows, + + * Programming model + * [SINGA-80] New Blob Level and Address Level Math Operation Interface + * [SINGA-82] Refactor input layers using data store abstraction + * [SINGA-87] Replace exclude field to include field for layer configuration + * [SINGA-110] Add Layer member datavec_ and gradvec_ + * [SINGA-120] Implemented GRU and BPTT (BPTTWorker) + + + * Neuralnet layers + * [SINGA-91] Add SoftmaxLayer and ArgSortLayer + * [SINGA-106] Add dummy layer for test purpose + * [SINGA-120] Implemented GRU and BPTT (GRULayer and OneHotLayer) + + + * GPU training support + * [SINGA-100] Implement layers using CUDNN for GPU training + * [SINGA-104] Add Context Class + * [SINGA-105] Update GUN make files for compiling cuda related code + * [SINGA-98] Add Support for AlexNet ImageNet Classification Model + + + * Model/Hybrid partition + * [SINGA-109] Refine bridge layers + * [SINGA-111] Add slice, concate and split layers + * [SINGA-113] Model/Hybrid Partition Support + + + * Python binding + * 
[SINGA-108] Add Python wrapper to singa + + + * Predict-only mode + * [SINGA-85] Add functions for extracting features and test new data + + + * Integrate with third-party tools + * [SINGA-11] Start SINGA on Apache Mesos + * [SINGA-78] Use Doxygen to generate documentation + * [SINGA-89] Add Docker support + + + * Unit test + * [SINGA-95] Add make test after building + + + * Other improvment + * [SINGA-84] Header Files Rearrange + * [SINGA-93] Remove the asterisk in the log tcp://169.254.12.152:*:49152 + * [SINGA-94] Move call to google::InitGoogleLogging() from Driver::Init() to main() + * [SINGA-96] Add Momentum to Cifar10 Example + * [SINGA-101] Add ll (ls -l) command in .bashrc file when using docker + * [SINGA-114] Remove short logs in tmp directory + * [SINGA-115] Print layer debug information in the neural net graph file + * [SINGA-118] Make protobuf LayerType field id easy to assign + * [SIGNA-97] Add HDFS Store + + + * Bugs fixed + * [SINGA-85] Fix compilation errors in examples + * [SINGA-90] Miscellaneous trivial bug fixes + * [SINGA-107] Error from loading pre-trained params for training stacked RBMs + * [SINGA-116] Fix a bug in InnerProductLayer caused by weight matrix sharing + + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/en/releases/RELEASE_NOTES_0.3.0.md ---------------------------------------------------------------------- diff --git a/doc/en/releases/RELEASE_NOTES_0.3.0.md b/doc/en/releases/RELEASE_NOTES_0.3.0.md new file mode 100644 index 0000000..c169e12 --- /dev/null +++ b/doc/en/releases/RELEASE_NOTES_0.3.0.md @@ -0,0 +1,37 @@ +#singa-incubating-0.3.0 Release Notes + +--- + +SINGA is a general distributed deep learning platform for training big deep +learning models over large datasets. It is designed with an intuitive +programming model based on the layer abstraction. SINGA supports a wide variety +of popular deep learning models. 
+ +This release includes following features: + + * GPU Support + * [SINGA-131] Implement and optimize hybrid training using both CPU and GPU + * [SINGA-136] Support cuDNN v4 + * [SINGA-134] Extend SINGA to run over a GPU cluster + * [SINGA-157] Change the priority of cudnn library and install libsingagpu.so + + * Remove Dependencies + * [SINGA-156] Remove the dependency on ZMQ for single process training + * [SINGA-155] Remove zookeeper for single-process training + + * Python Binding + * [SINGA-126] Python Binding for Interactive Training + + * Other Improvements + * [SINGA-80] New Blob Level and Address Level Math Operation Interface + * [SINGA-130] Data Prefetching + * [SINGA-145] New SGD based optimization Updaters: AdaDelta, Adam, AdamMax + + * Bugs Fixed + * [SINGA-148] Race condition between Worker threads and Driver + * [SINGA-150] Mesos Docker container failed + * [SINGA-141] Undesired Hash collision when locating process id to worker… + * [SINGA-149] Docker build fail + * [SINGA-143] The compilation cannot detect libsingagpu.so file + + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/doc/zh/conf.py ---------------------------------------------------------------------- diff --git a/doc/zh/conf.py b/doc/zh/conf.py index 332a0d1..921a27a 100755 --- a/doc/zh/conf.py +++ b/doc/zh/conf.py @@ -19,7 +19,7 @@ import os import sys sys.path.insert(0, os.path.abspath('.')) -sys.path.insert(1, os.path.abspath('../build/python')) +sys.path.insert(1, os.path.abspath('../../build/python')) # -- General configuration ------------------------------------------------ from recommonmark.parser import CommonMarkParser http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/char-rnn/train.py ---------------------------------------------------------------------- diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py index 83771c2..137df80 100644 --- a/examples/char-rnn/train.py +++ b/examples/char-rnn/train.py @@ -98,9 +98,9 
@@ def get_lr(epoch): def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16, - num_stacks=1, lr=0.001, dropout=0.5, model_path='model.bin'): + num_stacks=1, dropout=0.5, model_path='model.bin'): # SGD with L2 gradient normalization - opt = optimizer.SGD(constraint=optimizer.L2Constraint(5)) + opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5)) cuda = device.create_cuda_gpu() rnn = layer.LSTM( name='lstm', @@ -126,7 +126,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16, dense_b = dense.param_values()[1] print 'dense w ', dense_w.shape print 'dense b ', dense_b.shape - initializer.uniform(dense_w, dense_w.shape[0], dense_w.shape[1]) + initializer.uniform(dense_w, dense_w.shape[0], 0) print 'dense weight l1 = %f' % (dense_w.l1()) dense_b.set_value(0) print 'dense b l1 = %f' % (dense_b.l1()) @@ -154,6 +154,7 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16, lvalue = lossfun.forward(model_pb2.kTrain, act, label) batch_loss += lvalue.l1() grad = lossfun.backward() + grad /= batch_size grad, gwb = dense.backward(model_pb2.kTrain, grad) grads.append(grad) g_dense_w += gwb[0] http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/README.md ---------------------------------------------------------------------- diff --git a/examples/cifar10/README.md b/examples/cifar10/README.md new file mode 100644 index 0000000..5333e6f --- /dev/null +++ b/examples/cifar10/README.md @@ -0,0 +1,69 @@ +# Train CNN over Cifar-10 + + +Convolution neural network (CNN) is a type of feed-forward artificial neural +network widely used for image and video classification. In this example, we +will train three deep CNN models to do image classification for the CIFAR-10 dataset, + +1. [AlexNet](https://code.google.com/p/cuda-convnet/source/browse/trunk/example-layers/layers-18pct.cfg) +the best validation accuracy (without data augmentation) we achieved was about 82%. + +2. 
[VGGNet](http://torch.ch/blog/2015/07/30/cifar.html), the best validation accuracy (without data augmentation) we achieved was about 89%. +3. [ResNet](https://github.com/facebook/fb.resnet.torch), the best validation accuracy (without data augmentation) we achieved was about 83%. + + +## Instructions + + +### SINGA installation + +Users can compile and install SINGA from source or install the Python version. +The code can run on both CPU and GPU. For GPU training, CUDA and CUDNN (V4 or V5) +are required. Please refer to the installation page for detailed instructions. + + + +### Training + +There are four training programs + +1. train.py. The following command would train the VGG model using the python +version of the Cifar-10 dataset in 'cifar-10-batches-py' folder. + + python train.py vgg cifar-10-batches-py + + To train other models, please replace 'vgg' with 'alexnet' or 'resnet'. By default + the training would run on a CudaGPU device; to run it on CppCPU, add an additional + argument + + python train.py vgg cifar-10-batches-py --use_cpu + +2. alexnet.cc. It trains the AlexNet model using the CPP APIs on a CudaGPU, + + run.sh + +3. alexnet-parallel.cc. It trains the AlexNet model using the CPP APIs on two CudaGPU devices. +The two devices run synchronously to compute the gradients of the model parameters, which are +averaged on the host CPU device and then applied to update the parameters. + + run-parallel.sh + +4. vgg-parallel.cc. It trains the VGG model using the CPP APIs on two CudaGPU devices similar to alexnet-parallel.cc. + +### Prediction + +predict.py includes the prediction function + + def predict(net, images, dev, topk=5) + +The net is created by loading the previously trained model; Images consist of +a numpy array of images (one row per image); dev is the training device, e.g., +a CudaGPU device or the host CppCPU device; topk labels of each image would be +returned. 
+ + + + + + + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/alexnet.cc ---------------------------------------------------------------------- diff --git a/examples/cifar10/alexnet.cc b/examples/cifar10/alexnet.cc index 9e8a7d8..8a506d2 100644 --- a/examples/cifar10/alexnet.cc +++ b/examples/cifar10/alexnet.cc @@ -161,7 +161,7 @@ void Train(int num_epoch, string data_dir) { auto net = CreateNet(); SGD sgd; OptimizerConf opt_conf; - // opt_conf.set_momentum(0.9); + opt_conf.set_momentum(0.9); auto reg = opt_conf.mutable_regularizer(); reg->set_coefficient(0.004); sgd.Setup(opt_conf); http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/cifar10/train.py ---------------------------------------------------------------------- diff --git a/examples/cifar10/train.py b/examples/cifar10/train.py index 2091ee5..8f596e5 100644 --- a/examples/cifar10/train.py +++ b/examples/cifar10/train.py @@ -106,7 +106,7 @@ def train(data, net, max_epoch, get_lr, weight_decay, batch_size=100, dev = device.create_cuda_gpu() net.to_device(dev) - opt = optimizer.SGD(momentum=0.9, decay=weight_decay) + opt = optimizer.SGD(momentum=0.9, weight_decay=weight_decay) for (p, specs) in zip(net.param_names(), net.param_specs()): opt.register(p, specs) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/examples/mnist/train.py ---------------------------------------------------------------------- diff --git a/examples/mnist/train.py b/examples/mnist/train.py index 55c7cbb..0a00358 100644 --- a/examples/mnist/train.py +++ b/examples/mnist/train.py @@ -70,7 +70,6 @@ def train(data_file, use_gpu, num_epoch=10, batch_size=100): print "num_train_batch = %d " % (num_train_batch) for epoch in range(num_epoch): trainerrorsum = 0.0 - validerrorsum = 0.0 print 'Epoch %d' % epoch for b in range(num_train_batch): # positive phase http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/src/python/singa/optimizer.py 
---------------------------------------------------------------------- diff --git a/src/python/singa/optimizer.py b/src/python/singa/optimizer.py index 74e6ade..00380e0 100644 --- a/src/python/singa/optimizer.py +++ b/src/python/singa/optimizer.py @@ -234,6 +234,8 @@ class Nesterov(Optimizer): def apply_with_lr(self, epoch, lr, grad, value, name): self.apply_regularizer_constraint(epoch, value, grad, name) + if name is not None and name in self.learning_rate_multiplier: + lr = lr * self.learning_rate_multiplier[name] self.opt.Apply(epoch, lr, name, grad.singa_tensor, value.singa_tensor) return value http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/0a764257/test/CMakeLists.txt ---------------------------------------------------------------------- diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 6c21034..593cfd6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -22,7 +22,7 @@ ADD_EXECUTABLE(test_singa "gtest/gtest_main.cc" ${singa_test_source}) ADD_DEPENDENCIES(test_singa singa_core singa_utils) #MESSAGE(STATUS "link libs" ${singa_linker_libs}) TARGET_LINK_LIBRARIES(test_singa gtest singa_core singa_utils singa_model - singa_io proto protobuf ${SINGA_LINKER_LIBS}) + singa_io singa_proto protobuf ${SINGA_LINKER_LIBS}) IF(UNIX AND (NOT APPLE)) LIST(APPEND LINK_FLAGS "-pthread") ENDIF()
