Repository: incubator-singa Updated Branches: refs/heads/master a6e7690bd -> 0695daa66
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/requirements.txt ---------------------------------------------------------------------- diff --git a/doc/en/docs/notebook/requirements.txt b/doc/en/docs/notebook/requirements.txt new file mode 100644 index 0000000..21e293b --- /dev/null +++ b/doc/en/docs/notebook/requirements.txt @@ -0,0 +1,5 @@ +matplotlib=2.0.0=np112py27_0 +nb_conda_kernels=2.0.0=py27_0 +nb_conda=2.0.0=py27_0 +pillow=4.0.0=py27_1 +tqdm=4.11.2=py27_0 http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/rnn.ipynb ---------------------------------------------------------------------- diff --git a/doc/en/docs/notebook/rnn.ipynb b/doc/en/docs/notebook/rnn.ipynb new file mode 100644 index 0000000..054ac19 --- /dev/null +++ b/doc/en/docs/notebook/rnn.ipynb @@ -0,0 +1,257 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RNN for Character Level Language Modeling" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Dataset pre-processing\n", + "\n", + "### sample data" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "import cPickle as pickle\n", + "import numpy as np\n", + "import argparse\n", + "\n", + "# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))\n", + "from singa import layer\n", + "from singa import loss\n", + "from singa import device\n", + "from singa import tensor\n", + "from singa import optimizer\n", + "from singa import initializer\n", + "from singa.proto import model_pb2\n", + "from tqdm import tnrange" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "class Data(object):\n", + "\n", + " def __init__(self, fpath, batch_size=32, seq_length=100, train_ratio=0.8):\n", + " '''Data object for loading a plain 
text file.\n", + " Args:\n", + " fpath, path to the text file.\n", + " train_ratio, split the text file into train and test sets, where\n", + " train_ratio of the characters are in the train set.\n", + " '''\n", + " self.raw_data = open(fpath, 'r').read() # read text file\n", + " chars = list(set(self.raw_data))\n", + " self.vocab_size = len(chars)\n", + " self.char_to_idx = {ch: i for i, ch in enumerate(chars)}\n", + " self.idx_to_char = {i: ch for i, ch in enumerate(chars)}\n", + " data = [self.char_to_idx[c] for c in self.raw_data]\n", + " # seq_length + 1 for the data + label\n", + " nsamples = len(data) / (1 + seq_length)\n", + " data = data[0:nsamples * (1 + seq_length)]\n", + " data = np.asarray(data, dtype=np.int32)\n", + " data = np.reshape(data, (-1, seq_length + 1))\n", + " # shuffle all sequences\n", + " np.random.shuffle(data)\n", + " self.train_dat = data[0:int(data.shape[0]*train_ratio)]\n", + " self.num_train_batch = self.train_dat.shape[0] / batch_size\n", + " self.val_dat = data[self.train_dat.shape[0]:]\n", + " self.num_test_batch = self.val_dat.shape[0] / batch_size\n", + " print 'train dat', self.train_dat.shape\n", + " print 'val dat', self.val_dat.shape\n", + " \n", + "def numpy2tensors(npx, npy, dev):\n", + " '''batch, seq, dim -- > seq, batch, dim'''\n", + " tmpx = np.swapaxes(npx, 0, 1)\n", + " tmpy = np.swapaxes(npy, 0, 1)\n", + " inputs = []\n", + " labels = []\n", + " for t in range(tmpx.shape[0]):\n", + " x = tensor.from_numpy(tmpx[t])\n", + " y = tensor.from_numpy(tmpy[t])\n", + " x.to_device(dev)\n", + " y.to_device(dev)\n", + " inputs.append(x)\n", + " labels.append(y)\n", + " return inputs, labels\n", + "\n", + "\n", + "def convert(batch, batch_size, seq_length, vocab_size, dev):\n", + " '''convert a batch of data into a sequence of input tensors'''\n", + " y = batch[:, 1:]\n", + " x1 = batch[:, :seq_length]\n", + " x = np.zeros((batch_size, seq_length, vocab_size), dtype=np.float32)\n", + " for b in range(batch_size):\n", + " for 
t in range(seq_length):\n", + " c = x1[b, t]\n", + " x[b, t, c] = 1\n", + " return numpy2tensors(x, y, dev)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Create the network" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": false + }, + "outputs": [], + "source": [ + "\n", + "def get_lr(epoch):\n", + " return 0.001 / float(1 << (epoch / 50))\n", + "\n", + "data = Data('static/linux_input.txt')\n", + "# RMSProp optimizer with an L2 constraint on the gradients\n", + "opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))\n", + "cuda = device.create_cuda_gpu()\n", + "rnn = layer.LSTM(name='lstm', hidden_size=32, num_stacks=1, dropout=0.5, input_sample_shape=(data.vocab_size,))\n", + "rnn.to_device(cuda)\n", + "rnn_w = rnn.param_values()[0]\n", + "rnn_w.uniform(-0.08, 0.08) \n", + "\n", + "dense = layer.Dense('dense', data.vocab_size, input_sample_shape=(32,))\n", + "dense.to_device(cuda)\n", + "dense_w = dense.param_values()[0]\n", + "dense_b = dense.param_values()[1]\n", + "print 'dense w ', dense_w.shape\n", + "print 'dense b ', dense_b.shape\n", + "initializer.uniform(dense_w, dense_w.shape[0], 0)\n", + "print 'dense weight l1 = %f' % (dense_w.l1())\n", + "dense_b.set_value(0)\n", + "print 'dense b l1 = %f' % (dense_b.l1())\n", + "\n", + "g_dense_w = tensor.Tensor(dense_w.shape, cuda)\n", + "g_dense_b = tensor.Tensor(dense_b.shape, cuda)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conduct SGD" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "lossfun = loss.SoftmaxCrossEntropy()\n", + "train_loss = 0\n", + "for epoch in range(3):\n", + " bar = tnrange(data.num_train_batch, desc='Epoch %d' % 0)\n", + " for b in bar:\n", + " batch = data.train_dat[b * batch_size: (b + 1) * batch_size]\n", + " inputs, labels = convert(batch, batch_size, seq_length, data.vocab_size, cuda)\n", + 
" inputs.append(tensor.Tensor())\n", + " inputs.append(tensor.Tensor())\n", + "\n", + " outputs = rnn.forward(model_pb2.kTrain, inputs)[0:-2]\n", + " grads = []\n", + " batch_loss = 0\n", + " g_dense_w.set_value(0.0)\n", + " g_dense_b.set_value(0.0)\n", + " for output, label in zip(outputs, labels):\n", + " act = dense.forward(model_pb2.kTrain, output)\n", + " lvalue = lossfun.forward(model_pb2.kTrain, act, label)\n", + " batch_loss += lvalue.l1()\n", + " grad = lossfun.backward()\n", + " grad /= batch_size\n", + " grad, gwb = dense.backward(model_pb2.kTrain, grad)\n", + " grads.append(grad)\n", + " g_dense_w += gwb[0]\n", + " g_dense_b += gwb[1]\n", + " # print output.l1(), act.l1()\n", + " bar.set_postfix(train_loss=batch_loss / seq_length)\n", + " train_loss += batch_loss\n", + "\n", + " grads.append(tensor.Tensor())\n", + " grads.append(tensor.Tensor())\n", + " g_rnn_w = rnn.backward(model_pb2.kTrain, grads)[1][0]\n", + " dense_w, dense_b = dense.param_values()\n", + " opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')\n", + " opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w, 'dense_w')\n", + " opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b, 'dense_b')\n", + " print '\\nEpoch %d, train loss is %f' % (epoch, train_loss / data.num_train_batch / seq_length)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Checkpoint" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [ + "with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:\n", + " print 'saving model to %s' % model_path\n", + " d = {}\n", + " for name, w in zip(['rnn_w', 'dense_w', 'dense_b'],[rnn_w, dense_w, dense_b]):\n", + " d[name] = tensor.to_numpy(w)\n", + " d['idx_to_char'] = data.idx_to_char\n", + " d['char_to_idx'] = data.char_to_idx\n", + " d['hidden_size'] = hidden_size\n", + " d['num_stacks'] = num_stacks\n", + " d['dropout'] = dropout\n", + 
" pickle.dump(d, fd)" + ] + } + ], + "metadata": { + "anaconda-cloud": {}, + "kernelspec": { + "display_name": "Python [conda env:conda]", + "language": "python", + "name": "conda-env-conda-py" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.13" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/bp.PNG ---------------------------------------------------------------------- diff --git a/doc/en/docs/notebook/static/bp.PNG b/doc/en/docs/notebook/static/bp.PNG new file mode 100644 index 0000000..ac5db33 Binary files /dev/null and b/doc/en/docs/notebook/static/bp.PNG differ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/digit.jpg ---------------------------------------------------------------------- diff --git a/doc/en/docs/notebook/static/digit.jpg b/doc/en/docs/notebook/static/digit.jpg new file mode 100644 index 0000000..8350d88 Binary files /dev/null and b/doc/en/docs/notebook/static/digit.jpg differ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/models.PNG ---------------------------------------------------------------------- diff --git a/doc/en/docs/notebook/static/models.PNG b/doc/en/docs/notebook/static/models.PNG new file mode 100644 index 0000000..9ebcfa2 Binary files /dev/null and b/doc/en/docs/notebook/static/models.PNG differ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/sgd.png ---------------------------------------------------------------------- diff --git a/doc/en/docs/notebook/static/sgd.png b/doc/en/docs/notebook/static/sgd.png new file mode 100644 index 0000000..9eac916 Binary files /dev/null and 
b/doc/en/docs/notebook/static/sgd.png differ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/doc/en/docs/notebook/static/singav1-sw.png ---------------------------------------------------------------------- diff --git a/doc/en/docs/notebook/static/singav1-sw.png b/doc/en/docs/notebook/static/singav1-sw.png new file mode 100644 index 0000000..e443c6e Binary files /dev/null and b/doc/en/docs/notebook/static/singav1-sw.png differ http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/python/singa/layer.py ---------------------------------------------------------------------- diff --git a/python/singa/layer.py b/python/singa/layer.py index 7975042..6302b45 100644 --- a/python/singa/layer.py +++ b/python/singa/layer.py @@ -27,7 +27,13 @@ Example usages:: # create a convolution layer conv = layer.Conv2D('conv', 32, 3, 1, pad=1, input_sample_shape=(3, 32, 32)) + + # init param values + w, b = conv.param_values() + w.gaussian(0, 0.01) + b.set_value(0) conv.to_device(dev) # move the layer data onto a CudaGPU device + x = tensor.Tensor((3, 32, 32), dev) x.uniform(-1, 1) y = conv.forward(True, x) @@ -766,8 +772,8 @@ class Merge(Layer): Returns: A list of replicated grad, one per source layer ''' - assert isinstance(grad, tensor.Tensor), 'The input must be Tensor'\ - ' instead of %s' % type(grad).__name__ + assert isinstance(grad, tensor.Tensor), 'The input must be Tensor' \ + ' instead of %s' % type(grad).__name__ return [grad] * self.num_input, [] # * self.num_input @@ -902,7 +908,8 @@ class Slice(Layer): def get_output_sample_shape(self): out = [] for i in range(len(self.conf.slice_conf.slice_point) + 1): - out.append(self.layer.GetOutputSampleShape(i)) + out.append(self.layer.GetOutputSampleShapeAt(i)) + return out def forward(self, flag, x): '''Slice the input tensor on the given axis. 
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/5144bcf1/python/singa/tensor.py ---------------------------------------------------------------------- diff --git a/python/singa/tensor.py b/python/singa/tensor.py index 7dee9f5..82361c5 100644 --- a/python/singa/tensor.py +++ b/python/singa/tensor.py @@ -60,6 +60,8 @@ from .proto import core_pb2 from . import singa_wrap as singa import device as pydevice +int32 = core_pb2.kInt +float32 = core_pb2.kFloat32 class Tensor(object): '''Create a Py Tensor, which wraps a swig converted Tensor from CPP Tensor
