Repository: incubator-singa Updated Branches: refs/heads/master e5c438c34 -> 0f86cd5a3
SINGA-290 Upgrade to Python 3 Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/8c9b594b Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/8c9b594b Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/8c9b594b Branch: refs/heads/master Commit: 8c9b594ba566ed1fe865fd7ec6360523cce706e5 Parents: c94b3df Author: Moaz Reyad <moazre...@gmail.com> Authored: Thu Jun 22 13:56:16 2017 +0800 Committer: Wei Wang <wang...@comp.nus.edu.sg> Committed: Thu Aug 3 18:15:59 2017 +0800 ---------------------------------------------------------------------- doc/en/docs/notebook/rnn.ipynb | 317 ++++++++++++++++++++++++++++++------ python/setup.py.in | 5 +- 2 files changed, 269 insertions(+), 53 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8c9b594b/doc/en/docs/notebook/rnn.ipynb ---------------------------------------------------------------------- diff --git a/doc/en/docs/notebook/rnn.ipynb b/doc/en/docs/notebook/rnn.ipynb index 054ac19..f05c5b6 100644 --- a/doc/en/docs/notebook/rnn.ipynb +++ b/doc/en/docs/notebook/rnn.ipynb @@ -18,15 +18,23 @@ }, { "cell_type": "code", - "execution_count": 4, - "metadata": { - "collapsed": true - }, + "execution_count": 1, + "metadata": {}, "outputs": [], "source": [ - "import cPickle as pickle\n", + "from __future__ import division\n", + "from __future__ import print_function\n", + "from future import standard_library\n", + "standard_library.install_aliases()\n", + "from builtins import zip\n", + "from builtins import range\n", + "from builtins import object\n", + "from past.utils import old_div\n", + "import pickle as pickle\n", "import numpy as np\n", "import argparse\n", + "import sys\n", + "from tqdm import tnrange, tqdm_notebook\n", "\n", "# sys.path.append(os.path.join(os.path.dirname(__file__), '../../build/python'))\n", "from singa import layer\n", @@ -36,12 +44,12 @@ "from singa import optimizer\n", "from singa import initializer\n", "from singa.proto import model_pb2\n", - "from tqdm import tnrange" + "from singa import utils" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "collapsed": true }, @@ -51,6 +59,7 @@ "\n", " def __init__(self, fpath, batch_size=32, seq_length=100, train_ratio=0.8):\n", " '''Data object for loading a plain text file.\n", + "\n", " Args:\n", " fpath, path to the text file.\n", " train_ratio, split the text file into train and test sets, where\n", @@ -63,19 +72,22 @@ " self.idx_to_char = {i: ch for i, ch in enumerate(chars)}\n", " data = [self.char_to_idx[c] for c in self.raw_data]\n", " # seq_length + 1 for the data + label\n", - " nsamples = len(data) / (1 + seq_length)\n", + " nsamples = old_div(len(data), (1 + seq_length))\n", " data = data[0:nsamples * (1 + seq_length)]\n", " data = np.asarray(data, dtype=np.int32)\n", " data = np.reshape(data, (-1, seq_length + 1))\n", " # shuffle all sequences\n", " np.random.shuffle(data)\n", " self.train_dat = data[0:int(data.shape[0]*train_ratio)]\n", - " self.num_train_batch = self.train_dat.shape[0] / batch_size\n", + " self.num_train_batch = old_div(self.train_dat.shape[0], batch_size)\n", " self.val_dat = data[self.train_dat.shape[0]:]\n", - " self.num_test_batch = self.val_dat.shape[0] / batch_size\n", - " print 'train dat', self.train_dat.shape\n", - " print 'val dat', self.val_dat.shape\n", - " \n", + " self.num_test_batch = old_div(self.val_dat.shape[0], batch_size)\n", + " self.batch_size = batch_size\n", + " self.seq_length = seq_length\n", + " print('train dat', self.train_dat.shape)\n", + " print('val dat', self.val_dat.shape)\n", + "\n", + "\n", "def numpy2tensors(npx, npy, dev):\n", " '''batch, seq, dim -- > seq, batch, dim'''\n", " tmpx = np.swapaxes(npx, 0, 1)\n", @@ -108,26 +120,47 @@ "cell_type": "markdown", "metadata": {}, "source": [ + "Prepare the dataset. Download [all works of Shakespeare concatenated](http://cs.stanford.edu/people/karpathy/char-rnn/shakespeare_input.txt). Other plain text files can also be used. " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ "## Create the network" ] }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": false - }, - "outputs": [], + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "train dat (36224, 101)\n", + "val dat (9056, 101)\n", + "dense w (32, 67)\n", + "dense b (67,)\n", + "dense weight l1 = 0.154445\n", + "dense b l1 = 0.000000\n" + ] + } + ], "source": [ - "\n", "def get_lr(epoch):\n", - " return 0.001 / float(1 << (epoch / 50))\n", + " return old_div(0.001, float(1 << (old_div(epoch, 50))))\n", + "\n", + "hidden_size=32\n", + "num_stacks=1\n", + "dropout=0.5\n", "\n", - "data = Data('static/linux_input.txt')\n", + "data = Data('static/shakespeare_input.txt')\n", "# SGD with L2 gradient normalization\n", "opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))\n", "cuda = device.create_cuda_gpu()\n", - "rnn = layer.LSTM(name='lstm', hidden_size=32, num_stacks=1, dropout=0.5, input_sample_shape=(data.vocab_size,))\n", + "rnn = layer.LSTM(name='lstm', hidden_size=hidden_size, num_stacks=num_stacks, dropout=dropout, input_sample_shape=(data.vocab_size,))\n", "rnn.to_device(cuda)\n", "rnn_w = rnn.param_values()[0]\n", "rnn_w.uniform(-0.08, 0.08) \n", @@ -136,16 +169,15 @@ "dense.to_device(cuda)\n", "dense_w = dense.param_values()[0]\n", "dense_b = dense.param_values()[1]\n", - "print 'dense w ', dense_w.shape\n", - "print 'dense b ', dense_b.shape\n", + "print('dense w ', dense_w.shape)\n", + "print('dense b ', dense_b.shape)\n", "initializer.uniform(dense_w, dense_w.shape[0], 0)\n", - "print 'dense weight l1 = %f' % (dense_w.l1())\n", + "print('dense weight l1 = %f' % (dense_w.l1()))\n", "dense_b.set_value(0)\n", - "print 'dense b l1 = %f' % (dense_b.l1())\n", + "print('dense b l1 = %f' % (dense_b.l1()))\n", "\n", "g_dense_w = tensor.Tensor(dense_w.shape, cuda)\n", - "g_dense_b = tensor.Tensor(dense_b.shape, cuda)\n", - "\n" + "g_dense_b = tensor.Tensor(dense_b.shape, cuda)" ] }, { @@ -157,19 +189,72 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f97e3eae043e4cafb09b9860af94ef3c" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Epoch 0, train loss is 2.722489\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "b614c1d388d94b839723aaf8272e968f" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Epoch 1, train loss is 4.940666\n" + ] + }, + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "77878ccc79ab444d9b5d7bb9cc3b95dd" + } + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "\n", + "Epoch 2, train loss is 7.043295\n" + ] + } + ], "source": [ "lossfun = loss.SoftmaxCrossEntropy()\n", "train_loss = 0\n", "for epoch in range(3):\n", " bar = tnrange(data.num_train_batch, desc='Epoch %d' % 0)\n", " for b in bar:\n", - " batch = data.train_dat[b * batch_size: (b + 1) * batch_size]\n", - " inputs, labels = convert(batch, batch_size, seq_length, data.vocab_size, cuda)\n", + " batch = data.train_dat[b * data.batch_size: (b + 1) * data.batch_size]\n", + " inputs, labels = convert(batch, data.batch_size, data.seq_length, data.vocab_size, cuda)\n", " inputs.append(tensor.Tensor())\n", " inputs.append(tensor.Tensor())\n", "\n", @@ -183,13 +268,13 @@ " lvalue = lossfun.forward(model_pb2.kTrain, act, label)\n", " batch_loss += lvalue.l1()\n", " grad = lossfun.backward()\n", - " grad /= batch_size\n", + " grad /= data.batch_size\n", " grad, gwb = dense.backward(model_pb2.kTrain, grad)\n", " grads.append(grad)\n", " g_dense_w += gwb[0]\n", " g_dense_b += gwb[1]\n", " # print output.l1(), act.l1()\n", - " bar.set_postfix(train_loss=batch_loss / seq_length)\n", + " bar.set_postfix(train_loss=old_div(batch_loss, data.seq_length))\n", " train_loss += batch_loss\n", "\n", " grads.append(tensor.Tensor())\n", @@ -199,8 +284,7 @@ " opt.apply_with_lr(epoch, get_lr(epoch), g_rnn_w, rnn_w, 'rnnw')\n", " opt.apply_with_lr(epoch, get_lr(epoch), g_dense_w, dense_w, 'dense_w')\n", " opt.apply_with_lr(epoch, get_lr(epoch), g_dense_b, dense_b, 'dense_b')\n", - " print '\\nEpoch %d, train loss is %f' % (epoch, train_loss / data.num_train_batch / seq_length)\n", - "\n" + " print('\\nEpoch %d, train loss is %f' % (epoch, train_loss / data.num_train_batch / data.seq_length))" ] }, { @@ -212,14 +296,22 @@ }, { "cell_type": "code", - "execution_count": null, - "metadata": { - "collapsed": true - }, - "outputs": [], + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "saving model to static/model_2.bin\n" + ] + } + ], "source": [ - "with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:\n", - " print 'saving model to %s' % model_path\n", + "model_path= 'static/model_' + str(epoch) + '.bin'\n", + "\n", + "with open(model_path, 'wb') as fd:\n", + " print('saving model to %s' % model_path)\n", " d = {}\n", " for name, w in zip(['rnn_w', 'dense_w', 'dense_b'],[rnn_w, dense_w, dense_b]):\n", " d[name] = tensor.to_numpy(w)\n", @@ -228,28 +320,149 @@ " d['hidden_size'] = hidden_size\n", " d['num_stacks'] = num_stacks\n", " d['dropout'] = dropout\n", - " pickle.dump(d, fd)" + " pickle.dump(d, fd)\n", + "fd.close()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Sample" ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Before we proceed any further, hear me speak.\n", + "\n", + "BRANCANBHAND:\n", + "But yey toor ssen!\n", + "\n", + "CRROSLA:\n", + "Ony chorsery,\n", + "I sty hit to ruse's\n", + "'bae\n", + "As bit.\n", + "Hew, sfohmzero nitl\n", + "No Wimen;\n", + "A astherter!\n", + "\n", + "CAORTEUS:\n", + "Dodt;\n", + "Wighble a cavinn a nooms;\n", + "Pepeif,\n", + "That by peryer,\n", + "Cisher jay thay ro ou hough me me awow, and fer,\n", + "Got thy\n", + "zith shone sort in and kides Eok spand.\n", + "\n", + "\n" + ] + } + ], + "source": [ + "nsamples = 300\n", + "seed_text = \"Before we proceed any further, hear me speak.\"\n", + "do_sample = True\n", + "\n", + "with open(model_path, 'rb') as fd:\n", + " d = pickle.load(fd)\n", + " rnn_w = tensor.from_numpy(d['rnn_w'])\n", + " idx_to_char = d['idx_to_char']\n", + " char_to_idx = d['char_to_idx']\n", + " vocab_size = len(idx_to_char)\n", + " dense_w = tensor.from_numpy(d['dense_w'])\n", + " dense_b = tensor.from_numpy(d['dense_b'])\n", + " hidden_size = d['hidden_size']\n", + " num_stacks = d['num_stacks']\n", + " dropout = d['dropout']\n", + "\n", + "rnn = layer.LSTM(name='lstm', hidden_size=hidden_size,\n", + " num_stacks=num_stacks, dropout=dropout,\n", + " input_sample_shape=(len(idx_to_char),))\n", + "rnn.to_device(cuda)\n", + "rnn.param_values()[0].copy_data(rnn_w)\n", + "dense = layer.Dense('dense', vocab_size, input_sample_shape=(hidden_size,))\n", + "dense.to_device(cuda)\n", + "dense.param_values()[0].copy_data(dense_w)\n", + "dense.param_values()[1].copy_data(dense_b)\n", + "hx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)\n", + "cx = tensor.Tensor((num_stacks, 1, hidden_size), cuda)\n", + "hx.set_value(0.0)\n", + "cx.set_value(0.0)\n", + "if len(seed_text) > 0:\n", + " for c in seed_text:\n", + " x = np.zeros((1, vocab_size), dtype=np.float32)\n", + " x[0, char_to_idx[c]] = 1\n", + " tx = tensor.from_numpy(x)\n", + " tx.to_device(cuda)\n", + " inputs = [tx, hx, cx]\n", + " outputs = rnn.forward(model_pb2.kEval, inputs)\n", + " y = dense.forward(model_pb2.kEval, outputs[0])\n", + " y = tensor.softmax(y)\n", + " hx = outputs[1]\n", + " cx = outputs[2]\n", + " sys.stdout.write(seed_text)\n", + "else:\n", + " y = tensor.Tensor((1, vocab_size), cuda)\n", + " y.set_value(old_div(1.0, vocab_size))\n", + "\n", + "for i in range(nsamples):\n", + " y.to_host()\n", + " prob = tensor.to_numpy(y)[0]\n", + " if do_sample:\n", + " cur = np.random.choice(vocab_size, 1, p=prob)[0]\n", + " else:\n", + " cur = np.argmax(prob)\n", + " sys.stdout.write(idx_to_char[cur])\n", + " x = np.zeros((1, vocab_size), dtype=np.float32)\n", + " x[0, cur] = 1\n", + " tx = tensor.from_numpy(x)\n", + " tx.to_device(cuda)\n", + " inputs = [tx, hx, cx]\n", + " outputs = rnn.forward(model_pb2.kEval, inputs)\n", + " y = dense.forward(model_pb2.kEval, outputs[0])\n", + " y = tensor.softmax(y)\n", + " hx = outputs[1]\n", + " cx = outputs[2]\n", + "print('')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "collapsed": true + }, + "outputs": [], + "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { - "display_name": "Python [conda env:conda]", + "display_name": "py3", "language": "python", - "name": "conda-env-conda-py" + "name": "py3" }, "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.13" + "pygments_lexer": "ipython3", + "version": "3.5.3" } }, "nbformat": 4, http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/8c9b594b/python/setup.py.in ---------------------------------------------------------------------- diff --git a/python/setup.py.in b/python/setup.py.in index 044710f..97ea2ca 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -66,7 +66,10 @@ setup( 'flask>=0.10.1', 'flask_cors>=3.0.2', 'pillow>=2.3.0', - 'future' + 'future', + 'xmlrunner', + 'tqdm', + 'ipywidgets', ], #List additional groups of dependencies here (e.g. development