Fix two minor bugs: 1. in pooling.cc, the buf_ check in Backward should be done only for max pooling, since only max pooling pushes a mask onto buf_ during Forward; 2. update tensor.py to avoid errors in to_numpy() caused by inconsistency between the swig tensor and the Python tensor's cached members, e.g. device/shape.
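
For context, a minimal NumPy sketch (illustrative only, not the C++ code in pooling.cc) of why the buf_ check belongs only in the max-pooling branch of Backward: average pooling spreads each output gradient uniformly over its window and needs no forward state, whereas max pooling must route each gradient through the mask recorded during Forward. The helper names and the non-overlapping-window assumption are mine.

    # Illustrative NumPy sketch, not SINGA code; assumes non-overlapping
    # k x k windows (stride == kernel) on a single 2-D feature map.
    import numpy as np

    def backward_avg_pool(dy, k=2):
        # Every input in a window gets an equal 1/(k*k) share of the output
        # gradient; no state from the forward pass is required.
        return np.kron(dy, np.ones((k, k))) / (k * k)

    def forward_max_pool(x, k=2):
        h, w = x.shape
        y = x.reshape(h // k, k, w // k, k).max(axis=(1, 3))
        # The mask marks the max position in each window (ties mark all);
        # this is the state that must be kept for Backward.
        mask = (x == np.kron(y, np.ones((k, k)))).astype(x.dtype)
        return y, mask

    def backward_max_pool(dy, mask, k=2):
        # Route each output gradient only to the recorded max positions.
        return np.kron(dy, np.ones((k, k))) * mask

    x = np.arange(16.0).reshape(4, 4)
    dy = np.ones((2, 2))
    _, mask = forward_max_pool(x)
    print backward_avg_pool(dy)        # 0.25 everywhere
    print backward_max_pool(dy, mask)  # 1.0 only at each window's max
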
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/22889bc5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/22889bc5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/22889bc5

Branch: refs/heads/dev
Commit: 22889bc5e5537c2ecc1607d01eb3bdb19fbaa7dc
Parents: 0a76425
Author: Wei Wang <[email protected]>
Authored: Tue Aug 16 15:18:07 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Tue Aug 16 15:39:39 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/train.py | 36 +++++++++++++++++++-----------------
 src/model/layer/pooling.cc | 17 +++++++++--------
 src/python/singa/tensor.py | 16 ++++++++++------
 3 files changed, 38 insertions(+), 31 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/examples/char-rnn/train.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/train.py b/examples/char-rnn/train.py
index 137df80..d28646e 100644
--- a/examples/char-rnn/train.py
+++ b/examples/char-rnn/train.py
@@ -98,7 +98,7 @@ def get_lr(epoch):
 
 
 def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
-          num_stacks=1, dropout=0.5, model_path='model.bin'):
+          num_stacks=1, dropout=0.5, model_path='model'):
     # SGD with L2 gradient normalization
     opt = optimizer.RMSProp(constraint=optimizer.L2Constraint(5))
     cuda = device.create_cuda_gpu()
@@ -194,22 +194,24 @@ def train(data, max_epoch, hidden_size=100, seq_length=100, batch_size=16,
             print 'Epoch %d, evaluation loss is %f' % \
                 (epoch, eval_loss / data.num_test_batch / seq_length)
-        # checkpoint the file model
-        with open(model_path, 'wb') as fd:
-            print 'saving model to %s' % model_path
-            d = {}
-            for name, w in zip(
-                    ['rnn_w', 'dense_w', 'dense_b'],
-                    [rnn_w, dense_w, dense_b]):
-                w.to_host()
-                d[name] = tensor.to_numpy(w)
-            d['idx_to_char'] = data.idx_to_char
-            d['char_to_idx'] = data.char_to_idx
-            d['hidden_size'] = hidden_size
-            d['num_stacks'] = num_stacks
-            d['dropout'] = dropout
-
-            pickle.dump(d, fd)
+        if (epoch + 1) % 30 == 0:
+            # checkpoint the file model
+            with open('%s_%d.bin' % (model_path, epoch), 'wb') as fd:
+                print 'saving model to %s' % model_path
+                d = {}
+                for name, w in zip(
+                        ['rnn_w', 'dense_w', 'dense_b'],
+                        [rnn_w, dense_w, dense_b]):
+                    w.to_host()
+                    d[name] = tensor.to_numpy(w)
+                    w.to_device(cuda)
+                d['idx_to_char'] = data.idx_to_char
+                d['char_to_idx'] = data.char_to_idx
+                d['hidden_size'] = hidden_size
+                d['num_stacks'] = num_stacks
+                d['dropout'] = dropout
+
+                pickle.dump(d, fd)
 
 
 if __name__ == '__main__':
     parser = argparse.ArgumentParser(
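
Two details of the checkpointing change above: models are now written every 30 epochs to '<model_path>_<epoch>.bin' instead of overwriting a single model.bin, and each weight tensor is moved back to the GPU (w.to_device(cuda)) after serialization so training can continue on the device. A loader for these files might look like the following sketch; load_checkpoint and its epoch argument are hypothetical, but the file-name pattern and dict keys mirror the code above.

    # Hypothetical loader for the checkpoints written above; the file-name
    # pattern and dict keys mirror train.py, everything else is an assumption.
    import pickle

    def load_checkpoint(model_path='model', epoch=29):
        with open('%s_%d.bin' % (model_path, epoch), 'rb') as fd:
            d = pickle.load(fd)
        # Weights were stored as numpy arrays via tensor.to_numpy().
        weights = [d[name] for name in ['rnn_w', 'dense_w', 'dense_b']]
        # Hyper-parameters and vocabulary needed to rebuild the net.
        meta = (d['hidden_size'], d['num_stacks'], d['dropout'],
                d['idx_to_char'], d['char_to_idx'])
        return weights, meta
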
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/src/model/layer/pooling.cc
----------------------------------------------------------------------
diff --git a/src/model/layer/pooling.cc b/src/model/layer/pooling.cc
index a18f9de..1312776 100644
--- a/src/model/layer/pooling.cc
+++ b/src/model/layer/pooling.cc
@@ -107,27 +107,28 @@ const std::pair<Tensor, vector<Tensor>> Pooling::Backward(int flag,
   CHECK_EQ(grad.device()->lang(), kCpp);
   CHECK_EQ(grad.nDim(), 4u);
   vector<Tensor> param_grad;
-  CHECK(!buf_.empty());
-  Tensor mask = buf_.top();
-  buf_.pop();
-  size_t batchsize = grad.shape(0);
+  size_t batchsize = grad.shape(0);
   Shape shape{batchsize, channels_, height_, width_};
   auto dev = grad.device();
   DataType dtype = grad.data_type();
   Tensor dx(shape, dev, dtype);
   auto gradptr = grad.data<float>();
-  auto maskptr = mask.data<float>();
   float* dxptr = new float[dx.Size()];
-  if (pool_ == PoolingConf_PoolMethod_MAX)
+  if (pool_ == PoolingConf_PoolMethod_MAX) {
+    CHECK(!buf_.empty());
+    Tensor mask = buf_.top();
+    buf_.pop();
+    auto maskptr = mask.data<float>();
     BackwardMaxPooling(gradptr, maskptr, batchsize, channels_, height_, width_,
                        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                        stride_w_, dxptr);
-  else if (pool_ == PoolingConf_PoolMethod_AVE)
+  } else if (pool_ == PoolingConf_PoolMethod_AVE) {
     BackwardAvgPooling(gradptr, batchsize, channels_, height_, width_,
                        kernel_h_, kernel_w_, pad_h_, pad_w_, stride_h_,
                        stride_w_, dxptr);
-  else
+  } else {
     LOG(FATAL) << "Unknow pooling method";
+  }
 
   dx.CopyDataFromHostPtr(dxptr, dx.Size());
   delete[] dxptr;


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/22889bc5/src/python/singa/tensor.py
----------------------------------------------------------------------
diff --git a/src/python/singa/tensor.py b/src/python/singa/tensor.py
index a1e948d..f6bca43 100644
--- a/src/python/singa/tensor.py
+++ b/src/python/singa/tensor.py
@@ -79,15 +79,14 @@ class Tensor(object):
             return
         else:
             assert isinstance(shape, tuple), 'shape should be tuple'
-            vs = list(shape)
             if device is None:
                 device = pydevice.get_default_device()
-                self.singa_tensor = singa.Tensor(vs, device, dtype)
+                self.singa_tensor = singa.Tensor(list(shape), device, dtype)
             else:
-                self.singa_tensor = singa.Tensor(vs, device, dtype)
-            self.shape = shape
-            self.device = device
-            self.dtype = dtype
+                self.singa_tensor = singa.Tensor(list(shape), device, dtype)
+            self.shape = shape
+            self.dtype = dtype
+            self.device = device
 
     def ndim(self):
         '''
@@ -136,6 +135,9 @@ class Tensor(object):
             t (Tensor)
         '''
         self.singa_tensor.ResetLike(t.singa_tensor)
+        self.shape = t.shape
+        self.device = t.device
+        self.dtype = t.dtype
 
     '''
     def as_type(self, dtype):
@@ -153,11 +155,13 @@ class Tensor(object):
             device: a swig Device converted from CudaGPU or CppCPU or OpenclGPU
         '''
         self.singa_tensor.ToDevice(device)
+        self.device = device
 
     def to_host(self):
         '''Move the tensor data onto the default host CppCPU device.
         '''
         self.singa_tensor.ToHost()
+        self.device = pydevice.default_device
 
     def l2(self):
         '''
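
All of the tensor.py changes above enforce one invariant: the Python-level shape/device/dtype members must track the underlying swig tensor whenever it is mutated (ResetLike, ToDevice, ToHost), because helpers such as to_numpy() read the cached Python members rather than querying the swig object. A toy sketch of the failure mode, with hypothetical stand-in classes rather than SINGA's API:

    # Toy classes (not SINGA's API) showing the stale-cache bug fixed above:
    # a wrapper that caches backend state must refresh the cache on every
    # mutating call, or helpers that trust the cache break.
    import numpy as np

    class SwigTensor(object):             # stand-in for the swig tensor
        def __init__(self, shape):
            self.data = np.zeros(shape)
        def reset_like(self, other):
            self.data = np.zeros_like(other.data)

    class PyTensor(object):               # stand-in for singa.tensor.Tensor
        def __init__(self, shape):
            self.swig = SwigTensor(shape)
            self.shape = shape            # cached mirror of backend state

        def reset_like(self, t):
            self.swig.reset_like(t.swig)
            self.shape = t.shape          # the fix: resync the cached shape

    def to_numpy(t):
        # Like tensor.to_numpy(), this trusts the cached shape.
        return t.swig.data.reshape(t.shape)

    a, b = PyTensor((2, 3)), PyTensor((4,))
    b.reset_like(a)
    print to_numpy(b).shape  # (2, 3); a stale (4,) shape would make reshape raise
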
