SINGA-120 - Implemented GRU and BPTT; changed back to r * (h x U) for the new-memory computation; loss down to about 2.8 per char/unit.
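
The subject line refers to the form of the GRU new-memory term: the reset gate is applied to the projection of the previous hidden state, c = tanh(W_c*x + r * (U_c*h_prev) + b_c), rather than projecting the already-reset state U_c*(r*h_prev). The layer output is then h_t = z*h_prev + (1-z)*c, matching the forward pass in gru.cc below. The NumPy sketch that follows is illustrative only (names and shapes are placeholders, not the SINGA kernels):

    import numpy as np

    def sigmoid(x):
        return 1.0 / (1.0 + np.exp(-x))

    def gru_step(x, h_prev, W_z, U_z, b_z, W_r, U_r, b_r, W_c, U_c, b_c):
        # x: (batch, vdim), h_prev: (batch, hdim); W_*: (hdim, vdim), U_*: (hdim, hdim)
        z = sigmoid(x @ W_z.T + h_prev @ U_z.T + b_z)         # update gate
        r = sigmoid(x @ W_r.T + h_prev @ U_r.T + b_r)         # reset gate
        c = np.tanh(x @ W_c.T + r * (h_prev @ U_c.T) + b_c)   # new memory: r * (h x U)
        return z * h_prev + (1.0 - z) * c                     # next hidden state
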
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/a2f4e468
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/a2f4e468
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/a2f4e468

Branch: refs/heads/master
Commit: a2f4e4680bb7b5dc5077a064a757698e30cc5e13
Parents: 6a4c996
Author: Wei Wang <[email protected]>
Authored: Wed Jan 6 00:35:41 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Wed Jan 6 02:03:50 2016 +0800

----------------------------------------------------------------------
 examples/char-rnn/data.py                   |   8 +
 examples/char-rnn/job.conf                  | 250 +++++++++++++++++++
 examples/char-rnn/sample.conf               | 212 ++++++++++++++++
 include/singa/neuralnet/neuron_layer.h      |  15 +-
 include/singa/utils/math_blob.h             |   2 +
 src/neuralnet/connection_layer/rnn_dummy.cc |  67 +++++
 src/neuralnet/input_layer/char_rnn.cc       |   6 +-
 src/neuralnet/input_layer/onehot.cc         |  40 +++
 src/neuralnet/neuralnet.cc                  |  73 +++---
 src/neuralnet/neuron_layer/dummy.cc         |   1 -
 src/neuralnet/neuron_layer/embedding.cc     |   4 +-
 src/neuralnet/neuron_layer/gru.cc           | 298 +++++++++++------
 src/neuralnet/output_layer/char_rnn.cc      |  51 ++++
 src/utils/updater.cc                        |   6 +
 src/worker.cc                               |   3 +-
 15 files changed, 832 insertions(+), 204 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/data.py
----------------------------------------------------------------------
diff --git a/examples/char-rnn/data.py b/examples/char-rnn/data.py
new file mode 100644
index 0000000..4b1c28c
--- /dev/null
+++ b/examples/char-rnn/data.py
@@ -0,0 +1,8 @@
+# pls get linux_input.txt from http://cs.stanford.edu/people/karpathy/char-rnn/
+data = open('linux_input.txt', 'r').read() # should be simple plain text file
+chars = list(set(data))
+data_size, vocab_size = len(data), len(chars)
+print 'data has %d characters, %d unique.'
% (data_size, vocab_size) +with open('vocab.txt', 'w') as fd: + fd.write("".join(chars)) + fd.flush() http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/job.conf ---------------------------------------------------------------------- diff --git a/examples/char-rnn/job.conf b/examples/char-rnn/job.conf new file mode 100644 index 0000000..2e1c761 --- /dev/null +++ b/examples/char-rnn/job.conf @@ -0,0 +1,250 @@ +name:"char-rnn" +train_steps: 100000 +disp_freq: 100 +#debug: true +gpu: 0 +train_one_batch { + alg: kBPTT +} + +updater { + type: kRMSProp + rmsprop_conf { + rho: 0.95 + } + learning_rate { + type: kStep + base_lr: 0.002 + step_conf { + gamma: 0.97 + change_freq: 2000 + } + } + clip_low: -5 + clip_high: 5 +} + +neuralnet { + unroll_len: 50 + layer { + name: "data" + type: kCharRNN + unroll_len: 1 + char_rnn_conf { + path: "examples/char-rnn/linux_input.txt" + vocab_path:"examples/char-rnn/vocab.txt" + batchsize: 50 + unroll_len: 50 + } + } + layer { + name: "onehot" + type: kOneHot + srclayers: "data" + unroll_conn_type: kUnrollOneToAll + } + + layer { + name: "label" + type: kRNNLabel + srclayers: "data" + unroll_conn_type: kUnrollOneToAll + } + + layer { + name: "gru1" + type: kGRU + srclayers: "onehot" + gru_conf { + dim_hidden: 512 + } + param { + name: "z_hx" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "r_hx" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "c_hx" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "z_hh" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "r_hh" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "c_hh" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "z_b" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "r_b" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "c_b" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + + } +# layer { +# name: "gru2" +# type: kGRU +# srclayers: "gru1" +# gru_conf { +# dim_hidden: 512 +# } +# param { +# name: "z_hx2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# param { +# name: "r_hx2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# param { +# name: "c_hx2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# param { +# name: "z_hh2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# param { +# name: "r_hh2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# param { +# name: "c_hh2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# param { +# name: "z_b2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# param { +# name: "r_b2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# param { +# name: "c_b2" +# init { +# type: kUniform +# low: -0.08 +# high: 0.08 +# } +# } +# } +# + layer { + name: "ip1" + type: kInnerProduct + srclayers: "gru1" + innerproduct_conf { + num_output: 101 + } + param { + name: "w" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "b" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + } + layer { + name: "loss" + type: kSoftmaxLoss + srclayers: "ip1" + srclayers: "label" + } +} + +cluster { + workspace: "examples/char-rnn/" +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/examples/char-rnn/sample.conf 
---------------------------------------------------------------------- diff --git a/examples/char-rnn/sample.conf b/examples/char-rnn/sample.conf new file mode 100644 index 0000000..b15ef9e --- /dev/null +++ b/examples/char-rnn/sample.conf @@ -0,0 +1,212 @@ +name:"char-rnn" +test_steps: 100 +#debug: true +gpu: 0 +checkpoint_path: "examples/char-rnn/checkpoint/step2000-worker0" +train_one_batch { + alg: kBPTT +} + +neuralnet { + layer { + name: "data" + type: kRNNDummy + rnn_dummy_conf { + shape: 1 + integer: true + low: 0 + high: 101 + dynamic_srclayer: "argsort" + } + } + layer { + name: "onehot" + type: kOneHot + srclayers: "data" + } + + layer { + name: "gru1" + type: kGRU + srclayers: "onehot" + gru_conf { + dim_hidden: 512 + } + param { + name: "z_hx" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "r_hx" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "c_hx" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "z_hh" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "r_hh" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "c_hh" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "z_b" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "r_b" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "c_b" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + } + layer { + name: "gru2" + type: kGRU + srclayers: "gru1" + gru_conf { + dim_hidden: 512 + } + param { + name: "z_hx2" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "r_hx2" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "c_hx2" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "z_hh2" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "r_hh2" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "c_hh2" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + } + + + layer { + name: "ip1" + type: kInnerProduct + srclayers: "gru2" + innerproduct_conf { + num_output: 101 + } + param { + name: "w" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + param { + name: "b" + init { + type: kUniform + low: -0.08 + high: 0.08 + } + } + } + layer { + name: "softmax" + type: kSoftmax + srclayers: "ip1" + } + layer { + name: "argsort" + type: kArgSort + srclayers: "softmax" + } + layer { + name: "sampling" + type: kCharRNNOutput + srclayers: "argsort" + char_rnn_conf { + vocab_path: "examples/char-rnn/vocab.txt" + } + } +} + +cluster { + workspace: "examples/char-rnn/" +} http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/include/singa/neuralnet/neuron_layer.h ---------------------------------------------------------------------- diff --git a/include/singa/neuralnet/neuron_layer.h b/include/singa/neuralnet/neuron_layer.h index e1a63a2..c612aed 100644 --- a/include/singa/neuralnet/neuron_layer.h +++ b/include/singa/neuralnet/neuron_layer.h @@ -187,16 +187,15 @@ class GRULayer : public NeuronLayer { } const std::vector<Param*> GetParams() const override { + std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_, + weight_z_hh_, weight_r_hh_, weight_c_hh_}; + if (bias_z_ != nullptr && bias_r_ != nullptr && bias_c_ != nullptr) { - std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_, - weight_z_hh_, weight_r_hh_, weight_c_hh_, - 
bias_z_, bias_r_, bias_c_}; - return params; - } else { - std::vector<Param*> params{weight_z_hx_, weight_r_hx_,weight_c_hx_, - weight_z_hh_, weight_r_hh_, weight_c_hh_}; - return params; + params.push_back(bias_z_); + params.push_back(bias_r_); + params.push_back(bias_c_); } + return params; } private: http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/include/singa/utils/math_blob.h ---------------------------------------------------------------------- diff --git a/include/singa/utils/math_blob.h b/include/singa/utils/math_blob.h index bdaf914..e151c37 100644 --- a/include/singa/utils/math_blob.h +++ b/include/singa/utils/math_blob.h @@ -267,6 +267,8 @@ void Map(const Blob<Dtype> & A, Blob<Dtype> * B) { } else { #ifdef USE_GPU gpu_e_f<Op>(A.count(), A.gpu_data(), B->mutable_gpu_data()); +#else + LOG(ERROR) << "Not implemented"; #endif // USE_GPU } } http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/connection_layer/rnn_dummy.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/connection_layer/rnn_dummy.cc b/src/neuralnet/connection_layer/rnn_dummy.cc new file mode 100644 index 0000000..865066f --- /dev/null +++ b/src/neuralnet/connection_layer/rnn_dummy.cc @@ -0,0 +1,67 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. 
+* +*************************************************************/ + +#include "singa/neuralnet/connection_layer.h" +#include "singa/utils/math_blob.h" + +namespace singa { + +void RNNDummyLayer::Setup(const LayerProto& conf, + const vector<Layer*>& srclayers) { + Layer::Setup(conf, srclayers); + dynamic_src_ = AddPrefixSuffix(unroll_index(), partition_id(), + conf.rnn_dummy_conf().dynamic_srclayer()); + LOG(ERROR) << dynamic_src_; + vector<int> shape; + for (int s : conf.rnn_dummy_conf().shape()) + shape.push_back(s); + integer_ = conf.rnn_dummy_conf().integer(); + low_ = conf.rnn_dummy_conf().low(); + high_ = conf.rnn_dummy_conf().high(); + // if no src layer, then it will genereate data by itself based on shape + // and random range + if (srclayers.size() == 0) { + CHECK(shape.size()); + CHECK_NE(low_, high_); + data_.Reshape(shape); + srclayer_ = nullptr; + } else { + srclayer_ = srclayers.at(0); + data_.ReshapeLike(srclayer_->data(this)); + data_.ShareData(srclayer_->mutable_data(this), false); + } +} + +void RNNDummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) { + if (srclayers.size() == 0) { + SampleUniform(low_, high_, &data_); + if (integer_) { + for (int i = 0; i < data_.count(); i ++) { + data_.mutable_cpu_data()[i] = floor(data_.cpu_data()[i]); + } + } + } else if (srclayer_ != srclayers.at(0)) { + srclayer_ = srclayers.at(0); + data_.ShareData(srclayer_->mutable_data(this), false); + } +} +} // namespace singa + http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/input_layer/char_rnn.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/input_layer/char_rnn.cc b/src/neuralnet/input_layer/char_rnn.cc index cc13b1b..8a56711 100644 --- a/src/neuralnet/input_layer/char_rnn.cc +++ b/src/neuralnet/input_layer/char_rnn.cc @@ -69,6 +69,7 @@ void CharRNNInputLayer::ComputeFeature(int flag, // decide the start pos of each instance in one mini-batch int max_offset = buf_.length() / batchsize_; CHECK_GT(max_offset, unroll_len_); + LOG(ERROR) << "Max iteration per epoch = " << max_offset / unroll_len_; for (int i = 0; i < batchsize_; i ++) { start_.push_back(i * max_offset); } @@ -77,7 +78,7 @@ void CharRNNInputLayer::ComputeFeature(int flag, for (int l = 0; l < unroll_len_ + 1; l++) { float* ptr = datavec_[l]->mutable_cpu_data(); for (int i = 0; i < batchsize_; i++) { - ptr[i] = static_cast<float>(char2index_.at(buf_[start_[i] + l])); + ptr[i] = static_cast<float>(char2index_.at(buf_[start_[i] + offset_ + l])); } } offset_ += unroll_len_; @@ -87,9 +88,6 @@ void CharRNNInputLayer::ComputeFeature(int flag, // std::shuffle(start_.begin(), start_.end(), g); offset_ = 0; // return -1; - } else { - // return 0; } } - } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/input_layer/onehot.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/input_layer/onehot.cc b/src/neuralnet/input_layer/onehot.cc new file mode 100644 index 0000000..056656a --- /dev/null +++ b/src/neuralnet/input_layer/onehot.cc @@ -0,0 +1,40 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. 
The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ +#include "singa/neuralnet/input_layer.h" + +namespace singa { +void OneHotLayer::Setup(const LayerProto& conf, + const vector<Layer*>& srclayers) { + InputLayer::Setup(conf, srclayers); + batchsize_ = srclayers.at(0)->data(unroll_index()).shape(0); + dim_ = 101 ; // proto.onehot_conf().vocab_size(); + data_.Reshape(batchsize_, dim_); +} + +void OneHotLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) { + float* ptr = data_.mutable_cpu_data(); + memset(ptr, 0, sizeof(float) * data_.count()); + const float* idx = srclayers[0]->data(unroll_index()).cpu_data(); + for (int i = 0; i < batchsize_; i++) { + ptr[i * dim_ + static_cast<int>(idx[i])] = 1; + } +} +} // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuralnet.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/neuralnet.cc b/src/neuralnet/neuralnet.cc index 49978a1..b045e06 100644 --- a/src/neuralnet/neuralnet.cc +++ b/src/neuralnet/neuralnet.cc @@ -19,13 +19,12 @@ * *************************************************************/ -#include "singa/neuralnet/neuralnet.h" +#include "singa/neuralnet/neuralnet.h" +#include <unordered_map> #include <algorithm> #include <queue> #include "singa/utils/singleton.h" -#include <unordered_map> -using namespace std; namespace singa { @@ -60,7 +59,7 @@ const NetProto NetConfPreprocess(const NetProto& conf) { } NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase, - int npartitions) { + int npartitions) { const NetProto& full_net_conf = NetConfPreprocess(net_conf); NetProto conf = full_net_conf; conf.clear_layer(); @@ -99,21 +98,21 @@ NeuralNet* NeuralNet::Create(const NetProto& net_conf, Phase phase, layer_conf->set_partition_dim(net_conf.partition_dim()); } // LOG(INFO) << "Before unrolling: \n" << conf.DebugString(); - conf = Unrolling (conf); + conf = Unrolling(conf); // Copy shared parameters for sharing param conf std::vector<ParamProto*> shares; std::unordered_map<string, ParamProto*> name2param; - for (int index = 0; index < conf.layer_size();index ++) { - LayerProto* layer = conf.mutable_layer(index); - for (int i = 0; i < layer->param_size(); i++) { - ParamProto* param = layer->mutable_param(i); + for (int index = 0; index < conf.layer_size(); index++) { + LayerProto* layer = conf.mutable_layer(index); + for (int i = 0; i < layer->param_size(); i++) { + ParamProto* param = layer->mutable_param(i); CHECK(name2param.find(param->name()) == name2param.end()) << "Repeated param = " << param->name(); - name2param[param->name()] = param; - if (param->has_share_from() && param->share_from() != "") - shares.push_back(param); - } + name2param[param->name()] = param; + if (param->has_share_from() && param->share_from() != "") + shares.push_back(param); + } } for (auto param : shares) { const std::string from = param->share_from(); @@ -135,32 
+134,30 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) { NetProto conf; std::vector<std::vector<int>> layer_groups; - std::unordered_map<string,int> org_layer_names; + std::unordered_map<string, int> org_layer_names; for (int index = 0; index < net_conf.layer_size(); index ++) { const LayerProto& org_layer = net_conf.layer(index); - org_layer_names[org_layer.name()] = index; // layer_name -> index + org_layer_names[org_layer.name()] = index; // layer_name -> index std::vector<int> layer_group; - for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll + for (int i = 0; i < org_layer.unroll_len(); i ++) { // unroll LayerProto* unroll_layer = conf.add_layer(); - unroll_layer->CopyFrom(org_layer); // create a new layer conf - // if (org_layer.unroll_len() > 1) { - // update layer names - std::stringstream sstm; - sstm << i << '#' << unroll_layer->name(); - unroll_layer->set_name(sstm.str()); - unroll_layer->set_unroll_index(i); - // update layer parameter sharing - for (int j = 0; j < unroll_layer->param_size(); j ++) { - ParamProto* param = unroll_layer->mutable_param(j); - if (i > 0) { - param->set_share_from("0#" + param->name()); - } - std::stringstream sstm1; - sstm1 << i << '#' << param->name(); - param->set_name(sstm1.str()); + unroll_layer->CopyFrom(org_layer); // create a new layer conf + // update layer names + std::stringstream sstm; + sstm << i << '#' << unroll_layer->name(); + unroll_layer->set_name(sstm.str()); + unroll_layer->set_unroll_index(i); + // update layer parameter sharing + for (int j = 0; j < unroll_layer->param_size(); j ++) { + ParamProto* param = unroll_layer->mutable_param(j); + if (i > 0) { + param->set_share_from("0#" + param->name()); } - // } + std::stringstream sstm1; + sstm1 << i << '#' << param->name(); + param->set_name(sstm1.str()); + } // clear unrolling related fields unroll_layer->clear_unroll_len(); unroll_layer->clear_unroll_conn_type(); @@ -176,7 +173,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) { for (int index = 0; index < net_conf.layer_size(); index ++) { const LayerProto& org_layer = net_conf.layer(index); if (org_layer.srclayers_size() == 0) - continue; // no src layer + continue; // no src layer for (int i = 0; i < org_layer.srclayers_size(); i ++) { const string& org_layer_src = org_layer.srclayers(i); singa::UnrollConnType unroll_conn_type = kUnrollOneToOne; @@ -197,7 +194,7 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) { unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name()); } } else if (unroll_conn_type == kUnrollOneToOne) { - if (j < shift) continue; // no need to connect with the src + if (j < shift) continue; // no need to connect with the src int unroll_layer_src = unroll_layer_srcs[j - shift]; unroll_layer->add_srclayers(conf.layer(unroll_layer_src).name()); } else if (unroll_conn_type == kUnrollFirstToLast) { @@ -209,16 +206,14 @@ const NetProto NeuralNet::Unrolling(const NetProto& net_conf) { } } - //TODO(fanju): add LSTM when it is ready - if (org_layer.type() == kGRU) { // connect GRU layers + // TODO(fanju): add LSTM when it is ready + if (org_layer.type() == kGRU) { // connect GRU layers for (unsigned int j = 1; j < layer_groups[index].size(); j ++) { LayerProto* unroll_layer = conf.mutable_layer(layer_groups[index][j]); string srcname = conf.layer(layer_groups[index][j-1]).name(); unroll_layer->add_srclayers(srcname); - // LOG(ERROR) << "connect " << unroll_layer->name() << " from " << srcname; } } - } return conf; } 
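
Note on the Unrolling() change above: every layer with unroll_len > 1 is copied once per unrolled step; copy i is renamed "i#<layer>", its params are renamed "i#<param>", and copies with i > 0 share their weights from the step-0 copy via share_from. A rough Python sketch of just this naming/sharing scheme (hypothetical helper, not SINGA code):

    def unroll_layer(layer_name, param_names, unroll_len):
        # Mirrors the renaming in NeuralNet::Unrolling: step i becomes "i#<name>",
        # and params of steps i > 0 point back to the step-0 copy.
        copies = []
        for i in range(unroll_len):
            params = [{"name": "%d#%s" % (i, p),
                       "share_from": "0#%s" % p if i > 0 else ""}
                      for p in param_names]
            copies.append({"name": "%d#%s" % (i, layer_name),
                           "unroll_index": i,
                           "params": params})
        return copies

    # unroll_layer("gru1", ["z_hx"], 3) -> "0#gru1", "1#gru1", "2#gru1", where
    # "1#z_hx" and "2#z_hx" share from "0#z_hx".
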
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/dummy.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/neuron_layer/dummy.cc b/src/neuralnet/neuron_layer/dummy.cc index 9ccb179..936bb5e 100644 --- a/src/neuralnet/neuron_layer/dummy.cc +++ b/src/neuralnet/neuron_layer/dummy.cc @@ -45,7 +45,6 @@ void DummyLayer::Setup(const LayerProto& proto, if (proto.dummy_conf().output()) { // use as output layer output_ = true; } - } void DummyLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) { http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/embedding.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/neuron_layer/embedding.cc b/src/neuralnet/neuron_layer/embedding.cc index 00e9139..c980c54 100644 --- a/src/neuralnet/neuron_layer/embedding.cc +++ b/src/neuralnet/neuron_layer/embedding.cc @@ -65,8 +65,8 @@ void EmbeddingLayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) { } } -void EmbeddingLayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) -{ +void EmbeddingLayer::ComputeGradient(int flag, + const vector<Layer*>& srclayers) { const float* word_idx = srclayers.at(0)->data(unroll_index()).cpu_data(); auto context = Singleton<Context>::Instance(); if ((flag & kAggGrad) == 0) http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/neuron_layer/gru.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/neuron_layer/gru.cc b/src/neuralnet/neuron_layer/gru.cc index cf7425b..da2f9c5 100644 --- a/src/neuralnet/neuron_layer/gru.cc +++ b/src/neuralnet/neuron_layer/gru.cc @@ -19,14 +19,12 @@ * *************************************************************/ -#include "singa/neuralnet/neuron_layer.h" - #include <glog/logging.h> +#include "singa/neuralnet/neuron_layer.h" #include "singa/utils/singleton.h" #include "singa/utils/math_blob.h" #include "singa/utils/singa_op.h" -#include <iostream> using namespace std; namespace singa { @@ -57,10 +55,10 @@ void GRULayer::Setup(const LayerProto& conf, CHECK_LE(srclayers.size(), 2); const auto& src = srclayers[0]->data(this); - batchsize_ = src.shape()[0]; // size of batch - vdim_ = src.count() / (batchsize_); // dimension of input + batchsize_ = src.shape()[0]; // size of batch + vdim_ = src.count() / (batchsize_); // dimension of input - hdim_ = layer_conf_.gru_conf().dim_hidden(); // dimension of hidden state + hdim_ = layer_conf_.gru_conf().dim_hidden(); // dimension of hidden state data_.Reshape(vector<int>{batchsize_, hdim_}); grad_.ReshapeLike(data_); @@ -77,9 +75,9 @@ void GRULayer::Setup(const LayerProto& conf, weight_c_hh_ = Param::Create(conf.param(5)); if (conf.param_size() > 6) { - bias_z_ = Param::Create(conf.param(6)); - bias_r_ = Param::Create(conf.param(7)); - bias_c_ = Param::Create(conf.param(8)); + bias_z_ = Param::Create(conf.param(6)); + bias_r_ = Param::Create(conf.param(7)); + bias_c_ = Param::Create(conf.param(8)); } weight_z_hx_->Setup(vector<int>{hdim_, vdim_}); @@ -91,168 +89,170 @@ void GRULayer::Setup(const LayerProto& conf, weight_c_hh_->Setup(vector<int>{hdim_, hdim_}); if (conf.param_size() > 6) { - bias_z_->Setup(vector<int>{hdim_}); - bias_r_->Setup(vector<int>{hdim_}); - bias_c_->Setup(vector<int>{hdim_}); + bias_z_->Setup(vector<int>{hdim_}); + bias_r_->Setup(vector<int>{hdim_}); + bias_c_->Setup(vector<int>{hdim_}); } update_gate 
= new Blob<float>(batchsize_, hdim_); reset_gate = new Blob<float>(batchsize_, hdim_); - // reset gate x context - reset_context = new Blob<float>(batchsize_, hdim_); new_memory = new Blob<float>(batchsize_, hdim_); } void GRULayer::ComputeFeature(int flag, const vector<Layer*>& srclayers) { - CHECK_LE(srclayers.size(), 2); - - // Do transpose - Blob<float> *w_z_hx_t = Transpose (weight_z_hx_->data()); - Blob<float> *w_z_hh_t = Transpose (weight_z_hh_->data()); - Blob<float> *w_r_hx_t = Transpose (weight_r_hx_->data()); - Blob<float> *w_r_hh_t = Transpose (weight_r_hh_->data()); - Blob<float> *w_c_hx_t = Transpose (weight_c_hx_->data()); - Blob<float> *w_c_hh_t = Transpose (weight_c_hh_->data()); - - // Prepare the data input and the context - const auto& src = srclayers[0]->data(this); - const Blob<float> *context; - if (srclayers.size() == 1) { // only have data input - context = new Blob<float>(batchsize_, hdim_); - } else { // have data input & context - context = &srclayers[1]->data(this); - } - - // Compute the update gate - GEMM(1.0f, 0.0f, src,*w_z_hx_t,update_gate); - if (bias_z_ != nullptr) - MVAddRow(1.0f,1.0f,bias_z_->data(),update_gate); - GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate); - Map<op::Sigmoid<float>,float>(*update_gate, update_gate); - - // Compute the reset gate - GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate); - if (bias_r_ != nullptr) - MVAddRow(1.0f,1.0f, bias_r_->data(),reset_gate); - GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate); - Map<op::Sigmoid<float>,float>(*reset_gate, reset_gate); - - // Compute the new memory - Mult<float>(*reset_gate, *context, reset_context); - GEMM(1.0f, 0.0f, *reset_context, *w_c_hh_t, new_memory); - GEMM(1.0f, 1.0f, src, *w_c_hx_t, new_memory); - if (bias_c_ != nullptr) - MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory); - Map<op::Tanh<float>,float>(*new_memory, new_memory); - - Sub(*new_memory, *context, &data_); + CHECK_LE(srclayers.size(), 2); + + // Do transpose + Blob<float> *w_z_hx_t = Transpose(weight_z_hx_->data()); + Blob<float> *w_z_hh_t = Transpose(weight_z_hh_->data()); + Blob<float> *w_r_hx_t = Transpose(weight_r_hx_->data()); + Blob<float> *w_r_hh_t = Transpose(weight_r_hh_->data()); + Blob<float> *w_c_hx_t = Transpose(weight_c_hx_->data()); + Blob<float> *w_c_hh_t = Transpose(weight_c_hh_->data()); + + // Prepare the data input and the context + const auto& src = srclayers[0]->data(this); + const Blob<float> *context; + if (srclayers.size() == 1) { // only have data input + context = new Blob<float>(batchsize_, hdim_); + } else { // have data input & context + context = &srclayers[1]->data(this); + } + + // Compute the update gate + GEMM(1.0f, 0.0f, src, *w_z_hx_t, update_gate); + if (bias_z_ != nullptr) + MVAddRow(1.0f, 1.0f, bias_z_->data(), update_gate); + GEMM(1.0f, 1.0f, *context, *w_z_hh_t, update_gate); + Map<op::Sigmoid<float>, float>(*update_gate, update_gate); + + // Compute the reset gate + GEMM(1.0f, 0.0f, src, *w_r_hx_t, reset_gate); + if (bias_r_ != nullptr) + MVAddRow(1.0f, 1.0f, bias_r_->data(), reset_gate); + GEMM(1.0f, 1.0f, *context, *w_r_hh_t, reset_gate); + Map<op::Sigmoid<float>, float>(*reset_gate, reset_gate); + + // Compute the new memory + GEMM(1.0f, 0.0f, src, *w_c_hx_t, new_memory); + if (bias_c_ != nullptr) + MVAddRow(1.0f, 1.0f, bias_c_->data(), new_memory); + Mult<float>(*reset_gate, *new_memory, new_memory); + GEMM(1.0f, 1.0f, *context, *w_c_hh_t, new_memory); + Map<op::Tanh<float>, float>(*new_memory, new_memory); + + + Sub(*context, *new_memory, &data_); Mult(data_, 
*update_gate, &data_); - AXPY(1.0f, *context, &data_); + Add(data_, *new_memory, &data_); - // delete the pointers - if (srclayers.size() == 1) + // delete the pointers + if (srclayers.size() == 1) delete context; - delete w_z_hx_t; - delete w_z_hh_t; - delete w_r_hx_t; - delete w_r_hh_t; - delete w_c_hx_t; - delete w_c_hh_t; + delete w_z_hx_t; + delete w_z_hh_t; + delete w_r_hx_t; + delete w_r_hh_t; + delete w_c_hx_t; + delete w_c_hh_t; } void GRULayer::ComputeGradient(int flag, const vector<Layer*>& srclayers) { - CHECK_LE(srclayers.size(), 2); - // agg grad from two dst layers + CHECK_LE(srclayers.size(), 2); + // agg grad from two dst layers, gradvec_[0] is grad_ AXPY(1.0f, *gradvec_[1], &grad_); - float beta = 1.0f; // agg param gradients - - Layer* ilayer = srclayers[0]; // input layer - Layer* clayer = nullptr; // context layer - // Prepare the data input and the context - const Blob<float>& src = ilayer->data(this); - const Blob<float> *context; - if (srclayers.size() == 1) { // only have data input - context = new Blob<float>(batchsize_, hdim_); - } else { // have data input & context + float beta = 1.0f; // agg param gradients + + Layer* ilayer = srclayers[0]; // input layer + Layer* clayer = nullptr; // context layer + // Prepare the data input and the context + const Blob<float>& src = ilayer->data(this); + const Blob<float> *context; + if (srclayers.size() == 1) { // only have data input + context = new Blob<float>(batchsize_, hdim_); + } else { // have data input & context clayer = srclayers[1]; - context = &(clayer->data(this)); - } - - // Prepare gradient of output neurons - Blob<float> *grad_t = Transpose (grad_); - - // Compute intermediate gradients which are used for other computations - Blob<float> dugatedz (batchsize_, hdim_); - Map<singa::op::SigmoidGrad<float>, float>(*update_gate, &dugatedz); - Blob<float> drgatedr (batchsize_, hdim_); - Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr); - Blob<float> dnewmdc (batchsize_, hdim_); - Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc); - - Blob<float> dLdz (batchsize_, hdim_); - Sub<float>(*new_memory, *context, &dLdz); - Mult<float>(dLdz, grad_, &dLdz); - Mult<float>(dLdz, dugatedz, &dLdz); - - Blob<float> dLdc (batchsize_,hdim_); - Mult(grad_, *update_gate, &dLdc); - Mult(dLdc, dnewmdc, &dLdc); - - Blob<float> reset_dLdc (batchsize_,hdim_); - GEMM(1.0f, 0.0f, dLdc, weight_c_hh_->data(), &reset_dLdc); - - Blob<float> dLdr (batchsize_, hdim_); - Mult(reset_dLdc, *context, &dLdr); - Mult(dLdr, drgatedr, &dLdr); - - // Compute gradients for parameters of update gate - Blob<float> *dLdz_t = Transpose(dLdz); - GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad()); - GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad()); - if (bias_z_ != nullptr) - MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad()); - delete dLdz_t; - - // Compute gradients for parameters of reset gate - Blob<float> *dLdr_t = Transpose(dLdr); - GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad()); - GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad()); - if (bias_r_ != nullptr) - MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad()); - delete dLdr_t; - - // Compute gradients for parameters of new memory - Blob<float> *dLdc_t = Transpose(dLdc); - GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad()); - GEMM(1.0f, beta, *dLdc_t, *reset_context, weight_c_hh_->mutable_grad()); - if (bias_c_ != nullptr) - MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad()); - delete dLdc_t; - - // 
Compute gradients for data input layer - if (srclayers[0]->mutable_grad(this) != nullptr) { - GEMM(1.0f,0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this)); - GEMM(1.0f,1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this)); - GEMM(1.0f,1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this)); - } - - if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) { - // Compute gradients for context layer - Mult(reset_dLdc, *reset_gate, clayer->mutable_grad(this)); - GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this)); - GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this)); - AXPY(-1.0f, *update_gate, clayer->mutable_grad(this)); + context = &(clayer->data(this)); + } + + // Compute intermediate gradients which are used for other computations + Blob<float> dugatedz(batchsize_, hdim_); + Map<singa::op::SigmoidGrad<float>, float>(*update_gate, &dugatedz); + Blob<float> drgatedr(batchsize_, hdim_); + Map<singa::op::SigmoidGrad<float>, float>(*reset_gate, &drgatedr); + Blob<float> dnewmdc(batchsize_, hdim_); + Map<singa::op::TanhGrad<float>, float>(*new_memory, &dnewmdc); + + Blob<float> dLdz(batchsize_, hdim_); + Sub<float>(*context, *new_memory, &dLdz); + Mult<float>(dLdz, grad_, &dLdz); + Mult<float>(dLdz, dugatedz, &dLdz); + + Blob<float> dLdc(batchsize_, hdim_); + Blob<float> z1(batchsize_, hdim_); + z1.SetValue(1.0f); + AXPY<float>(-1.0f, *update_gate, &z1); + Mult(grad_, z1, &dLdc); + Mult(dLdc, dnewmdc, &dLdc); + + Blob<float> reset_dLdc(batchsize_, hdim_); + Mult(dLdc, *reset_gate, &reset_dLdc); + + Blob<float> dLdr(batchsize_, hdim_); + Blob<float> cprev(batchsize_, hdim_); + GEMM(1.0f, 0.0f, *context, weight_c_hh_->data().T(), &cprev); + Mult(dLdc, cprev, &dLdr); + Mult(dLdr, drgatedr, &dLdr); + + // Compute gradients for parameters of update gate + Blob<float> *dLdz_t = Transpose(dLdz); + GEMM(1.0f, beta, *dLdz_t, src, weight_z_hx_->mutable_grad()); + GEMM(1.0f, beta, *dLdz_t, *context, weight_z_hh_->mutable_grad()); + if (bias_z_ != nullptr) + MVSumRow<float>(1.0f, beta, dLdz, bias_z_->mutable_grad()); + delete dLdz_t; + + // Compute gradients for parameters of reset gate + Blob<float> *dLdr_t = Transpose(dLdr); + GEMM(1.0f, beta, *dLdr_t, src, weight_r_hx_->mutable_grad()); + GEMM(1.0f, beta, *dLdr_t, *context, weight_r_hh_->mutable_grad()); + if (bias_r_ != nullptr) + MVSumRow(1.0f, beta, dLdr, bias_r_->mutable_grad()); + delete dLdr_t; + + // Compute gradients for parameters of new memory + Blob<float> *dLdc_t = Transpose(dLdc); + GEMM(1.0f, beta, *dLdc_t, src, weight_c_hx_->mutable_grad()); + if (bias_c_ != nullptr) + MVSumRow(1.0f, beta, dLdc, bias_c_->mutable_grad()); + delete dLdc_t; + + Blob<float> *reset_dLdc_t = Transpose(reset_dLdc); + GEMM(1.0f, beta, *reset_dLdc_t, *context, weight_c_hh_->mutable_grad()); + delete reset_dLdc_t; + + // Compute gradients for data input layer + if (srclayers[0]->mutable_grad(this) != nullptr) { + GEMM(1.0f, 0.0f, dLdc, weight_c_hx_->data(), ilayer->mutable_grad(this)); + GEMM(1.0f, 1.0f, dLdz, weight_z_hx_->data(), ilayer->mutable_grad(this)); + GEMM(1.0f, 1.0f, dLdr, weight_r_hx_->data(), ilayer->mutable_grad(this)); + } + + if (clayer != nullptr && clayer->mutable_grad(this) != nullptr) { + // Compute gradients for context layer + GEMM(1.0f, 0.0f, reset_dLdc, weight_c_hh_->data(), + clayer->mutable_grad(this)); + GEMM(1.0f, 1.0f, dLdr, weight_r_hh_->data(), clayer->mutable_grad(this)); + GEMM(1.0f, 1.0f, dLdz, weight_z_hh_->data(), clayer->mutable_grad(this)); + 
Add(clayer->grad(this), *update_gate, clayer->mutable_grad(this)); // LOG(ERROR) << "grad to prev gru " << Asum(clayer->grad(this)); - } + } - if (srclayers.size() == 1) + if (srclayers.size() == 1) delete context; - else - context = NULL; - delete grad_t; } } // namespace singa http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/neuralnet/output_layer/char_rnn.cc ---------------------------------------------------------------------- diff --git a/src/neuralnet/output_layer/char_rnn.cc b/src/neuralnet/output_layer/char_rnn.cc new file mode 100644 index 0000000..c3f1733 --- /dev/null +++ b/src/neuralnet/output_layer/char_rnn.cc @@ -0,0 +1,51 @@ +/************************************************************ +* +* Licensed to the Apache Software Foundation (ASF) under one +* or more contributor license agreements. See the NOTICE file +* distributed with this work for additional information +* regarding copyright ownership. The ASF licenses this file +* to you under the Apache License, Version 2.0 (the +* "License"); you may not use this file except in compliance +* with the License. You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an +* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +* KIND, either express or implied. See the License for the +* specific language governing permissions and limitations +* under the License. +* +*************************************************************/ + +#include <algorithm> +#include <iostream> +#include <fstream> +#include "singa/neuralnet/output_layer.h" + +namespace singa { + +void CharRNNOutputLayer::Setup(const LayerProto& proto, + const vector<Layer*>& srclayers) { + CHECK_EQ(srclayers.size(), 1); + OutputLayer::Setup(proto, srclayers); + std::ifstream fin; + const string path = proto.char_rnn_conf().vocab_path(); + fin.open(path); + CHECK(fin.is_open()) << "Can't open vocab_path = " << path; + std::stringstream stream; + stream << fin.rdbuf(); + vocab_ = stream.str(); + fin.close(); +} + +void CharRNNOutputLayer::ComputeFeature(int flag, + const vector<Layer*>& srclayers) { + const float* dptr = srclayers[0]->data(this).cpu_data(); + for (int i = 0; i < srclayers[0]->data(this).shape(0); i++) { + std::cout<<vocab_[static_cast<int>(dptr[i])]; + } +} + +} // namespace singa; http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/utils/updater.cc ---------------------------------------------------------------------- diff --git a/src/utils/updater.cc b/src/utils/updater.cc index 200670a..1b3e26c 100644 --- a/src/utils/updater.cc +++ b/src/utils/updater.cc @@ -140,6 +140,9 @@ void SGDUpdater::Update(int step, Param* param, float grad_scale) { /***********************Nesterov******************************/ void NesterovUpdater::Update(int step, Param* param, float grad_scale) { + if (clip_high_ > clip_low_) + Clip(clip_low_, clip_high_, param); + Shape<1> s = Shape1(param->size()); Tensor<cpu, 1> data(param->mutable_cpu_data(), s); Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s); @@ -181,6 +184,9 @@ void RMSPropUpdater::Init(const UpdaterProto& proto) { } void RMSPropUpdater::Update(int step, Param* param, float grad_scale) { + if (clip_high_ > clip_low_) + Clip(clip_low_, clip_high_, param); + Shape<1> s=Shape1(param->size()); Tensor<cpu, 1> data(param->mutable_cpu_data(), s); Tensor<cpu, 1> grad(param->mutable_cpu_grad(), s); 
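
Note on the updater change above: both NesterovUpdater and RMSPropUpdater now clip each parameter gradient element-wise before the update whenever clip_high > clip_low; job.conf enables this with clip_low: -5 and clip_high: 5. A minimal NumPy sketch of an RMSProp step with that clipping (the epsilon and the exact update form here are assumptions for illustration, not copied from updater.cc):

    import numpy as np

    def rmsprop_step(param, grad, hist, lr=0.002, rho=0.95, eps=1e-8,
                     clip_low=-5.0, clip_high=5.0):
        if clip_high > clip_low:                       # clipping added by this commit
            grad = np.clip(grad, clip_low, clip_high)
        hist *= rho
        hist += (1.0 - rho) * grad * grad              # running mean of squared grads
        param -= lr * grad / (np.sqrt(hist) + eps)
        return param, hist
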
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/a2f4e468/src/worker.cc ---------------------------------------------------------------------- diff --git a/src/worker.cc b/src/worker.cc index abe74e7..2afa8b0 100644 --- a/src/worker.cc +++ b/src/worker.cc @@ -433,7 +433,8 @@ void BPTTWorker::Backward(int step, NeuralNet* net) { for (auto it = layers.rbegin(); it != layers.rend(); it++) { Layer* layer = *it; if (layer->partition_id() == id_) { - layer->ComputeGradient(kTrain | kBackward | kAggGrad, net->srclayers(layer)); + layer->ComputeGradient(kTrain | kBackward | kAggGrad, + net->srclayers(layer)); // LOG(ERROR) << layer->name() << " backward"; if (job_conf_.debug() && DisplayNow(step) && grp_id_ == 0) label[layer->name()] = layer->ToString(true, kTrain | kBackward);
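
The worker.cc hunk above is from BPTTWorker::Backward, which walks the unrolled layers in reverse and passes kAggGrad so every unrolled copy accumulates its parameter gradients into the shared step-0 parameters. Schematically, one training window looks like the sketch below (purely illustrative; it reuses the gru_step helper sketched near the top of this message):

    def bptt_window(xs, h0, params):
        # Forward through one unrolled window (unroll_len steps, 50 in job.conf),
        # keeping every hidden state so the backward pass can revisit them in
        # reverse order and sum the per-step parameter gradients.
        hs = [h0]
        for x in xs:
            hs.append(gru_step(x, hs[-1], *params))
        return hs  # hs[1:] feed the shared ip1 + softmax loss at each step
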
