SINGA-10 Add Support for Recurrent Neural Networks (RNN)

Fix bugs in OutputLayer (the wid - start indexing). Can reach 79 ppl, but the training speed still needs to be optimized.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e8e07f10
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e8e07f10
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e8e07f10

Branch: refs/heads/master
Commit: e8e07f10eb2e5cd1585402c0682f41d329106ddb
Parents: 3dc1eee
Author: Wei Wang <[email protected]>
Authored: Wed Sep 16 00:46:50 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Fri Sep 18 16:46:41 2015 +0800

----------------------------------------------------------------------
 examples/rnnlm/create_shard.cc | 49 ++++++++++++++++++++++++++++++-------
 examples/rnnlm/job.conf        |  6 ++---
 examples/rnnlm/rnnlm.cc        | 19 ++++++++------
 examples/rnnlm/rnnlm.h         |  4 ++-
 4 files changed, 57 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e8e07f10/examples/rnnlm/create_shard.cc
----------------------------------------------------------------------
diff --git a/examples/rnnlm/create_shard.cc b/examples/rnnlm/create_shard.cc
index dd56a84..f337350 100644
--- a/examples/rnnlm/create_shard.cc
+++ b/examples/rnnlm/create_shard.cc
@@ -1,16 +1,45 @@
+/*
+ * This file include code from rnnlmlib-0.4 whose licence is as follows:
+Copyright (c) 2010-2012 Tomas Mikolov
+Copyright (c) 2013 Cantab Research Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither name of copyright holders nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 //
 // This code creates DataShard for RNNLM dataset.
-// It is adapted from the convert_mnist_data from Caffe
 // The RNNLM dataset could be downloaded at
 //    http://www.rnnlm.org/
 //
 // Usage:
 //    create_shard.bin -train train_file -class_size [-debug] [-valid valid_file] [-test test_file]
 
-#include <gflags/gflags.h>
-#include <glog/logging.h>
-
-
 #include "utils/data_shard.h"
 #include "utils/common.h"
 #include "proto/common.pb.h"
@@ -255,14 +284,15 @@ int init_class() {
   return 0;
 }
 
-int create_shard(char *input_file, char *output_file) {
+int create_shard(const char *input_file, const char *output_file) {
   DataShard dataShard(output_file, DataShard::kCreate);
   singa::WordRecord wordRecord;
-  char word[MAX_STRING];
+  char word[MAX_STRING], str_buffer[32];
   FILE *fin;
   int a, i;
   fin = fopen(input_file, "rb");
+  int wcnt = 0;
   while (1) {
     readWord(word, fin);
     if (feof(fin)) break;
@@ -276,7 +306,8 @@ int create_shard(char *input_file, char *output_file) {
       wordRecord.set_class_index(class_idx);
       wordRecord.set_class_start(class_start[class_idx]);
       wordRecord.set_class_end(class_end[class_idx]);
-      dataShard.Insert(word, wordRecord);
+      int length = snprintf(str_buffer, 32, "%05d", wcnt++);
+      dataShard.Insert(string(str_buffer, length), wordRecord);
     }
   }
 
@@ -397,4 +428,4 @@ int main(int argc, char **argv) {
   if (test_mode) create_shard(test_file, "test_shard");
 
   return 0;
-}
\ No newline at end of file
+}
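
The create_shard.cc change above stops using the raw word string as the shard key and instead keys each record with a zero-padded running counter. The raw word repeats across the corpus, so reusing it as a key risks collisions or overwrites in the DataShard, while a padded counter gives unique keys whose lexicographic order matches insertion order. A minimal sketch of the key scheme, with a hypothetical helper name (not part of the SINGA API):

    #include <cstdio>
    #include <string>

    // Zero-padding to a fixed width keeps lexicographic order equal to
    // insertion order: "00002" < "00010", whereas "2" > "10" as strings.
    std::string MakeShardKey(int* counter) {
      char buf[32];
      int len = std::snprintf(buf, sizeof(buf), "%05d", (*counter)++);
      return std::string(buf, len);
    }
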
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e8e07f10/examples/rnnlm/job.conf
----------------------------------------------------------------------
diff --git a/examples/rnnlm/job.conf b/examples/rnnlm/job.conf
index 81bdb94..98a4157 100644
--- a/examples/rnnlm/job.conf
+++ b/examples/rnnlm/job.conf
@@ -5,7 +5,7 @@ cluster {
   nworkers_per_group: 1
   nservers_per_procs: 1
   nworkers_per_procs: 1
-  workspace: "examples/rnnlm/"
+  workspace: "examples/rnnlm/"
 }
 
 name: "recurrent-neural-network-language-model"
@@ -15,7 +15,7 @@
 train_steps:214050
 test_steps:583
 test_freq:7135
 #disp_freq is specific to training
-disp_freq:7135
+disp_freq:1000
 
 train_one_batch {
   alg: kBP
@@ -115,7 +115,7 @@ layer{
       vocab_size: 3720
     }
     param{
-      name: "w3"
+      name: "w3"
       init {
         type: kUniform
         low:-0.3

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e8e07f10/examples/rnnlm/rnnlm.cc
----------------------------------------------------------------------
diff --git a/examples/rnnlm/rnnlm.cc b/examples/rnnlm/rnnlm.cc
index 4d39b5f..0ad29a6 100644
--- a/examples/rnnlm/rnnlm.cc
+++ b/examples/rnnlm/rnnlm.cc
@@ -47,17 +47,14 @@ void RnnDataLayer::ComputeFeature(int flag, Metric *perf) {
   CHECK(records_.size() <= shard_->Count());
   records_[0] = records_[window_];
   window_ = max_window_;
-  singa::WordRecord wr;
   for (int i = 1; i <= max_window_; i++) {
     string key;
     if (shard_->Next(&key, &records_[i])) {
-      wr = records_[i];
-      if(wr.word_index() == 0) {
-        window_ = i;
+      if(records_[i].word_index() == 0) {
+        window_ = i;  // +1 ??
         break;
       }
-    }
-    else{
+    } else{
       shard_->SeekToFirst();
       CHECK(shard_->Next(&key, &records_[i]));
     }
@@ -68,6 +65,7 @@ void RnnDataLayer::ComputeFeature(int flag, Metric *perf) {
 void WordLayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 1);
+  LOG(ERROR) << srclayers_[0]->name();
   int max_window = static_cast<RnnDataLayer*>(srclayers_[0])->max_window();
   LOG(ERROR) << "clee " << max_window;
   data_.Reshape(vector<int>{max_window});
@@ -213,7 +211,7 @@ void OutputLayer::Setup(const LayerProto& proto, int npartitions) {
   int nclass = proto.GetExtension(output_conf).nclass();
   word_weight_ = Param::Create(proto.param(0));
   word_weight_->Setup(vector<int>{vocab_size, vdim});
-  class_weight_ = Param::Create(proto.param(0));
+  class_weight_ = Param::Create(proto.param(1));
   class_weight_->Setup(vector<int>{nclass, vdim});
 
   pword_.resize(max_window);
@@ -234,6 +232,7 @@ void OutputLayer::ComputeFeature(int flag, Metric* perf) {
     int end = static_cast<int>(label[t * 4 + 1]);
 
     auto wordWeight = word_weight.Slice(start, end);
+    CHECK_GT(end, start);
     pword_[t].Reshape(vector<int>{end-start});
     auto pword = RTensor1(&pword_[t]);
     pword = dot(src[t], wordWeight.T());
@@ -244,6 +243,8 @@ void OutputLayer::ComputeFeature(int flag, Metric* perf) {
 
     int wid = static_cast<int>(label[t * 4 + 2]);
     int cid = static_cast<int>(label[t * 4 + 3]);
+    CHECK_GT(end, wid);
+    CHECK_GE(wid, start);
     loss += -log(std::max(pword[wid - start] * pclass[t][cid], FLT_MIN));
     ppl += log10(std::max(pword[wid - start] * pclass[t][cid], FLT_MIN));
   }
@@ -269,11 +270,13 @@ void OutputLayer::ComputeGradient(int flag, Metric* perf) {
     int wid = static_cast<int>(label[t * 4 + 2]);
     int cid = static_cast<int>(label[t * 4 + 3]);
     auto pword = RTensor1(&pword_[t]);
+    CHECK_GT(end, wid);
+    CHECK_GE(wid, start);
     // gL/gclass_act
     pclass[t][cid] -= 1.0;
     // gL/gword_act
-    pword[wid] -= 1.0;
+    pword[wid - start] -= 1.0;
     // gL/gword_weight
     gword_weight.Slice(start, end) += dot(pword.FlatTo2D().T(), src[t].FlatTo2D());
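
The headline fix is pword[wid - start] in ComputeGradient. The output layer factorizes the softmax by word class, P(w) = P(c) * P(w | c), and the words of one class occupy rows [start, end) of word_weight; after Slice(start, end), all positions are relative to start. ComputeFeature already read pword[wid - start], but the gradient step still wrote pword[wid] -= 1.0, corrupting the wrong entry whenever start > 0. A simplified sketch of the convention, using plain vectors in place of SINGA tensors (the helper name is made up for illustration):

    #include <algorithm>
    #include <cassert>
    #include <cfloat>
    #include <cmath>
    #include <vector>

    // pword holds P(w | class) for the words [start, end) of the target's
    // class, indexed relative to `start`; pclass_cid is P(class == cid).
    float TokenLossAndGrad(std::vector<float>* pword, int start, int end,
                           int wid, float pclass_cid) {
      assert(wid >= start && wid < end);  // mirrors the new CHECK_GE/CHECK_GT
      float p = (*pword)[wid - start] * pclass_cid;  // P(w) = P(c) * P(w | c)
      (*pword)[wid - start] -= 1.0f;  // softmax-cross-entropy gradient;
                                      // the old code indexed (*pword)[wid]
      return -std::log(std::max(p, FLT_MIN));  // FLT_MIN guard as in the diff
    }

The same relative indexing is why gword_weight.Slice(start, end) receives the outer product directly: the sliced gradient block and pword share the class-local coordinate system.
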
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e8e07f10/examples/rnnlm/rnnlm.h
----------------------------------------------------------------------
diff --git a/examples/rnnlm/rnnlm.h b/examples/rnnlm/rnnlm.h
index e9b7c55..888ebe7 100644
--- a/examples/rnnlm/rnnlm.h
+++ b/examples/rnnlm/rnnlm.h
@@ -32,10 +32,12 @@ class RnnDataLayer : public RNNLayer {
   void Setup(const LayerProto& proto, int npartitions) override;
   void ComputeFeature(int flag, Metric *perf) override;
   void ComputeGradient(int flag, Metric* perf) override {}
+  ConnectionType dst_layer_connection() const override {
+    return kOneToMany;
+  }
   int max_window() const {
     return max_window_;
   }
-  const std::vector<singa::WordRecord>& records() const { return records_; }
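
The new dst_layer_connection() override in rnnlm.h appears intended to tell the framework that RnnDataLayer's output is consumed by more than one downstream layer (kOneToMany is an existing SINGA ConnectionType value, not a symbol introduced here).

For context on the "79 ppl" figure in the subject line: ComputeFeature accumulates log10 of each token's probability into ppl, and per-word perplexity is conventionally recovered from that sum as sketched below. This is the standard formula, not SINGA's reporting code, whose aggregation step lies outside this diff:

    #include <cmath>

    // PPL = 10^(-(1/N) * sum_t log10 P(w_t)); a PPL of 79 means the model
    // is about as uncertain as a uniform choice among 79 words per step.
    double Perplexity(double sum_log10_prob, int num_words) {
      return std::pow(10.0, -sum_log10_prob / num_words);
    }
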
