SINGA-10 Add Support for Recurrent Neural Networks (RNN)

Fix bugs in OutputLayer (the wid - start indexing). Can reach 79 ppl, but the training speed still needs to be optimized.
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/e8e07f10
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/e8e07f10
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/e8e07f10

Branch: refs/heads/master
Commit: e8e07f10eb2e5cd1585402c0682f41d329106ddb
Parents: 3dc1eee
Author: Wei Wang <[email protected]>
Authored: Wed Sep 16 00:46:50 2015 +0800
Committer: Wei Wang <[email protected]>
Committed: Fri Sep 18 16:46:41 2015 +0800

----------------------------------------------------------------------
 examples/rnnlm/create_shard.cc | 49 ++++++++++++++++++++++++++++++-------
 examples/rnnlm/job.conf        |  6 ++---
 examples/rnnlm/rnnlm.cc        | 19 ++++++++------
 examples/rnnlm/rnnlm.h         |  4 ++-
 4 files changed, 57 insertions(+), 21 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e8e07f10/examples/rnnlm/create_shard.cc
----------------------------------------------------------------------
diff --git a/examples/rnnlm/create_shard.cc b/examples/rnnlm/create_shard.cc
index dd56a84..f337350 100644
--- a/examples/rnnlm/create_shard.cc
+++ b/examples/rnnlm/create_shard.cc
@@ -1,16 +1,45 @@
+/*
+ * This file include code from rnnlmlib-0.4 whose licence is as follows:
+Copyright (c) 2010-2012 Tomas Mikolov
+Copyright (c) 2013 Cantab Research Ltd
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither name of copyright holders nor the names of its contributors
+may be used to endorse or promote products derived from this software
+without specific prior written permission.
+
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
+CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
 //
 // This code creates DataShard for RNNLM dataset.
-// It is adapted from the convert_mnist_data from Caffe
 // The RNNLM dataset could be downloaded at
 //    http://www.rnnlm.org/
 //
 // Usage:
 //    create_shard.bin -train train_file -class_size [-debug] [-valid valid_file] [-test test_file]
 
-#include <gflags/gflags.h>
-#include <glog/logging.h>
-
-
 #include "utils/data_shard.h"
 #include "utils/common.h"
 #include "proto/common.pb.h"
@@ -255,14 +284,15 @@ int init_class() {
   return 0;
 }
 
-int create_shard(char *input_file, char *output_file) {
+int create_shard(const char *input_file, const char *output_file) {
   DataShard dataShard(output_file, DataShard::kCreate);
   singa::WordRecord wordRecord;
-  char word[MAX_STRING];
+  char word[MAX_STRING], str_buffer[32];
   FILE *fin;
   int a, i;
   fin = fopen(input_file, "rb");
+  int wcnt = 0;
   while (1) {
     readWord(word, fin);
     if (feof(fin)) break;
@@ -276,7 +306,8 @@ int create_shard(char *input_file, char *output_file) {
       wordRecord.set_class_index(class_idx);
       wordRecord.set_class_start(class_start[class_idx]);
       wordRecord.set_class_end(class_end[class_idx]);
-      dataShard.Insert(word, wordRecord);
+      int length = snprintf(str_buffer, 32, "%05d", wcnt++);
+      dataShard.Insert(string(str_buffer, length), wordRecord);
     }
   }
 
@@ -397,4 +428,4 @@ int main(int argc, char **argv) {
   if (test_mode) create_shard(test_file, "test_shard");
 
   return 0;
-}
\ No newline at end of file
+}
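
The create_shard.cc change above stops using the raw word string as the shard key and instead keys each record with a zero-padded running counter. The raw word repeats across the corpus, so reusing it as a key risks collisions or overwrites in the DataShard, while a padded counter gives unique keys whose lexicographic order matches insertion order. A minimal sketch of the key scheme, with a hypothetical helper name (not part of the SINGA API):

    #include <cstdio>
    #include <string>

    // Zero-padding to a fixed width keeps lexicographic order equal to
    // insertion order: "00002" < "00010", whereas "2" > "10" as strings.
    std::string MakeShardKey(int* counter) {
      char buf[32];
      int len = std::snprintf(buf, sizeof(buf), "%05d", (*counter)++);
      return std::string(buf, len);
    }
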
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e8e07f10/examples/rnnlm/job.conf
----------------------------------------------------------------------
diff --git a/examples/rnnlm/job.conf b/examples/rnnlm/job.conf
index 81bdb94..98a4157 100644
--- a/examples/rnnlm/job.conf
+++ b/examples/rnnlm/job.conf
@@ -5,7 +5,7 @@ cluster {
   nworkers_per_group: 1
   nservers_per_procs: 1
   nworkers_per_procs: 1
-  workspace: "examples/rnnlm/"
+  workspace: "examples/rnnlm/"
 }
 
 name: "recurrent-neural-network-language-model"
@@ -15,7 +15,7 @@
 train_steps:214050
 test_steps:583
 test_freq:7135
 #disp_freq is specific to training
-disp_freq:7135
+disp_freq:1000
 
 train_one_batch {
   alg: kBP
@@ -115,7 +115,7 @@ layer{
       vocab_size: 3720
     }
     param{
-      name: "w3"
+      name: "w3"
       init {
         type: kUniform
         low:-0.3

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e8e07f10/examples/rnnlm/rnnlm.cc
----------------------------------------------------------------------
diff --git a/examples/rnnlm/rnnlm.cc b/examples/rnnlm/rnnlm.cc
index 4d39b5f..0ad29a6 100644
--- a/examples/rnnlm/rnnlm.cc
+++ b/examples/rnnlm/rnnlm.cc
@@ -47,17 +47,14 @@ void RnnDataLayer::ComputeFeature(int flag, Metric *perf) {
   CHECK(records_.size() <= shard_->Count());
   records_[0] = records_[window_];
   window_ = max_window_;
-  singa::WordRecord wr;
   for (int i = 1; i <= max_window_; i++) {
     string key;
     if (shard_->Next(&key, &records_[i])) {
-      wr = records_[i];
-      if(wr.word_index() == 0) {
-        window_ = i;
+      if(records_[i].word_index() == 0) {
+        window_ = i;  // +1 ??
         break;
       }
-    }
-    else{
+    } else{
       shard_->SeekToFirst();
       CHECK(shard_->Next(&key, &records_[i]));
     }
@@ -68,6 +65,7 @@ void RnnDataLayer::ComputeFeature(int flag, Metric *perf) {
 void WordLayer::Setup(const LayerProto& proto, int npartitions) {
   Layer::Setup(proto, npartitions);
   CHECK_EQ(srclayers_.size(), 1);
+  LOG(ERROR) << srclayers_[0]->name();
   int max_window = static_cast<RnnDataLayer*>(srclayers_[0])->max_window();
   LOG(ERROR) << "clee " << max_window;
   data_.Reshape(vector<int>{max_window});
@@ -213,7 +211,7 @@ void OutputLayer::Setup(const LayerProto& proto, int npartitions) {
   int nclass = proto.GetExtension(output_conf).nclass();
   word_weight_ = Param::Create(proto.param(0));
   word_weight_->Setup(vector<int>{vocab_size, vdim});
-  class_weight_ = Param::Create(proto.param(0));
+  class_weight_ = Param::Create(proto.param(1));
   class_weight_->Setup(vector<int>{nclass, vdim});
 
   pword_.resize(max_window);
@@ -234,6 +232,7 @@ void OutputLayer::ComputeFeature(int flag, Metric* perf) {
     int end = static_cast<int>(label[t * 4 + 1]);
 
     auto wordWeight = word_weight.Slice(start, end);
+    CHECK_GT(end, start);
     pword_[t].Reshape(vector<int>{end-start});
     auto pword = RTensor1(&pword_[t]);
     pword = dot(src[t], wordWeight.T());
@@ -244,6 +243,8 @@ void OutputLayer::ComputeFeature(int flag, Metric* perf) {
 
     int wid = static_cast<int>(label[t * 4 + 2]);
     int cid = static_cast<int>(label[t * 4 + 3]);
+    CHECK_GT(end, wid);
+    CHECK_GE(wid, start);
     loss += -log(std::max(pword[wid - start] * pclass[t][cid], FLT_MIN));
     ppl += log10(std::max(pword[wid - start] * pclass[t][cid], FLT_MIN));
   }
@@ -269,11 +270,13 @@ void OutputLayer::ComputeGradient(int flag, Metric* perf) {
     int wid = static_cast<int>(label[t * 4 + 2]);
     int cid = static_cast<int>(label[t * 4 + 3]);
     auto pword = RTensor1(&pword_[t]);
+    CHECK_GT(end, wid);
+    CHECK_GE(wid, start);
     // gL/gclass_act
     pclass[t][cid] -= 1.0;
     // gL/gword_act
-    pword[wid] -= 1.0;
+    pword[wid - start] -= 1.0;
     // gL/gword_weight
     gword_weight.Slice(start, end) += dot(pword.FlatTo2D().T(), src[t].FlatTo2D());
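
The headline fix is pword[wid - start] in ComputeGradient. The output layer factorizes the softmax by word class, P(w) = P(c) * P(w | c), and the words of one class occupy rows [start, end) of word_weight; after Slice(start, end), all positions are relative to start. ComputeFeature already read pword[wid - start], but the gradient step still wrote pword[wid] -= 1.0, corrupting the wrong entry whenever start > 0. A simplified sketch of the convention, using plain vectors in place of SINGA tensors (the helper name is made up for illustration):

    #include <algorithm>
    #include <cassert>
    #include <cfloat>
    #include <cmath>
    #include <vector>

    // pword holds P(w | class) for the words [start, end) of the target's
    // class, indexed relative to `start`; pclass_cid is P(class == cid).
    float TokenLossAndGrad(std::vector<float>* pword, int start, int end,
                           int wid, float pclass_cid) {
      assert(wid >= start && wid < end);  // mirrors the new CHECK_GE/CHECK_GT
      float p = (*pword)[wid - start] * pclass_cid;  // P(w) = P(c) * P(w | c)
      (*pword)[wid - start] -= 1.0f;  // softmax-cross-entropy gradient;
                                      // the old code indexed (*pword)[wid]
      return -std::log(std::max(p, FLT_MIN));  // FLT_MIN guard as in the diff
    }

The same relative indexing is why gword_weight.Slice(start, end) receives the outer product directly: the sliced gradient block and pword share the class-local coordinate system.
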
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/e8e07f10/examples/rnnlm/rnnlm.h
----------------------------------------------------------------------
diff --git a/examples/rnnlm/rnnlm.h b/examples/rnnlm/rnnlm.h
index e9b7c55..888ebe7 100644
--- a/examples/rnnlm/rnnlm.h
+++ b/examples/rnnlm/rnnlm.h
@@ -32,10 +32,12 @@ class RnnDataLayer : public RNNLayer {
   void Setup(const LayerProto& proto, int npartitions) override;
   void ComputeFeature(int flag, Metric *perf) override;
   void ComputeGradient(int flag, Metric* perf) override {}
+  ConnectionType dst_layer_connection() const override {
+    return kOneToMany;
+  }
   int max_window() const {
     return max_window_;
   }
-  const std::vector<singa::WordRecord>& records() const { return records_; }
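
The new dst_layer_connection() override in rnnlm.h appears intended to tell the framework that RnnDataLayer's output is consumed by more than one downstream layer (kOneToMany is an existing SINGA ConnectionType value, not a symbol introduced here).

For context on the "79 ppl" figure in the subject line: ComputeFeature accumulates log10 of each token's probability into ppl, and per-word perplexity is conventionally recovered from that sum as sketched below. This is the standard formula, not SINGA's reporting code, whose aggregation step lies outside this diff:

    #include <cmath>

    // PPL = 10^(-(1/N) * sum_t log10 P(w_t)); a PPL of 79 means the model
    // is about as uncertain as a uniform choice among 79 words per step.
    double Perplexity(double sum_log10_prob, int num_words) {
      return std::pow(10.0, -sum_log10_prob / num_words);
    }
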
