Author: tommaso
Date: Sun Oct  9 06:32:50 2016
New Revision: 1763945

URL: http://svn.apache.org/viewvc?rev=1763945&view=rev
Log:
RNN losing memory of the previous hidden state - fixed
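
For context, a brief sketch of why this matters (illustrative only, not the project's code; it assumes the ND4J API already used by CharRNN and WordRNN, and the class and field names below are made up): if hPrev is reset to zeros for every sequence chunk, the network cannot carry information across chunk boundaries, which is the behaviour this revision fixes by storing the last hidden state after each forward pass.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;

// Minimal sketch, assuming ND4J; wxh, whh, hPrev and forwardStep are illustrative names.
class CarriedHiddenState {

  private final INDArray wxh; // input-to-hidden weights
  private final INDArray whh; // hidden-to-hidden weights
  private INDArray hPrev;     // hidden state carried across sequence chunks

  CarriedHiddenState(int vocabSize, int hiddenLayerSize) {
    wxh = Nd4j.randn(hiddenLayerSize, vocabSize).mul(0.01);
    whh = Nd4j.randn(hiddenLayerSize, hiddenLayerSize).mul(0.01);
    hPrev = Nd4j.zeros(hiddenLayerSize, 1); // reset only once, before training starts
  }

  INDArray forwardStep(INDArray xOneHot) {
    // h_t = tanh(Wxh * x_t + Whh * h_{t-1})
    INDArray h = Transforms.tanh(wxh.mmul(xOneHot).add(whh.mmul(hPrev)));
    // without this assignment every chunk would restart from a zero state,
    // i.e. the RNN would keep "losing memory" of the previous hidden state
    hPrev = h;
    return h;
  }
}

The change below applies the same idea inside the existing loss function: after the forward pass over a chunk, hPrev is set to the last row of hs instead of being discarded, so the next chunk continues from where this one ended.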

Modified:
    labs/yay/trunk/core/src/main/java/org/apache/yay/CharRNN.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/WordRNN.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/CharRNNCrossValidationTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/WordRNNCrossValidationTest.java
    labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/CharRNN.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/CharRNN.java?rev=1763945&r1=1763944&r2=1763945&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/CharRNN.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/CharRNN.java Sun Oct  9 06:32:50 2016
@@ -224,6 +224,8 @@ public class CharRNN {
       loss += -Transforms.log(ps.getRow(t).getRow(targets.getInt(t)), true).sumNumber().doubleValue(); // softmax (cross-entropy loss)
     }
 
+    this.hPrev = hs.getRow(inputs.length() - 1);
+
     // backward pass: compute gradients going backwards
     INDArray dhNext = Nd4j.zerosLike(hs.getRow(0));
     for (int t = inputs.length() - 1; t >= 0; t--) {

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/WordRNN.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/WordRNN.java?rev=1763945&r1=1763944&r2=1763945&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/WordRNN.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/WordRNN.java Sun Oct  9 06:32:50 2016
@@ -142,7 +142,7 @@ public class WordRNN {
         System.out.println("loss is NaN (over/underflow occured, try adjusting 
hyperparameters)");
         break;
       }
-      if (n % 1000 == 0) {
+      if (n % 100 == 0) {
         System.out.printf("iter %d, loss: %f\n", n, smoothLoss); // print 
progress
       }
 
@@ -223,6 +223,8 @@ public class WordRNN {
       loss += -Transforms.log(ps.getRow(t).getRow(targets.getInt(t)), true).sumNumber().doubleValue(); // softmax (cross-entropy loss)
     }
 
+    this.hPrev = hs.getRow(inputs.length() - 1);
+
     // backward pass: compute gradients going backwards
     INDArray dhNext = Nd4j.zerosLike(hs.getRow(0));
     for (int t = inputs.length() - 1; t >= 0; t--) {

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/CharRNNCrossValidationTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/CharRNNCrossValidationTest.java?rev=1763945&r1=1763944&r2=1763945&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/CharRNNCrossValidationTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/CharRNNCrossValidationTest.java Sun Oct  9 06:32:50 2016
@@ -53,6 +53,7 @@ public class CharRNNCrossValidationTest
             {1e-1f, 25, 100}, {1e-1f, 200, 50}, {1e-1f, 200, 40}, {1e-1f, 100, 30}, {1e-1f, 100, 20}, {1e-1f, 250, 20}, {1e-1f, 250, 15},
             {1e-2f, 50, 64}, {3e-2f, 50, 128}, {1e-2f, 100, 128}, {1e-2f, 100, 256}, {1e-2f, 100, 512}, {1e-2f, 100, 128},
             {1e-3f, 100, 256}, {1e-3f, 100, 512}, {1e-4f, 100, 128}, {1e-4f, 100, 256},
+            {1e-3f, 100, 100},
     });
   }
 
@@ -61,7 +62,7 @@ public class CharRNNCrossValidationTest
     System.out.println("hyperparameters: " + learningRate + ", " + seqLength + 
", " + hiddenLayerSize);
     InputStream resourceAsStream = 
getClass().getResourceAsStream("/word2vec/abstracts.txt");
     String text = IOUtils.toString(resourceAsStream);
-    int epochs = 20;
+    int epochs = 1000000;
     CharRNN charRNN = new CharRNN(learningRate, seqLength, hiddenLayerSize, epochs, text);
     List<String> words = Arrays.asList(text.split(" "));
     charRNN.learn();

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/WordRNNCrossValidationTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/WordRNNCrossValidationTest.java?rev=1763945&r1=1763944&r2=1763945&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/WordRNNCrossValidationTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/WordRNNCrossValidationTest.java Sun Oct  9 06:32:50 2016
@@ -53,7 +53,7 @@ public class WordRNNCrossValidationTest
             {1e-1f, 25, 100}, {1e-1f, 200, 50}, {1e-1f, 200, 40}, {1e-1f, 100, 30}, {1e-1f, 100, 20}, {1e-1f, 250, 20}, {1e-1f, 250, 15},
             {1e-2f, 50, 64}, {3e-2f, 50, 128}, {1e-2f, 100, 128}, {1e-2f, 100, 256}, {1e-2f, 100, 512}, {1e-2f, 100, 128},
             {1e-3f, 100, 256}, {1e-3f, 100, 512}, {1e-4f, 100, 128}, {1e-4f, 100, 256},
-            {1e-4f, 200, 1000},
+            {2e-1f, 25, 100},
     });
   }
 
@@ -62,23 +62,13 @@ public class WordRNNCrossValidationTest
     System.out.println("hyperparameters: " + learningRate + ", " + seqLength + 
", " + hiddenLayerSize);
     InputStream resourceAsStream = 
getClass().getResourceAsStream("/word2vec/abstracts.txt");
     String text = IOUtils.toString(resourceAsStream);
-    int epochs = 100;
+    int epochs = 100000;
     WordRNN wordRNN = new WordRNN(learningRate, seqLength, hiddenLayerSize, epochs, text);
-    List<String> words = Arrays.asList(text.split(" "));
     wordRNN.learn();
     for (int i = 0; i < 10; i++) {
-      double c = 0;
       String sample = wordRNN.sample(r.nextInt(wordRNN.getVocabSize()));
-      String[] sampleWords = sample.split(" ");
-      for (String sw : sampleWords) {
-        if (words.contains(sw)) {
-          c++;
-        }
-      }
-      if (c > 0) {
-        c /= sample.length();
-      }
-      System.out.println("correct word ratio: " + c);
+      System.out.println(sample);
+      System.out.println("***");
     }
   }
 

Modified: labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt?rev=1763945&r1=1763944&r2=1763945&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt (original)
+++ labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt Sun Oct  9 06:32:50 2016
@@ -3,14 +3,14 @@ With this goal in mind , we present a fo
 It turns out that the operators developed in this way are precisely the 
connectives of quantum logic ( Birkhoff and von Neumann , 1936 ) , which to our 
knowledge have not been exploited before in natural language processing .
 In quantum logic , arbitrary sets are replaced by linear subspaces of a vector 
space , and set unions , intersections and complements are replaced by vector 
sum , intersection and orthogonal complements of subspaces .
 We demonstrate that these logical connectives ( particularly the orthogonal 
complement for negation )  are powerful tools for exploring and analysing word 
meanings and show distinct advantages over Boolean operators in document 
retrieval experiments .
-This paper is organised as follows . 
+This paper is organised as follows :
 In Section 1.1 we describe some of the ways vectors have been used to 
represent the meanings of terms and documents in natural language processing , 
and describe the way the WORD-SPACE used in our later experiments is built 
automatically from text corpora .
 In Section 1.2 we define the logical connectives on vector spaces , focussing 
particularly on negation and disjunction . 
 This introduces the basic material needed to understand the worked examples 
given in Section 1.3 , and the document retrieval experiments described in 
Section 1.3.1 .
 Section 1.4 gives a much fuller outline of the theory of quantum logic , the 
natural setting for the operators of Section 1.2 .
 Finally , in Section 1.5 , we examine the similarities between quantum logic 
and WORD-SPACE , asking whether quantum logic is an appropriate framework for 
modelling word-meanings or if the initial successes we have obtained are mainly 
coincidental . 
 To some extent , this paper may have been written backwards , in that the 
implementation and examples are at the beginning and most of the theory is at 
the end .
-This is for two reasons .
+This is for two reasons :
 Firstly , we hoped to make the paper as accessible as possible and were afraid 
that beginning with an introduction to the full machinery of quantum logic 
would defeat this goal before the reader has a chance to realise that the 
techniques and equations used in this work are really quite elementary .
 Secondly , the link with ‘quantum logic’ was itself only brought to our 
attention after the bulk of the results in this paper had been obtained , and 
since this research is very much ongoing , we deemed it appropriate to give an 
honest account of its history and current state . 
 We propose two novel model architectures for computing continuous vector 
representations of words from very large data sets The quality of these 
representations is measured in a word similarity task , and the results are 
compared to the previously best performing techniques based on different types 
of neural networks .
@@ -73,4 +73,26 @@ The tutorial covers input encoding for n
 The development of intelligent machines is one of the biggest unsolved 
challenges in computer science .
 In this paper , we propose some fundamental properties these machines should 
have , focusing in particular on communication and learning .
 We discuss a simple environment that could be used to incrementally teach a 
machine the basics of natural-language-based communication , as a prerequisite 
to more complex interaction with human users .
-We also present some conjectures on the sort of algorithms the machine should 
support in order to profitably learn from the environment .
\ No newline at end of file
+We also present some conjectures on the sort of algorithms the machine should 
support in order to profitably learn from the environment .
+In this work , we present the first results for neuralizing an Unsupervised 
Hidden Markov Model .
+We evaluate our approach on tag induction .
+Our approach outperforms existing generative models and is competitive with 
the state-of-the-art though with a simpler model easily extended to include 
additional context .
+Deep Neural Networks (DNNs) are powerful models that have achieved excellent 
performance on difficult learning tasks .
+Although DNNs work well whenever large labeled training sets are available , 
they cannot be used to map sequences to sequences .
+In this paper, we present a general end-to-end approach to sequence learning 
that makes minimal assumptions on the sequence structure .
+Our method uses a multilayered Long Short-TermMemory (LSTM) to map the input 
sequence to a vector of a fixed dimensionality , and then another deep LSTM to 
decode the target sequence from the vector .
+Our main result is that on an English to French translation task 
fromtheWMT’14 dataset , the translations produced by the LSTM achieve a BLEU 
score of 34.8 on the entire test set, where the LSTM’s BLEU score was 
penalized on out-of-vocabulary words .
+Additionally , the LSTM did not have difficulty on long sentences . For 
comparison , a phrase-based SMT system achieves a BLEU score of 33.3 on the 
same dataset .
+When we used the LSTM to rerank the 1000 hypotheses produced by the 
aforementioned SMT system , its BLEU score increases to 36.5 , which is close 
to the previous best result on this task.
+The LSTM also learned sensible phrase and sentence representations that are 
sensitive to word order and are relatively invariant to the active and the 
passive voice.
+Finally , we found that reversing the order of the words in all source 
sentences (but not target sentences) improved the LSTM’s performancemarkedly 
, because doing so introduced many short term dependencies between the source 
and the target sentence which made the optimization problem easier .
+We combine Riemannian geometry with the mean field theory of high dimensional 
chaos to study the nature of signal propagation in generic , deep neural 
networks with random weights .
+Our results reveal an order-to-chaos expressivity phase transition , with 
networks in the chaotic phase computing nonlinear functions whose global 
curvature grows exponentially with depth but not width .
+We prove this generic class of deep random functions cannot be efficiently 
computed by any shallow network , going beyond prior work restricted to the 
analysis of single functions .
+Moreover , we formalize and quantitatively demonstrate the long conjectured 
idea that deep networks can disentangle highly curved manifolds in input space 
into flat manifolds in hidden space .
+Our theoretical analysis of the expressive power of deep networks broadly 
applies to arbitrary nonlinearities , and provides a quantitative underpinning 
for previously abstract notions about the geometry of deep functions .
+In this paper , we propose a novel neural network model called RNN 
Encoder–Decoder that consists of two recurrent neural networks (RNN) .
+One RNN encodes a sequence of symbols into a fixedlength vector representation 
, and the other decodes the representation into another sequence of symbols .
+The encoder and decoder of the proposed model are jointly trained to maximize 
the conditional probability of a target sequence given a source sequence .
+The performance of a statistical machine translation system is empirically 
found to improve by using the conditional probabilities of phrase pairs 
computed by the RNN Encoder–Decoder as an additional feature in the existing 
log-linear model .
+Qualitatively, we show that the proposed model learns a semantically and 
syntactically meaningful representation of linguistic phrases .
\ No newline at end of file


