Author: tommaso
Date: Wed Oct  7 10:55:37 2015
New Revision: 1707236

URL: http://svn.apache.org/viewvc?rev=1707236&view=rev
Log:
added iterations parameter to backprop, enhanced word2vec test

Modified:
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java
    labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java?rev=1707236&r1=1707235&r2=1707236&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java Wed Oct  7 10:55:37 2015
@@ -40,7 +40,7 @@ public class BackPropagationLearningStra
 
   public static final double DEFAULT_THRESHOLD = 0.05;
   public static final int MAX_ITERATIONS = 100000;
-  public static final double DEFAULT_ALPHA = 0.000003;
+  public static final double DEFAULT_ALPHA = 0.0000003;
 
   private final PredictionStrategy<Double, Double> predictionStrategy;
   private final CostFunction<RealMatrix, Double, Double> costFunction;
@@ -106,11 +106,13 @@ public class BackPropagationLearningStra
         // calculate cost
         double newCost = costFunction.calculateAggregatedCost(samples, hypothesis);
 
-        if (newCost > cost && batch == -1) {
+        if (Double.POSITIVE_INFINITY == newCost || newCost > cost && batch == -1) {
           throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost going from " + cost + " to " + newCost);
         } else if (iterations > 1 && (cost == newCost || newCost < threshold || iterations > maxIterations)) {
           System.out.println("successfully converged after " + (iterations - 1) + " iterations (alpha:" + alpha + ",threshold:" + threshold + ") with cost " + newCost + " and parameters " + Arrays.toString(hypothesis.getParameters()));
           break;
+        } else if (Double.isNaN(newCost)){
+          throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost calculation underflow");
         }
 
         // update registered cost
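
A note on the change above: the training loop now treats an infinite cost, or a cost increase in full-batch mode (batch == -1), as divergence, and a NaN cost as numeric underflow, alongside the lowered DEFAULT_ALPHA. A minimal, self-contained sketch of that guard is shown below; the constant values mirror this revision, but the class and method names are illustrative, not the committed code.

  // Sketch (not the committed class) of the cost guard added in this revision.
  public class ConvergenceGuardSketch {

    static final double DEFAULT_THRESHOLD = 0.05;
    static final int MAX_ITERATIONS = 100000;
    static final double DEFAULT_ALPHA = 0.0000003; // lowered from 0.000003

    // Returns true when training can stop; throws when it has demonstrably failed.
    static boolean shouldStop(double cost, double newCost, int iterations, int batch) {
      if (Double.POSITIVE_INFINITY == newCost || newCost > cost && batch == -1) {
        // cost overflowed to infinity, or increased in full-batch mode: alpha too large
        throw new RuntimeException("failed to converge at iteration " + iterations
            + " with alpha " + DEFAULT_ALPHA + " : cost going from " + cost + " to " + newCost);
      } else if (iterations > 1 && (cost == newCost || newCost < DEFAULT_THRESHOLD
          || iterations > MAX_ITERATIONS)) {
        // cost stabilized, fell below the threshold, or the iteration budget ran out
        return true;
      } else if (Double.isNaN(newCost)) {
        // NaN fails the numeric comparisons above, so it is normally caught here
        throw new RuntimeException("failed to converge at iteration " + iterations
            + " with alpha " + DEFAULT_ALPHA + " : cost calculation underflow");
      }
      return false;
    }

    public static void main(String[] args) {
      System.out.println(shouldStop(0.40, 0.04, 10, -1)); // true: cost below threshold
    }
  }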

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java?rev=1707236&r1=1707235&r2=1707236&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/Word2VecTest.java Wed Oct  7 10:55:37 2015
@@ -19,9 +19,14 @@
 package org.apache.yay.core;
 
 import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.io.ObjectOutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -31,7 +36,9 @@ import java.util.List;
 import java.util.Random;
 
 import org.apache.commons.math3.linear.Array2DRowRealMatrix;
+import org.apache.commons.math3.linear.MatrixUtils;
 import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.SingularValueDecomposition;
 import org.apache.yay.Feature;
 import org.apache.yay.Input;
 import org.apache.yay.NeuralNetwork;
@@ -58,69 +65,87 @@ public class Word2VecTest {
     Collection<String> fragments = getFragments(sentences, 4);
     assertFalse(fragments.isEmpty());
 
+    // TODO : make it possible to define the no. of hidden units
+    //    int n = new Random().nextInt(20);
     TrainingSet<Double, Double> trainingSet = createTrainingSet(vocabulary, fragments);
 
-//    int n = new Random().nextInt(20);
-
     TrainingExample<Double, Double> next = trainingSet.iterator().next();
-    int inputSize = next.getFeatures().size();
+    int inputSize = next.getFeatures().size() ;
     int outputSize = next.getOutput().length;
     RealMatrix[] randomWeights = createRandomWeights(inputSize, inputSize, outputSize);
 
     FeedForwardStrategy predictionStrategy = new FeedForwardStrategy(new IdentityActivationFunction<Double>());
     BackPropagationLearningStrategy learningStrategy = new BackPropagationLearningStrategy(BackPropagationLearningStrategy.
             DEFAULT_ALPHA, -1, BackPropagationLearningStrategy.DEFAULT_THRESHOLD, predictionStrategy, new LMSCostFunction(),
-            5);
+            10);
     NeuralNetwork neuralNetwork = NeuralNetworkFactory.create(randomWeights, learningStrategy, predictionStrategy);
 
     neuralNetwork.learn(trainingSet);
 
-    String word = vocabulary.get(new Random().nextInt(vocabulary.size()));
-//    final Double[] doubles = ConversionUtils.toValuesCollection(next.getFeatures()).toArray(new Double[next.getFeatures().size()]);
-    final Double[] doubles = hotEncode(word, vocabulary);
-//    String word = hotDecode(doubles, vocabulary);
-
-//    TrainingExample<Double, Double> input = ExamplesFactory.createDoubleArrayTrainingExample(new Double[outputSize], doubles);
-    Input<Double> input = new TrainingExample<Double, Double>() {
-      @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
-        for (Double d : doubles) {
-          Feature<Double> f = new Feature<Double>();
-          f.setValue(d);
-          features.add(f);
+    RealMatrix vectorsMatrix = MatrixUtils.createRealMatrix(next.getFeatures().size(), next.getOutput().length);
+
+    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File("target/vectors.txt")));
+    int m = 0;
+    for (String word : vocabulary) {
+      final Double[] doubles = hotEncode(word, vocabulary);
+      Input<Double> input = new TrainingExample<Double, Double>() {
+        @Override
+        public ArrayList<Feature<Double>> getFeatures() {
+          ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
+          Feature<Double> byasFeature = new Feature<Double>();
+          byasFeature.setValue(1d);
+          features.add(byasFeature);
+          for (Double d : doubles) {
+            Feature<Double> f = new Feature<Double>();
+            f.setValue(d);
+            features.add(f);
+          }
+          return features;
         }
-        return features;
-      }
 
-      @Override
-      public Double[] getOutput() {
-        return new Double[0];
+        @Override
+        public Double[] getOutput() {
+          return new Double[0];
+        }
+      };
+      Double[] predict = neuralNetwork.predict(input);
+      assertNotNull(predict);
+      double[] row = new double[predict.length];
+      for (int x = 0; x < row.length; x++) {
+        row[x] = predict[x];
       }
-    };
-    Double[] predict = neuralNetwork.predict(input);
-    assertNotNull(predict);
-
-    System.out.println(Arrays.toString(predict));
-
-    Double[] wordVec1 = Arrays.copyOfRange(predict, 0, vocabulary.size());
-    assertNotNull(wordVec1);
-    Double[] wordVec2 = Arrays.copyOfRange(predict, vocabulary.size(), 2 * vocabulary.size());
-    assertNotNull(wordVec2);
-    Double[] wordVec3 = Arrays.copyOfRange(predict, 2 * vocabulary.size(), 3 * vocabulary.size());
-    assertNotNull(wordVec3);
-
-    String word1 = hotDecode(wordVec1, vocabulary);
-    assertNotNull(word1);
-    assertTrue(vocabulary.contains(word1));
-    String word2 = hotDecode(wordVec2, vocabulary);
-    assertNotNull(word2);
-    assertTrue(vocabulary.contains(word2));
-    String word3 = hotDecode(wordVec3, vocabulary);
-    assertNotNull(word3);
-    assertTrue(vocabulary.contains(word3));
+      vectorsMatrix.setRow(m, row);
+      m++;
+
+      String vectorString = Arrays.toString(predict);
+      bufferedWriter.append(vectorString);
+      bufferedWriter.newLine();
+
+      Double[] wordVec1 = Arrays.copyOfRange(predict, 0, vocabulary.size());
+      assertNotNull(wordVec1);
+      Double[] wordVec2 = Arrays.copyOfRange(predict, vocabulary.size(), 2 * vocabulary.size());
+      assertNotNull(wordVec2);
+      Double[] wordVec3 = Arrays.copyOfRange(predict, 2 * vocabulary.size(), 3 * vocabulary.size());
+      assertNotNull(wordVec3);
+
+      String word1 = hotDecode(wordVec1, vocabulary);
+      assertNotNull(word1);
+      assertTrue(vocabulary.contains(word1));
+      String word2 = hotDecode(wordVec2, vocabulary);
+      assertNotNull(word2);
+      assertTrue(vocabulary.contains(word2));
+      String word3 = hotDecode(wordVec3, vocabulary);
+      assertNotNull(word3);
+      assertTrue(vocabulary.contains(word3));
+
+      System.out.println(word + " -> " + word1 + " " + word2 + " " + word3);
+    }
+    bufferedWriter.flush();
+    bufferedWriter.close();
+
+    ObjectOutputStream os = new ObjectOutputStream(new FileOutputStream(new File("target/vectors.bin")));
+    MatrixUtils.serializeRealMatrix(vectorsMatrix, os);
 
-    System.out.println(word + " -> " + word1 + " " + word2 + " " + word3);
   }
 
   private String hotDecode(Double[] doubles, List<String> vocabulary) {
@@ -136,7 +161,6 @@ public class Word2VecTest {
     return vocabulary.get(index);
   }
 
-
   private TrainingSet<Double, Double> createTrainingSet(List<String> vocabulary, Collection<String> fragments) {
     Collection<TrainingExample<Double, Double>> samples = new LinkedList<TrainingExample<Double, Double>>();
     for (String fragment : fragments) {
@@ -170,6 +194,9 @@ public class Word2VecTest {
           @Override
           public ArrayList<Feature<Double>> getFeatures() {
             ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
+            Feature<Double> byasFeature = new Feature<Double>();
+            byasFeature.setValue(1d);
+            features.add(byasFeature);
             for (Double d : input) {
               Feature<Double> e = new Feature<Double>();
               e.setValue(d);
@@ -283,4 +310,4 @@ public class Word2VecTest {
     }
     return initialWeights;
   }
-}
+}
\ No newline at end of file
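
For reference, the enhanced test now prepends a bias feature of 1.0 to each one-hot encoded vocabulary word, writes the predicted vectors to target/vectors.txt, and serializes them as a matrix to target/vectors.bin. A rough, self-contained sketch of the one-hot encoding and decoding the test relies on follows; these are hypothetical helpers, and the committed hotEncode/hotDecode may differ in detail.

  // Illustrative one-hot encode/decode sketch, not the committed test helpers.
  import java.util.Arrays;
  import java.util.List;

  public class OneHotSketch {

    // 1.0 at the word's vocabulary index, 0.0 everywhere else
    static Double[] encode(String word, List<String> vocabulary) {
      Double[] vector = new Double[vocabulary.size()];
      Arrays.fill(vector, 0d);
      vector[vocabulary.indexOf(word)] = 1d;
      return vector;
    }

    // the word whose component has the highest value wins
    static String decode(Double[] vector, List<String> vocabulary) {
      int index = 0;
      for (int i = 1; i < vector.length; i++) {
        if (vector[i] > vector[index]) {
          index = i;
        }
      }
      return vocabulary.get(index);
    }

    public static void main(String[] args) {
      List<String> vocabulary = Arrays.asList("air", "canada", "word");
      Double[] v = encode("canada", vocabulary);
      System.out.println(Arrays.toString(v) + " -> " + decode(v, vocabulary));
    }
  }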

Modified: labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt?rev=1707236&r1=1707235&r2=1707236&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt (original)
+++ labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt Wed Oct  7 10:55:37 2015
@@ -2,14 +2,14 @@ The word2vec software of Tomas Mikolov a
 The learning models behind the software are described in two research papers
 We found the description of the models in these papers to be somewhat cryptic and hard to follow
 While the motivations and presentation may be obvious to the neural-networks language-modeling crowd we had to struggle quite a bit to figure out the rationale behind the equations
-This note is an attempt to explain the negative sampling equation in “Distributed Representations of Words and Phrases and their Compositionality” by Tomas Mikolov, Ilya Sutskever, Kai Chen, Greg Corrado and Jeffrey Dean
+This note is an attempt to explain the negative sampling equation in “Distributed Representations of Words and Phrases and their Compositionality” by Tomas Mikolov Ilya Sutskever Kai Chen Greg Corrado and Jeffrey Dean
 The departure point of the paper is the skip-gram model
 In this model we are given a corpus of words w and their contexts c
-We consider the conditional probabilities p(c|w) and given a corpus Text, the goal is to set the parameters θ of p(c|w;θ) so as to maximize the corpus probability
+We consider the conditional probabilities p(c|w) and given a corpus Text the goal is to set the parameters θ of p(c|w;θ) so as to maximize the corpus probability
 The recently introduced continuous Skip-gram model is an efficient method for learning high-quality distributed vector representations that capture a large number of precise syntactic and semantic word relationships
 In this paper we present several extensions that improve both the quality of the vectors and the training speed
 By subsampling of the frequent words we obtain significant speedup and also learn more regular word representations
 We also describe a simple alternative to the hierarchical softmax called negative sampling
 An inherent limitation of word representations is their indifference to word order and their inability to represent idiomatic phrases
-For example, the meanings of “Canada” and “Air” cannot be easily combined to obtain “Air Canada”
-Motivated by this example, we present a simple method for finding phrases in text and show that learning good vector representations for millions of phrases is possible
\ No newline at end of file
+For example the meanings of “Canada” and “Air” cannot be easily combined to obtain “Air Canada”
+Motivated by this example we present a simple method for finding phrases in text and show that learning good vector representations for millions of phrases is possible
\ No newline at end of file


