Author: tommaso
Date: Mon Nov  2 16:10:29 2015
New Revision: 1712061

URL: http://svn.apache.org/viewvc?rev=1712061&view=rev
Log:
performance improvements for feed-forward, backpropagation and softmax

Modified:
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
    labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java Mon Nov  2 16:10:29 2015
@@ -18,11 +18,8 @@
  */
 package org.apache.yay.core;
 
-import java.util.Arrays;
-import java.util.Iterator;
-
-import org.apache.commons.math3.linear.Array2DRowRealMatrix;
 import org.apache.commons.math3.linear.RealMatrix;
+import org.apache.commons.math3.linear.RealMatrixChangingVisitor;
 import org.apache.yay.CostFunction;
 import org.apache.yay.DerivativeUpdateFunction;
 import org.apache.yay.LearningStrategy;
@@ -32,6 +29,9 @@ import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.WeightLearningException;
 
+import java.util.Arrays;
+import java.util.Iterator;
+
 /**
  * Back propagation learning algorithm for neural networks implementation (see
  * <code>http://en.wikipedia.org/wiki/Backpropagation</code>).
@@ -110,7 +110,7 @@ public class BackPropagationLearningStra
        } else if (iterations > 1 && (cost == newCost || newCost < threshold || iterations > maxIterations)) {
          System.out.println("successfully converged after " + (iterations - 1) + " iterations (alpha:" + alpha + ",threshold:" + threshold + ") with cost " + newCost + " and parameters " + Arrays.toString(hypothesis.getParameters()));
          break;
-        } else if (Double.isNaN(newCost)){
+        } else if (Double.isNaN(newCost)) {
          throw new RuntimeException("failed to converge at iteration " + iterations + " with alpha " + alpha + " : cost calculation underflow");
        }
 
@@ -135,25 +135,39 @@ public class BackPropagationLearningStra
     return updatedWeights;
   }
 
-  private RealMatrix[] updateWeights(RealMatrix[] weightsMatrixSet, RealMatrix[] derivatives, double alpha) {
+  private RealMatrix[] updateWeights(RealMatrix[] weightsMatrixSet, final RealMatrix[] derivatives, final double alpha) {
     RealMatrix[] updatedParameters = new RealMatrix[weightsMatrixSet.length];
+
     for (int l = 0; l < weightsMatrixSet.length; l++) {
-      double[][] updatedWeights = weightsMatrixSet[l].getData();
-      for (int i = 0; i < updatedWeights.length; i++) {
-        for (int j = 0; j < updatedWeights[i].length; j++) {
-          double curVal = updatedWeights[i][j];
-          if (!(i == 0 && curVal == 0d) && !(j == 0 && curVal == 1d)) {
-            updatedWeights[i][j] = updatedWeights[i][j] - alpha * derivatives[l].getData()[i][j];
+      RealMatrix realMatrix = weightsMatrixSet[l].copy();
+      final double[][] data = derivatives[l].getData();
+      RealMatrixChangingVisitor visitor = new RealMatrixChangingVisitor() {
+
+        @Override
+        public void start(int rows, int columns, int startRow, int endRow, int startColumn, int endColumn) {
+
+        }
+
+        @Override
+        public double visit(int row, int column, double value) {
+          if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
+            return value - alpha * data[row][column];
+          } else {
+            return value;
           }
         }
-      }
-      if (updatedParameters[l] != null) {
-        updatedParameters[l].setSubMatrix(updatedWeights, 0, 0);
-      } else {
-        updatedParameters[l] = new Array2DRowRealMatrix(updatedWeights);
+
+        @Override
+        public double end() {
+          return 0;
+        }
+      };
+      realMatrix.walkInOptimizedOrder(visitor);
+      if (updatedParameters[l] == null) {
+        updatedParameters[l] = realMatrix;
       }
     }
     return updatedParameters;
   }
 
-}
\ No newline at end of file
+}
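
A note on the change above: the old loop called derivatives[l].getData() once per matrix entry, and getData() returns a fresh defensive copy of the whole backing array each time, so the rewrite hoists that copy out of the loop and applies the update in place through a RealMatrixChangingVisitor. A minimal, self-contained sketch of that visitor pattern (assuming only commons-math3 on the classpath; the class name and update rule here are illustrative, not the commit's bias-aware rule):

    import org.apache.commons.math3.linear.Array2DRowRealMatrix;
    import org.apache.commons.math3.linear.DefaultRealMatrixChangingVisitor;
    import org.apache.commons.math3.linear.RealMatrix;

    public class VisitorSketch {
      public static void main(String[] args) {
        RealMatrix m = new Array2DRowRealMatrix(new double[][]{{1d, 2d}, {3d, 4d}});
        final double alpha = 0.01d;
        // mutates every entry in place; the matrix picks the traversal order
        // (row-major for Array2DRowRealMatrix, block-wise for BlockRealMatrix)
        m.walkInOptimizedOrder(new DefaultRealMatrixChangingVisitor() {
          @Override
          public double visit(int row, int column, double value) {
            return value - alpha * value; // toy gradient step, not the commit's rule
          }
        });
        System.out.println(m);
      }
    }

DefaultRealMatrixChangingVisitor supplies do-nothing start() and end() implementations, which is why the sketch only overrides visit(); the commit implements the raw interface directly, to the same effect.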

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java Mon Nov  2 16:10:29 2015
@@ -96,7 +96,7 @@ public class FeedForwardStrategy impleme
           return 0;
         }
       };
-      x.walkInRowOrder(visitor);
+      x.walkInOptimizedOrder(visitor);
       debugOutput[w] = x.getRowVector(0);
     }
     return debugOutput;
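
For context on the one-line change above: walkInRowOrder forces row-major traversal whatever the backing storage, while walkInOptimizedOrder lets the matrix implementation choose its fastest order; Array2DRowRealMatrix walks rows either way, but BlockRealMatrix visits entries block by block, which is markedly more cache-friendly. The swap is therefore at worst a no-op and a win whenever a block-backed matrix flows through this path.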

Modified: labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java (original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/SoftmaxActivationFunction.java Mon Nov  2 16:10:29 2015
@@ -21,21 +21,36 @@ package org.apache.yay.core;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.yay.ActivationFunction;
 
+import java.util.Map;
+import java.util.WeakHashMap;
+
 /**
  * Softmax activation function
  */
 public class SoftmaxActivationFunction implements ActivationFunction<Double> {
 
-    @Override
-    public Double apply(RealMatrix weights, Double signal) {
-        double num = Math.exp(signal);
-        double den = 0d;
-        for (int i = 0; i < weights.getRowDimension(); i++) {
-            double[] row1 = weights.getRow(i);
-            for (int j = 0; j < weights.getColumnDimension(); j++) {
-                den += Math.exp(row1[j]);
-            }
+  private static final Map<RealMatrix, Double> cache = new WeakHashMap<RealMatrix, Double>();
+
+  @Override
+  public Double apply(RealMatrix weights, Double signal) {
+    double num = Math.exp(signal);
+    double den = getDen(weights);
+    return num / den;
+  }
+
+  private double getDen(RealMatrix weights) {
+    Double d = cache.get(weights);
+    if (d == null) {
+      double den = 0d;
+      for (int i = 0; i < weights.getRowDimension(); i++) {
+        double[] row1 = weights.getRow(i);
+        for (int j = 0; j < weights.getColumnDimension(); j++) {
+          den += Math.exp(row1[j]);
         }
-        return num / den;
+      }
+      d = den;
+      cache.put(weights, d);
     }
+    return d;
+  }
 }
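
Why the cache above is sound: the denominator computed in getDen, the sum over all i,j of exp(weights[i][j]), depends only on the weights matrix and never on the incoming signal, so it can be computed once per matrix and reused across every activation against it, while only exp(signal) varies per call. A usage sketch (the weight values are illustrative):

    import org.apache.commons.math3.linear.Array2DRowRealMatrix;
    import org.apache.commons.math3.linear.RealMatrix;
    import org.apache.yay.core.SoftmaxActivationFunction;

    public class SoftmaxCacheDemo {
      public static void main(String[] args) {
        RealMatrix weights = new Array2DRowRealMatrix(new double[][]{{0d, 1d}, {2d, 3d}});
        SoftmaxActivationFunction softmax = new SoftmaxActivationFunction();
        System.out.println(softmax.apply(weights, 1d)); // computes and caches the exp-sum
        System.out.println(softmax.apply(weights, 2d)); // cache hit: only exp(signal) is new
      }
    }

Two WeakHashMap properties matter for this design: entries vanish once a weights matrix becomes otherwise unreachable, so the cache cannot leak across runs; and keys are compared with equals(), which Commons Math implements content-based for matrices, so a lookup scans the matrix once, still far cheaper than re-evaluating exp() for every entry.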

Modified: labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java (original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java Mon Nov  2 16:10:29 2015
@@ -20,9 +20,13 @@ package org.apache.yay.core;
 
 import org.apache.commons.math3.linear.Array2DRowRealMatrix;
 import org.apache.commons.math3.linear.RealMatrix;
-import org.apache.commons.math3.ml.distance.*;
+import org.apache.commons.math3.ml.distance.CanberraDistance;
+import org.apache.commons.math3.ml.distance.ChebyshevDistance;
+import org.apache.commons.math3.ml.distance.DistanceMeasure;
+import org.apache.commons.math3.ml.distance.EarthMoversDistance;
+import org.apache.commons.math3.ml.distance.EuclideanDistance;
+import org.apache.commons.math3.ml.distance.ManhattanDistance;
 import org.apache.commons.math3.stat.correlation.PearsonsCorrelation;
-
 import org.apache.yay.ActivationFunction;
 import org.apache.yay.Feature;
 import org.apache.yay.NeuralNetwork;
@@ -31,6 +35,9 @@ import org.apache.yay.TrainingSet;
 import org.junit.Test;
 
 import java.io.BufferedReader;
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileWriter;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
@@ -67,14 +74,14 @@ public class WordVectorsTest {
 
     int inputSize = next.getFeatures().size();
     int outputSize = next.getOutput().length;
-    int hiddenSize = 50;
+    int hiddenSize = 100;
     RealMatrix[] randomWeights = createRandomWeights(inputSize, hiddenSize, outputSize);
 
     Map<Integer, ActivationFunction<Double>> activationFunctions = new HashMap<Integer, ActivationFunction<Double>>();
     activationFunctions.put(0, new IdentityActivationFunction<Double>());
     activationFunctions.put(1, new SoftmaxActivationFunction());
     FeedForwardStrategy predictionStrategy = new FeedForwardStrategy(activationFunctions);
-    BackPropagationLearningStrategy learningStrategy = new BackPropagationLearningStrategy(0.03d, 1,
+    BackPropagationLearningStrategy learningStrategy = new BackPropagationLearningStrategy(0.01d, 1,
             BackPropagationLearningStrategy.DEFAULT_THRESHOLD, predictionStrategy, new LogisticRegressionCostFunction(),
             100);
     NeuralNetwork neuralNetwork = NeuralNetworkFactory.create(randomWeights, learningStrategy, predictionStrategy);
@@ -130,6 +137,20 @@ public class WordVectorsTest {
       computeSimilarities(vocabulary, wordVectors, distanceMeasure);
     }
 
+    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File("target/sg-vectors.csv")));
+    for (int i = 1; i < wordVectors.getColumnDimension(); i++) {
+      double[] a = wordVectors.getColumnVector(i).toArray();
+      String csq = Arrays.toString(Arrays.copyOfRange(a, 1, a.length));
+      csq = csq.substring(1, csq.length() - 1);
+      bufferedWriter.append(csq);
+      bufferedWriter.append(",");
+      bufferedWriter.append(vocabulary.get(i - 1));
+      bufferedWriter.newLine();
+    }
+
+    bufferedWriter.flush();
+    bufferedWriter.close();
+
 //    RealMatrix mappingsMatrix = MatrixUtils.createRealMatrix(next.getFeatures().size(), next.getOutput().length);
 //
 //    BufferedWriter bufferedWriter = new BufferedWriter(new FileWriter(new File("target/sg-vectors.txt")));
@@ -365,7 +386,7 @@ public class WordVectorsTest {
     Collection<String> sentences = new LinkedList<String>();
     String line;
     while ((line = bufferedReader.readLine()) != null) {
-      sentences.add(line);
+      sentences.add(line.toLowerCase());
     }
     return sentences;
   }
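
The new export above writes one CSV line per vocabulary word: the word's hidden-layer column vector with its bias component dropped (the copyOfRange starting at index 1), comma-separated, with the word itself as the last field, e.g. for loading into external plotting or nearest-neighbour tooling. Lower-casing the sentences in the last hunk folds case variants of a token into a single vocabulary entry, which shrinks the one-hot input and output layers accordingly.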

Modified: labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt
URL: http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt?rev=1712061&r1=1712060&r2=1712061&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt (original)
+++ labs/yay/trunk/core/src/test/resources/word2vec/sentences.txt Mon Nov  2 16:10:29 2015
@@ -34,4 +34,19 @@ This is both computationally unfeasible
 Based on a recent work that proposed to learn a generic language model that can be modified through a set of document-specific parameters we explore use of new neural network models that are adapted to ad-hoc IR tasks
 Within the language model IR framework we propose and study the use of a generic language model as well as a document-specific language model
 Both can be used as a smoothing component but the latter is more adapted to the document at hand and has the potential of being used as a full document language model
-We experiment with such models and analyze their results on TREC-1 to 8 datasets
\ No newline at end of file
+We experiment with such models and analyze their results on TREC-1 to 8 datasets
+The word2vec model and application by Mikolov et al have attracted a great amount of attention in recent two years
+The vector representations of words learned by word2vec models have been proven to be able to carry semantic meanings and are useful in various NLP tasks
+As an increasing number of researchers would like to experiment with word2vec I notice that there lacks a material that comprehensively explains the parameter learning process of word2vec in details thus preventing many people with less neural network experience from understanding how exactly word2vec works
+This note provides detailed derivations and explanations of the parameter update equations for the word2vec models including the original continuous bag-of-word (CBOW) and skip-gram models as well as advanced tricks hierarchical soft-max and negative sampling
+In the appendix a review is given on the basics of neuron network models and backpropagation
+To avoid the inaccuracy caused by classifying the example into several categories given by TREC manually we take the word2vec to represent all attractions and user contexts in the continuous vector space learnt by neural network language models
+The base of NNML is using neural networks for the probability function
+The model learns simultaneously a distributed representation for each word along with the probability function for word sequences expressed in terms of these representations
+Training such large models we propose continuous bag of words as our framework and soft-max as the active function
+So we use the word2vec to train wikitravel corpus and got the word vector
+To avoid the curse of dimensionality by learning a distributed representation for words as our word vector we define a test set that compare different dimensionality of vectors for our task using the same training data and using the same model architecture
+We extend the word2vec framework to capture meaning across languages
+The input consists of a source text and a word-aligned parallel text in a second language
+The joint word2vec tool then represents words in both languages within a common “semantic” vector space
+The result can be used to enrich lexicons of under-resourced languages to identify ambiguities and to perform clustering and classification
\ No newline at end of file


