Author: tommaso
Date: Thu Dec 17 07:04:12 2015
New Revision: 1720488

URL: http://svn.apache.org/viewvc?rev=1720488&view=rev
Log:
performance improvements (parallel execution, simplified Feature APIs); revert 
the derivative update function to the latest working version

Removed:
    labs/yay/trunk/api/src/main/java/org/apache/yay/Feature.java
Modified:
    labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java
    labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java
    labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java
    labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java
    labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
    labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt

Modified: labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java (original)
+++ labs/yay/trunk/api/src/main/java/org/apache/yay/Input.java Thu Dec 17 
07:04:12 2015
@@ -18,7 +18,7 @@
  */
 package org.apache.yay;
 
-import java.util.ArrayList;
+import java.util.List;
 
 /**
  * A sample/input characterized by its features.
@@ -26,10 +26,10 @@ import java.util.ArrayList;
 public interface Input<F> {
 
   /**
-   * Get this <code>Input</code> {@link org.apache.yay.Feature}s
+   * Get this <code>Input</code>'s features
    *
-   * @return an <code>ArrayList</code> of {@link org.apache.yay.Feature}s
+   * @return a <code>List</code> of features
    */
-  ArrayList<Feature<F>> getFeatures();
+  List<F> getFeatures();
 
 }
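
Note on the simplified API: with the Feature wrapper removed, an Input<Double> just
supplies a List<Double>. A minimal usage sketch, assuming Input stays a single-method
interface (the lambda-based test fixtures further down suggest it does); the class name
and values are illustrative only:

import java.util.Arrays;
import java.util.List;

import org.apache.yay.Input;

public class InputSketch {
  public static void main(String[] args) {
    // bias term followed by two feature values
    Input<Double> input = () -> Arrays.asList(1d, 0.5, 0.3);
    List<Double> features = input.getFeatures();
    System.out.println(features); // [1.0, 0.5, 0.3]
  }
}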

Modified: 
labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java 
(original)
+++ labs/yay/trunk/api/src/main/java/org/apache/yay/PredictionStrategy.java Thu 
Dec 17 07:04:12 2015
@@ -18,10 +18,11 @@
  */
 package org.apache.yay;
 
-import java.util.Collection;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.linear.RealVector;
 
+import java.util.Collection;
+
 /**
  * A {@link PredictionStrategy} defines an algorithm for the prediction of 
outputs
  * of type <code>O</code> given inputs of type <code>I</code>.
@@ -38,6 +39,15 @@ public interface PredictionStrategy<I, O
   O[] predictOutput(Collection<I> inputs, RealMatrix[] weightsMatrixSet);
 
   /**
+   * Performs a prediction and returns a vector containing the outputs.
+   *
+   * @param inputVector      a vector of input values
+   * @param weightsMatrixSet the initial set of weights defined by an array of matrices
+   * @return the vector containing the last layer's outputs
+   */
+  RealVector predictOutput(RealVector inputVector, RealMatrix[] weightsMatrixSet);
+
+  /**
    * Perform a prediction on the given input values and weights settings 
returning
    * a debug output.
    *
@@ -47,4 +57,14 @@ public interface PredictionStrategy<I, O
    */
   RealVector[] debugOutput(Collection<I> inputs, RealMatrix[] 
weightsMatrixSet);
 
+  /**
+   * Performs a prediction on the given input values and weights settings,
+   * returning a debug output.
+   *
+   * @param inputVector      a vector of input values
+   * @param weightsMatrixSet the initial set of weights defined by an array of matrices
+   * @return the perturbed neural network state via its activation values
+   */
+  RealVector[] debugOutput(RealVector inputVector, RealMatrix[] weightsMatrixSet);
+
 }
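
For illustration: the new overloads let callers pass a RealVector straight through
instead of boxing the values into a Collection<Double>. A hypothetical call site; the
strategy instance, weight shapes and helper name are placeholders, not part of the commit:

import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.RealVector;
import org.apache.yay.PredictionStrategy;

class VectorPredictionSketch {
  static RealVector lastLayerOutput(PredictionStrategy<Double, Double> strategy,
                                    double[] features, RealMatrix[] weights) {
    RealVector inputVector = new ArrayRealVector(features);
    // returns the last layer's activations directly as a vector
    return strategy.predictOutput(inputVector, weights);
  }
}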

Modified: 
labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java 
(original)
+++ 
labs/yay/trunk/api/src/main/java/org/apache/yay/WeightLearningException.java 
Thu Dec 17 07:04:12 2015
@@ -30,4 +30,8 @@ public class WeightLearningException ext
   public WeightLearningException(String s, Exception e) {
     super(s, e);
   }
+
+  public WeightLearningException(String s) {
+    super(s);
+  }
 }

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
 (original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/BackPropagationLearningStrategy.java
 Thu Dec 17 07:04:12 2015
@@ -25,12 +25,14 @@ import org.apache.yay.DerivativeUpdateFu
 import org.apache.yay.LearningStrategy;
 import org.apache.yay.NeuralNetwork;
 import org.apache.yay.PredictionStrategy;
-import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.WeightLearningException;
 
-import java.util.Arrays;
-import java.util.Iterator;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.Future;
 
 /**
  * Back propagation learning algorithm for neural networks implementation (see
@@ -49,6 +51,7 @@ public class BackPropagationLearningStra
   private final double threshold;
   private final int batch;
   private final int maxIterations;
+  private final ExecutorService executorService = Executors.newCachedThreadPool();
 
   public BackPropagationLearningStrategy(double alpha, double threshold, 
PredictionStrategy<Double, Double> predictionStrategy,
                                          CostFunction<RealMatrix, Double, 
Double> costFunction) {
@@ -79,12 +82,11 @@ public class BackPropagationLearningStra
 
   @Override
   public RealMatrix[] learnWeights(RealMatrix[] weightsMatrixSet, 
TrainingSet<Double, Double> trainingExamples) throws WeightLearningException {
-    RealMatrix[] updatedWeights = weightsMatrixSet;
+    RealMatrix[] weights = weightsMatrixSet;
     try {
       int iterations = 0;
 
       NeuralNetwork neuralNetwork = 
NeuralNetworkFactory.create(weightsMatrixSet, new VoidLearningStrategy<>(), 
predictionStrategy);
-      Iterator<TrainingExample<Double, Double>> iterator = 
trainingExamples.iterator();
 
       double cost = Double.MAX_VALUE;
       long start = System.currentTimeMillis();
@@ -100,12 +102,12 @@ public class BackPropagationLearningStra
         double newCost = costFunction.calculateCost(nextBatch, neuralNetwork);
 
         if (Double.POSITIVE_INFINITY == newCost || newCost > cost && batch == 
-1) {
-          throw new RuntimeException("failed to converge at iteration " + 
iterations + " with alpha " + alpha + " : cost going from " + cost + " to " + 
newCost);
+          throw new WeightLearningException("failed to converge at iteration " 
+ iterations + " with alpha " + alpha + " : cost going from " + cost + " to " + 
newCost);
         } else if (iterations > 1 && (cost == newCost || newCost < threshold 
|| iterations > maxIterations)) {
-          System.out.println("successfully converged after " + (iterations - 
1) + " iterations (alpha:" + alpha + ",threshold:" + threshold + ") with cost " 
+ newCost + " and parameters " + 
Arrays.toString(neuralNetwork.getParameters()));
+          System.out.println("successfully converged after " + (iterations - 
1) + " iterations (alpha:" + alpha + ",threshold:" + threshold + ") with cost " 
+ newCost);
           break;
         } else if (Double.isNaN(newCost)) {
-          throw new RuntimeException("failed to converge at iteration " + 
iterations + " with alpha " + alpha + " : cost calculation underflow");
+          throw new WeightLearningException("failed to converge at iteration " 
+ iterations + " with alpha " + alpha + " : cost calculation underflow");
         }
 
         // update registered cost
@@ -115,52 +117,72 @@ public class BackPropagationLearningStra
         RealMatrix[] derivatives = 
derivativeUpdateFunction.getUpdatedDerivatives(weightsMatrixSet, nextBatch);
 
         // calculate the updated parameters
-        updatedWeights = updateWeights(updatedWeights, derivatives, alpha);
+        weights = getUpdatedWeights(weights, derivatives, alpha);
 
         // update parameters in the hypothesis
-        neuralNetwork.setParameters(updatedWeights);
+        neuralNetwork.setParameters(weights);
 
         iterations++;
       }
+    } catch (WeightLearningException e) {
+      throw e;
     } catch (Exception e) {
       throw new WeightLearningException("error during backprop learning", e);
     }
 
-    return updatedWeights;
+    return weights;
   }
 
-  private RealMatrix[] updateWeights(RealMatrix[] weightsMatrixSet, final 
RealMatrix[] derivatives, final double alpha) {
-    RealMatrix[] updatedParameters = new RealMatrix[weightsMatrixSet.length];
+  private RealMatrix[] getUpdatedWeights(RealMatrix[] weightsMatrixSet, final 
RealMatrix[] derivatives, final double alpha) {
+    int length = weightsMatrixSet.length;
+
+    RealMatrix[] updatedParameters = new RealMatrix[length];
+
+    List<Future<RealMatrix>> futures = new ArrayList<>(length);
+
+    for (int l = 0; l < length; l++) {
 
-    for (int l = 0; l < weightsMatrixSet.length; l++) {
       RealMatrix realMatrix = weightsMatrixSet[l].copy();
       final int finalL = l;
-      RealMatrixChangingVisitor visitor = new RealMatrixChangingVisitor() {
+      futures.add(executorService.submit(() -> {
+        RealMatrixChangingVisitor visitor = new RealMatrixChangingVisitor() {
 
-        @Override
-        public void start(int rows, int columns, int startRow, int endRow, int 
startColumn, int endColumn) {
+          @Override
+          public void start(int rows, int columns, int startRow, int endRow, 
int startColumn, int endColumn) {
 
-        }
+          }
 
-        @Override
-        public double visit(int row, int column, double value) {
-          if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
-            return value - alpha * derivatives[finalL].getEntry(row, column);
-          } else {
-            return value;
+          @Override
+          public double visit(int row, int column, double value) {
+            if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
+              return value - alpha * derivatives[finalL].getEntry(row, column);
+            } else {
+              return value;
+            }
           }
-        }
 
-        @Override
-        public double end() {
-          return 0;
+          @Override
+          public double end() {
+            return 0;
+          }
+        };
+        realMatrix.walkInOptimizedOrder(visitor);
+        return realMatrix;
+      }));
+
+    }
+    int k = 0;
+    for (Future<RealMatrix> future : futures) {
+      if (updatedParameters[k] == null) {
+        try {
+          updatedParameters[k] = future.get();
+        } catch (Exception e) {
+          throw new RuntimeException(e);
         }
-      };
-      realMatrix.walkInOptimizedOrder(visitor);
-      if (updatedParameters[l] == null) {
-        updatedParameters[l] = realMatrix;
       }
+      k++;
     }
+
     return updatedParameters;
   }
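
Note on the parallel execution mentioned in the log: each layer's weight matrix is
updated independently of the others, so one task per matrix is submitted to the cached
pool and the Futures are read back in layer order. A standalone sketch of that pattern
with made-up 2x2 matrices and a plain W := W - alpha * dW step (the real visitor above
additionally skips the bias entries):

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;

public class ParallelUpdateSketch {
  public static void main(String[] args) throws Exception {
    ExecutorService pool = Executors.newCachedThreadPool();
    final double alpha = 0.01;
    final RealMatrix[] weights = {
        MatrixUtils.createRealMatrix(new double[][]{{1d, 2d}, {3d, 4d}}),
        MatrixUtils.createRealMatrix(new double[][]{{5d, 6d}, {7d, 8d}})
    };
    final RealMatrix[] derivatives = {
        MatrixUtils.createRealMatrix(new double[][]{{0.1, 0.1}, {0.1, 0.1}}),
        MatrixUtils.createRealMatrix(new double[][]{{0.2, 0.2}, {0.2, 0.2}})
    };

    List<Future<RealMatrix>> futures = new ArrayList<>(weights.length);
    for (int l = 0; l < weights.length; l++) {
      final int layer = l;
      // one gradient step per layer, each on its own worker thread
      futures.add(pool.submit(() -> {
        return weights[layer].subtract(derivatives[layer].scalarMultiply(alpha));
      }));
    }

    RealMatrix[] updated = new RealMatrix[weights.length];
    for (int l = 0; l < updated.length; l++) {
      updated[l] = futures.get(l).get(); // blocks; preserves layer order
    }
    pool.shutdown();
    System.out.println(updated[0]);
  }
}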
 

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java 
(original)
+++ labs/yay/trunk/core/src/main/java/org/apache/yay/core/BasicPerceptron.java 
Thu Dec 17 07:04:12 2015
@@ -27,7 +27,6 @@ import org.apache.yay.PredictionExceptio
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.core.neuron.BinaryThresholdNeuron;
-import org.apache.yay.core.utils.ConversionUtils;
 
 import java.util.Collection;
 
@@ -61,7 +60,7 @@ public class BasicPerceptron implements
   }
 
   public void learn(TrainingExample<Double, Double> example) {
-    Collection<Double> doubles = 
ConversionUtils.toValuesCollection(example.getFeatures());
+    Collection<Double> doubles = example.getFeatures();
     Double[] inputs = doubles.toArray(new Double[doubles.size()]);
     Double calculatedOutput = perceptronNeuron.elaborate(inputs);
     int diff = calculatedOutput.compareTo(example.getOutput()[0]);
@@ -93,7 +92,7 @@ public class BasicPerceptron implements
 
   @Override
   public Double[] predict(Input<Double> input) throws PredictionException {
-    Double output = 
perceptronNeuron.elaborate(ConversionUtils.toValuesCollection(input.getFeatures()).toArray(
+    Double output = perceptronNeuron.elaborate(input.getFeatures().toArray(
             new Double[input.getFeatures().size()]));
     return new Double[]{output};
   }

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java
 (original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/DefaultDerivativeUpdateFunction.java
 Thu Dec 17 07:04:12 2015
@@ -18,20 +18,15 @@
  */
 package org.apache.yay.core;
 
-import org.apache.commons.math3.linear.OpenMapRealVector;
+import org.apache.commons.math3.linear.ArrayRealVector;
 import org.apache.commons.math3.linear.RealMatrix;
-import org.apache.commons.math3.linear.RealMatrixChangingVisitor;
 import org.apache.commons.math3.linear.RealVector;
 import org.apache.yay.DerivativeUpdateFunction;
-import org.apache.yay.Feature;
 import org.apache.yay.PredictionStrategy;
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.core.utils.ConversionUtils;
 
-import java.util.ArrayList;
-import java.util.Collection;
-
 /**
  * Default derivatives update function
  */
@@ -50,71 +45,49 @@ class DefaultDerivativeUpdateFunction im
     RealVector[] deltaVectors = new RealVector[weightsMatrixSet.length];
 
     int noOfMatrixes = weightsMatrixSet.length - 1;
+    double count = 0;
     for (TrainingExample<Double, Double> trainingExample : trainingExamples) {
       try {
         // get activations from feed forward propagation
-        ArrayList<Feature<Double>> features = trainingExample.getFeatures();
-        Collection<Double> input = 
ConversionUtils.toValuesCollection(features);
-
-        RealVector[] activations = predictionStrategy.debugOutput(input, 
weightsMatrixSet);
+        RealVector[] activations = 
predictionStrategy.debugOutput(trainingExample.getFeatures(), weightsMatrixSet);
 
         // calculate output error (corresponding to the last delta^l)
         RealVector nextLayerDelta = calculateOutputError(trainingExample, 
activations);
 
-        updateDeltaVectors(weightsMatrixSet, deltaVectors, noOfMatrixes, 
activations, nextLayerDelta);
+        deltaVectors[noOfMatrixes] = nextLayerDelta;
+
+        // back prop the error and update the deltas accordingly
+        for (int l = noOfMatrixes; l > 0; l--) {
+          RealVector currentActivationsVector = activations[l - 1];
+          nextLayerDelta = calculateDeltaVector(weightsMatrixSet[l], 
currentActivationsVector, nextLayerDelta);
+
+          // collect delta vectors for this example
+          deltaVectors[l - 1] = nextLayerDelta;
+        }
 
         RealVector[] newActivations = new RealVector[activations.length];
-        newActivations[0] = ConversionUtils.toRealVector(input);
+        newActivations[0] = 
ConversionUtils.toRealVector(trainingExample.getFeatures());
         System.arraycopy(activations, 0, newActivations, 1, activations.length 
- 1);
 
         // update triangle (big delta matrix)
-        updateTriangle(triangle, newActivations, deltaVectors);
+        updateTriangle(triangle, newActivations, deltaVectors, 
weightsMatrixSet);
+
       } catch (Exception e) {
         throw new RuntimeException("error during derivatives calculation", e);
       }
+      count++;
     }
 
-    for (RealMatrix aTriangle : triangle) {
-      aTriangle.walkInOptimizedOrder(new RealMatrixChangingVisitor() {
-        @Override
-        public void start(int rows, int columns, int startRow, int endRow, int 
startColumn, int endColumn) {
-
-        }
-
-        @Override
-        public double visit(int row, int column, double value) {
-          if (!(row == 0 && value == 0d) && !(column == 0 && value == 1d)) {
-            return value / trainingExamples.size();
-          } else {
-            return value;
-          }
-        }
-
-        @Override
-        public double end() {
-          return 0;
-        }
-      });
-    }
-    return triangle;
-  }
-
-  private void updateDeltaVectors(RealMatrix[] weightsMatrixSet, RealVector[] 
deltaVectors, int noOfMatrixes, RealVector[] activations,
-                                  RealVector nextLayerDelta) {
-    deltaVectors[noOfMatrixes] = nextLayerDelta;
-
-    // back prop the error and update the deltas accordingly
-    for (int l = noOfMatrixes; l > 0; l--) {
-      RealVector currentActivationsVector = activations[l - 1];
-      nextLayerDelta = calculateDeltaVector(weightsMatrixSet[l], 
currentActivationsVector, nextLayerDelta);
-
-      // collect delta vectors for this example
-      deltaVectors[l - 1] = nextLayerDelta;
+    RealMatrix[] derivatives = new RealMatrix[triangle.length];
+    for (int i = 0; i < triangle.length; i++) {
+      // TODO : introduce regularization diversification on bias term 
(currently not regularized)
+      derivatives[i] = triangle[i].scalarMultiply(1d / count);
     }
+    return derivatives;
   }
 
-  private void updateTriangle(RealMatrix[] triangle, RealVector[] activations, 
RealVector[] deltaVectors) {
-    for (int l = triangle.length - 1; l >= 0; l--) {
+  private void updateTriangle(RealMatrix[] triangle, RealVector[] activations, 
RealVector[] deltaVectors, RealMatrix[] weightsMatrixSet) {
+    for (int l = weightsMatrixSet.length - 1; l >= 0; l--) {
       RealMatrix realMatrix = deltaVectors[l].outerProduct(activations[l]);
       if (triangle[l] == null) {
         triangle[l] = realMatrix;
@@ -126,7 +99,7 @@ class DefaultDerivativeUpdateFunction im
 
   private RealVector calculateDeltaVector(RealMatrix thetaL, RealVector 
activationsVector, RealVector nextLayerDelta) {
     // TODO : remove the bias term from the error calculations
-    RealVector identity = new 
OpenMapRealVector(activationsVector.getDimension(), 1d);
+    ArrayRealVector identity = new 
ArrayRealVector(activationsVector.getDimension(), 1d);
     RealVector gz = 
activationsVector.ebeMultiply(identity.subtract(activationsVector)); // = a^l 
.* (1-a^l)
     return thetaL.preMultiply(nextLayerDelta).ebeMultiply(gz);
   }
@@ -134,12 +107,19 @@ class DefaultDerivativeUpdateFunction im
   private RealVector calculateOutputError(TrainingExample<Double, Double> 
trainingExample, RealVector[] activations) {
     RealVector output = activations[activations.length - 1];
 
+//    Double[] sampleOutput = new Double[output.getDimension()];
     Double[] actualOutput = trainingExample.getOutput();
-    RealVector learnedOutputRealVector = new OpenMapRealVector(actualOutput); 
// turn example output to a vector
+//    int sampleOutputIntValue = actualOutput.intValue();
+//    if (sampleOutputIntValue < sampleOutput.length) {
+//      sampleOutput[sampleOutputIntValue] = 1d;
+//    } else if (sampleOutput.length == 1) {
+//      sampleOutput[0] = actualOutput;
+//    } else {
+//      throw new RuntimeException("problem with multiclass output mapping");
+//    }
+    RealVector learnedOutputRealVector = new ArrayRealVector(actualOutput); // 
turn example output to a vector
 
-    // error calculation -> er_a = out_a * (1 - out_a) * (tgt_a - out_a) (was: 
output.subtract(learnedOutputRealVector)
-    // targetOutputRealVector.subtract(output).map(x -> Math.pow(x, 2)); // 
squared error
-    // return output.subtract(learnedOutputRealVector);
-    return output.ebeMultiply(new OpenMapRealVector(output.getDimension(), 
1d).subtract(output)).ebeMultiply(output.subtract(learnedOutputRealVector));
+    // TODO : improve error calculation -> this could be er_a = out_a * (1 - 
out_a) * (tgt_a - out_a)
+    return output.subtract(learnedOutputRealVector);
   }
 }
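
For reference: the reverted update function accumulates per-example outer products into
the "triangle" matrices, divides them by the example count, and uses the plain difference
a^L - y as the output error. A small numeric sketch of the two delta computations used
above, with made-up vectors and a 2x3 weight matrix standing in for Theta^l:

import org.apache.commons.math3.linear.ArrayRealVector;
import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;
import org.apache.commons.math3.linear.RealVector;

public class DeltaSketch {
  public static void main(String[] args) {
    RealVector output = new ArrayRealVector(new double[]{0.7, 0.2});   // a^L
    RealVector target = new ArrayRealVector(new double[]{1d, 0d});     // y
    RealVector outputDelta = output.subtract(target);                  // delta^L = a^L - y

    RealMatrix theta = MatrixUtils.createRealMatrix(
        new double[][]{{0.1, 0.3, 0.5}, {0.2, 0.4, 0.6}});             // Theta^l (2x3)
    RealVector activations = new ArrayRealVector(new double[]{1d, 0.6, 0.4}); // a^(l-1)
    RealVector ones = new ArrayRealVector(activations.getDimension(), 1d);
    RealVector gz = activations.ebeMultiply(ones.subtract(activations));      // a .* (1 - a)
    // delta^(l-1) = (Theta^l)^T * delta^l .* a^(l-1) .* (1 - a^(l-1))
    RealVector previousDelta = theta.preMultiply(outputDelta).ebeMultiply(gz);

    System.out.println(outputDelta + " -> " + previousDelta);
  }
}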

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java 
(original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/EncodedTrainingSet.java 
Thu Dec 17 07:04:12 2015
@@ -18,7 +18,6 @@
  */
 package org.apache.yay.core;
 
-import org.apache.yay.Feature;
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.core.utils.ConversionUtils;
@@ -57,12 +56,12 @@ public class EncodedTrainingSet extends
       @Override
       public TrainingExample<Double, Double> next() {
         TrainingExample<Double, Double> sample = 
EncodedTrainingSet.super.iterator().next();
-        Collection<Feature<Double>> features = sample.getFeatures();
+        Collection<Double> features = sample.getFeatures();
         int vocabularySize = vocabulary.size();
         Double[] outputs = new Double[vocabularySize * (window - 1)];
         Double[] inputs = new Double[vocabularySize];
-        for (Feature<Double> feature : features) {
-          inputs = ConversionUtils.hotEncode(feature.getValue().intValue(), 
vocabularySize);
+        for (Double feature : features) {
+          inputs = ConversionUtils.hotEncode(feature.intValue(), 
vocabularySize);
           break;
         }
         int k = 0;
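
Aside: the hot encoding used here turns a vocabulary index into a vector with a single
1.0 at that index. A plain illustration of the idea, not the actual
ConversionUtils.hotEncode implementation:

import java.util.Arrays;

public class HotEncodeSketch {
  static Double[] hotEncode(int index, int size) {
    Double[] encoded = new Double[size];
    Arrays.fill(encoded, 0d);
    encoded[index] = 1d; // single active position
    return encoded;
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(hotEncode(2, 5))); // [0.0, 0.0, 1.0, 0.0, 0.0]
  }
}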

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java 
(original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/FeedForwardStrategy.java 
Thu Dec 17 07:04:12 2015
@@ -54,20 +54,31 @@ public class FeedForwardStrategy impleme
 
   @Override
   public Double[] predictOutput(Collection<Double> input, RealMatrix[] 
realMatrixSet) {
-    RealVector[] activations = applyFF(input, realMatrixSet);
+    RealVector[] activations = debugOutput(input, realMatrixSet);
     RealVector x = activations[activations.length - 1];
     return ConversionUtils.toDoubleArray(x.toArray());
   }
 
+  @Override
+  public RealVector predictOutput(RealVector inputVector, RealMatrix[] 
weightsMatrixSet) {
+    RealVector[] activations = debugOutput(inputVector, weightsMatrixSet);
+    return activations[activations.length - 1];
+  }
+
   public RealVector[] debugOutput(Collection<Double> input, RealMatrix[] 
realMatrixSet) {
-    return applyFF(input, realMatrixSet);
+    Double[] doubles = input.toArray(new Double[input.size()]);
+    return 
applyFF(Stream.of(doubles).mapToDouble(Double::doubleValue).toArray(), 
realMatrixSet);
   }
 
-  private RealVector[] applyFF(Collection<Double> input, RealMatrix[] 
realMatrixSet) {
+  @Override
+  public RealVector[] debugOutput(RealVector inputVector, RealMatrix[] 
weightsMatrixSet) {
+    return applyFF(inputVector.toArray(), weightsMatrixSet);
+  }
+
+  private RealVector[] applyFF(double[] inputs, RealMatrix[] realMatrixSet) {
     RealVector[] debugOutput = new RealVector[realMatrixSet.length];
 
-    Double[] doubles = input.toArray(new Double[input.size()]);
-    RealMatrix x = 
MatrixUtils.createRowRealMatrix(Stream.of(doubles).mapToDouble(Double::doubleValue).toArray());
+    RealMatrix x = MatrixUtils.createRowRealMatrix(inputs);
     for (int w = 0; w < realMatrixSet.length; w++) {
       // compute matrix multiplication
       x = x.multiply(realMatrixSet[w].transpose());
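
Note: applyFF now takes a raw double[]; the input becomes a 1xN row matrix and each layer
multiplies it by the transposed weight matrix before applying that layer's activation. A
toy pass with made-up weights and a fixed sigmoid standing in for the configured
ActivationFunction:

import org.apache.commons.math3.linear.MatrixUtils;
import org.apache.commons.math3.linear.RealMatrix;

public class FeedForwardSketch {
  public static void main(String[] args) {
    RealMatrix x = MatrixUtils.createRowRealMatrix(new double[]{1d, 0.5, 0.3});
    RealMatrix[] weights = {
        MatrixUtils.createRealMatrix(new double[][]{{0.1, 0.2, 0.3}, {0.4, 0.5, 0.6}}), // 2x3
        MatrixUtils.createRealMatrix(new double[][]{{0.7, 0.8}})                        // 1x2
    };
    for (RealMatrix w : weights) {
      x = x.multiply(w.transpose());
      // element-wise sigmoid activation on the row
      for (int c = 0; c < x.getColumnDimension(); c++) {
        x.setEntry(0, c, 1d / (1d + Math.exp(-x.getEntry(0, c))));
      }
    }
    System.out.println(x.getEntry(0, 0)); // single output activation
  }
}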

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java
 (original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/LogisticRegressionCostFunction.java
 Thu Dec 17 07:04:12 2015
@@ -67,13 +67,6 @@ public class LogisticRegressionCostFunct
           return res;
         }
       });
-//      for (int i = 0; i < layerMatrix.getColumnDimension(); i++) {
-//        double[] column = layerMatrix.getColumn(i);
-//        // starting from 1 to avoid including the bias unit in regularization
-//        for (int j = 1; j < column.length; j++) {
-//          res += Math.pow(column[j], 2d);
-//        }
-//      }
     }
     return (lambda / (2d * trainingExamples.size())) * res;
   }

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java 
(original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/MaxSelectionFunction.java 
Thu Dec 17 07:04:12 2015
@@ -18,9 +18,10 @@
  */
 package org.apache.yay.core;
 
+import org.apache.yay.SelectionFunction;
+
 import java.util.Collection;
 import java.util.Collections;
-import org.apache.yay.SelectionFunction;
 
 /**
  * Selects the max value from a {@link Collection} of {@link Comparable} 
outputs.

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java 
(original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/NeuralNetworkFactory.java 
Thu Dec 17 07:04:12 2015
@@ -18,7 +18,6 @@
  */
 package org.apache.yay.core;
 
-import java.util.Collection;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.yay.Input;
 import org.apache.yay.LearningException;
@@ -28,7 +27,8 @@ import org.apache.yay.PredictionExceptio
 import org.apache.yay.PredictionStrategy;
 import org.apache.yay.TrainingSet;
 import org.apache.yay.WeightLearningException;
-import org.apache.yay.core.utils.ConversionUtils;
+
+import java.util.Collection;
 
 /**
  * Factory class for creating {@link org.apache.yay.NeuralNetwork}s
@@ -49,7 +49,7 @@ public class NeuralNetworkFactory {
     return new NeuralNetwork() {
 
       private Double[] getOutputVector(Input<Double> input) {
-        Collection<Double> inputVector = 
ConversionUtils.toValuesCollection(input.getFeatures());
+        Collection<Double> inputVector = input.getFeatures();
         return predictionStrategy.predictOutput(inputVector, 
updatedRealMatrixSet);
       }
 

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java
 (original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ConversionUtils.java
 Thu Dec 17 07:04:12 2015
@@ -22,16 +22,13 @@ import org.apache.commons.math3.linear.M
 import org.apache.commons.math3.linear.OpenMapRealVector;
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.linear.RealVector;
-import org.apache.yay.Feature;
 import org.apache.yay.Input;
 
-import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.List;
 import java.util.WeakHashMap;
-import java.util.stream.Collectors;
 
 /**
  * Temporary class for conversion between model objects and commons-math 
matrices/vectors
@@ -69,8 +66,8 @@ public class ConversionUtils {
   private static double[] toDoubleArray(Input<Double> sample) {
     double[] ar = new double[sample.getFeatures().size()];
     int i = 0;
-    for (Feature<Double> f : sample.getFeatures()) {
-      ar[i] = f.getValue();
+    for (Double f : sample.getFeatures()) {
+      ar[i] = f;
       i++;
     }
     return ar;
@@ -87,18 +84,6 @@ public class ConversionUtils {
   }
 
   /**
-   * turns a collection of features of type <code>T</code> into a collection of
-   * <code>T</code> objects.
-   *
-   * @param featureVector the vector of features
-   * @param <T>           the type of features
-   * @return a vector of Doubles
-   */
-  public static <T> Collection<T> toValuesCollection(Collection<Feature<T>> 
featureVector) {
-    return 
featureVector.stream().map(Feature::getValue).collect(Collectors.toCollection(ArrayList::new));
-  }
-
-  /**
    * this is just nice! :-) (thanks commons-math)
    *
    * @param ar a double array

Modified: 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java
 (original)
+++ 
labs/yay/trunk/core/src/main/java/org/apache/yay/core/utils/ExamplesFactory.java
 Thu Dec 17 07:04:12 2015
@@ -18,11 +18,11 @@
  */
 package org.apache.yay.core.utils;
 
-import org.apache.yay.Feature;
-import org.apache.yay.Input;
 import org.apache.yay.TrainingExample;
 
-import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.LinkedList;
+import java.util.List;
 
 /**
  * Factory class for {@link org.apache.yay.Input}s and {@link 
TrainingExample}s.
@@ -33,8 +33,11 @@ public class ExamplesFactory {
                                                                             
final Double... featuresValues) {
     return new TrainingExample<Double, Double>() {
       @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        return doublesToFeatureVector(featuresValues);
+      public List<Double> getFeatures() {
+        List<Double> doubles = new LinkedList<>();
+        doubles.add(1d);
+        doubles.addAll(Arrays.asList(featuresValues));
+        return doubles;
       }
 
       @Override
@@ -45,11 +48,14 @@ public class ExamplesFactory {
   }
 
   public static TrainingExample<Double, Double> 
createDoubleArrayTrainingExample(final Double[] output,
-                                                                            
final Double... featuresValues) {
+                                                                               
  final Double... featuresValues) {
     return new TrainingExample<Double, Double>() {
       @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        return doublesToFeatureVector(featuresValues);
+      public List<Double> getFeatures() {
+        List<Double> doubles = new LinkedList<>();
+        doubles.add(1d);
+        doubles.addAll(Arrays.asList(featuresValues));
+        return doubles;
       }
 
       @Override
@@ -59,21 +65,4 @@ public class ExamplesFactory {
     };
   }
 
-  public static Input<Double> createDoubleInput(final Double... 
featuresValues) {
-    return () -> doublesToFeatureVector(featuresValues);
-  }
-
-  private static ArrayList<Feature<Double>> doublesToFeatureVector(Double[] 
featuresValues) {
-    ArrayList<Feature<Double>> features = new ArrayList<>();
-    Feature<Double> byasFeature = new Feature<>();
-    byasFeature.setValue(1d);
-    features.add(byasFeature);
-    for (Double d : featuresValues) {
-      Feature<Double> feature = new Feature<>();
-      feature.setValue(d);
-      features.add(feature);
-    }
-    return features;
-  }
-
 }
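
Usage from the caller's side is unchanged apart from the feature container type. A small
sketch with arbitrary output and feature values:

import org.apache.yay.TrainingExample;
import org.apache.yay.core.utils.ExamplesFactory;

public class ExamplesFactorySketch {
  public static void main(String[] args) {
    TrainingExample<Double, Double> example =
        ExamplesFactory.createDoubleArrayTrainingExample(new Double[]{1d}, 0.5, 0.3);
    // bias term 1.0 is prepended to the given feature values
    System.out.println(example.getFeatures()); // [1.0, 0.5, 0.3]
    System.out.println(example.getOutput().length);
  }
}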

Modified: 
labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java 
(original)
+++ 
labs/yay/trunk/core/src/test/java/org/apache/yay/core/BasicPerceptronTest.java 
Thu Dec 17 07:04:12 2015
@@ -18,16 +18,17 @@
  */
 package org.apache.yay.core;
 
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.LinkedList;
-import java.util.Random;
-import org.apache.yay.Feature;
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
 import org.junit.Before;
 import org.junit.Test;
 
+import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Random;
+
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertTrue;
 
@@ -143,16 +144,11 @@ public class BasicPerceptronTest {
                                                                 final 
Double... params) {
     return new TrainingExample<Double, Double>() {
       @Override
-      public ArrayList<Feature<Double>> getFeatures() {
-        ArrayList<Feature<Double>> features = new ArrayList<>();
-        Feature<Double> byasFeature = new Feature<>();
-        byasFeature.setValue(1d);
+      public List<Double> getFeatures() {
+        List<Double> features = new LinkedList<>();
+        Double byasFeature = 1d;
         features.add(byasFeature);
-        for (Double d : params) {
-          Feature<Double> feature = new Feature<>();
-          feature.setValue(d);
-          features.add(feature);
-        }
+        Collections.addAll(features, params);
         return features;
       }
 

Modified: 
labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- 
labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java
 (original)
+++ 
labs/yay/trunk/core/src/test/java/org/apache/yay/core/NeuralNetworkIntegrationTest.java
 Thu Dec 17 07:04:12 2015
@@ -22,12 +22,19 @@ import org.apache.commons.math3.linear.A
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.ml.distance.CanberraDistance;
 import org.apache.commons.math3.ml.distance.DistanceMeasure;
-import org.apache.yay.*;
+import org.apache.yay.Input;
+import org.apache.yay.LearningStrategy;
+import org.apache.yay.NeuralNetwork;
+import org.apache.yay.TrainingExample;
+import org.apache.yay.TrainingSet;
 import org.apache.yay.core.utils.ExamplesFactory;
 import org.junit.Test;
 
 import java.util.ArrayList;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.LinkedList;
+import java.util.List;
 import java.util.Random;
 
 import static org.junit.Assert.assertEquals;
@@ -105,15 +112,10 @@ public class NeuralNetworkIntegrationTes
 
   private Input<Double> createSample(final Double... params) {
     return () -> {
-      ArrayList<Feature<Double>> features = new ArrayList<Feature<Double>>();
-      Feature<Double> byasFeature = new Feature<Double>();
-      byasFeature.setValue(1d);
+      List<Double> features = new LinkedList<>();
+      Double byasFeature = 1d;
       features.add(byasFeature);
-      for (Double d : params) {
-        Feature<Double> feature = new Feature<Double>();
-        feature.setValue(d);
-        features.add(feature);
-      }
+      Collections.addAll(features, params);
       return features;
     };
   }

Modified: 
labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java 
(original)
+++ labs/yay/trunk/core/src/test/java/org/apache/yay/core/WordVectorsTest.java 
Thu Dec 17 07:04:12 2015
@@ -23,9 +23,7 @@ import org.apache.commons.math3.linear.M
 import org.apache.commons.math3.linear.RealMatrix;
 import org.apache.commons.math3.ml.distance.DistanceMeasure;
 import org.apache.commons.math3.ml.distance.EuclideanDistance;
-import org.apache.commons.math3.util.FastMath;
 import org.apache.yay.ActivationFunction;
-import org.apache.yay.Feature;
 import org.apache.yay.NeuralNetwork;
 import org.apache.yay.TrainingExample;
 import org.apache.yay.TrainingSet;
@@ -78,7 +76,7 @@ public class WordVectorsTest {
     Path path = 
Paths.get(getClass().getResource("/word2vec/test.txt").getFile());
 
     System.out.println("reading fragments");
-    int window = 4;
+    int window = 3;
     Queue<List<byte[]>> fragments = getFragments(path, window);
     assertFalse(fragments.isEmpty());
     System.out.println("generating vocabulary");
@@ -93,7 +91,7 @@ public class WordVectorsTest {
     int inputSize = next.getFeatures().size();
     int outputSize = next.getOutput().length;
 
-    int hiddenSize = 30;
+    int hiddenSize = 10;
     System.out.println("initializing neural network");
     RealMatrix[] randomWeights = createRandomWeights(inputSize, hiddenSize, 
outputSize);
 
@@ -128,31 +126,31 @@ public class WordVectorsTest {
     System.out.println("measuring similarities");
     Collection<DistanceMeasure> measures = new LinkedList<>();
     measures.add(new EuclideanDistance());
-    measures.add(new DistanceMeasure() {
-      @Override
-      public double compute(double[] a, double[] b) {
-        double dp = 0.0;
-        double na = 0.0;
-        double nb = 0.0;
-        for (int i = 0; i < a.length; i++) {
-          dp += a[i] * b[i];
-          na += Math.pow(a[i], 2);
-          nb += Math.pow(b[i], 2);
-        }
-        double cosineSimilarity = dp / (Math.sqrt(na) * Math.sqrt(nb));
-        return 1 / cosineSimilarity;
-      }
-
-      @Override
-      public String toString() {
-        return "inverse cosine similarity distance measure";
-      }
-    });
-    measures.add((DistanceMeasure) (a, b) -> {
-      double da = 
FastMath.sqrt(MatrixUtils.createRealVector(a).dotProduct(MatrixUtils.createRealVector(a)));
-      double db = 
FastMath.sqrt(MatrixUtils.createRealVector(b).dotProduct(MatrixUtils.createRealVector(b)));
-      return Math.abs(db - da);
-    });
+//    measures.add(new DistanceMeasure() {
+//      @Override
+//      public double compute(double[] a, double[] b) {
+//        double dp = 0.0;
+//        double na = 0.0;
+//        double nb = 0.0;
+//        for (int i = 0; i < a.length; i++) {
+//          dp += a[i] * b[i];
+//          na += Math.pow(a[i], 2);
+//          nb += Math.pow(b[i], 2);
+//        }
+//        double cosineSimilarity = dp / (Math.sqrt(na) * Math.sqrt(nb));
+//        return 1 / cosineSimilarity;
+//      }
+//
+//      @Override
+//      public String toString() {
+//        return "inverse cosine similarity distance measure";
+//      }
+//    });
+//    measures.add((DistanceMeasure) (a, b) -> {
+//      double da = 
FastMath.sqrt(MatrixUtils.createRealVector(a).dotProduct(MatrixUtils.createRealVector(a)));
+//      double db = 
FastMath.sqrt(MatrixUtils.createRealVector(b).dotProduct(MatrixUtils.createRealVector(b)));
+//      return Math.abs(db - da);
+//    });
     for (DistanceMeasure distanceMeasure : measures) {
       System.out.println("computing similarity using " + distanceMeasure);
       computeSimilarities(vocabulary, wordVectors, distanceMeasure);
@@ -255,10 +253,9 @@ public class WordVectorsTest {
         }
 
         @Override
-        public ArrayList<Feature<Double>> getFeatures() {
-          ArrayList<Feature<Double>> features = new ArrayList<>();
-          Feature<Double> e = new Feature<>();
-          e.setValue((double) vocabulary.indexOf(new String(finalInputWord)));
+        public List<Double> getFeatures() {
+          List<Double> features = new ArrayList<>();
+          Double e = (double) vocabulary.indexOf(new String(finalInputWord));
           features.add(e);
           return features;
         }
@@ -275,7 +272,6 @@ public class WordVectorsTest {
 
 
   private List<String> getVocabulary(Path path) throws IOException {
-    long start = System.currentTimeMillis();
     Set<String> vocabulary = new HashSet<>();
     ByteBuffer buf = ByteBuffer.allocate(100);
     try (SeekableByteChannel sbc = Files.newByteChannel(path)) {
@@ -307,16 +303,17 @@ public class WordVectorsTest {
     } finally {
       buf.clear();
     }
-    long end = System.currentTimeMillis();
     List<String> list = Arrays.asList(vocabulary.toArray(new 
String[vocabulary.size()]));
     Collections.sort(list);
-    System.out.println("vocabulary read in " + (end - start) / 60000 + " 
minutes (" + (list.size()) + ")");
+//    for (String iw : vocabulary) {
+//      System.out.println(iw 
+"->"+Arrays.toString(ConversionUtils.hotEncode(iw.getBytes(), list)));
+//    }
     return list;
   }
 
   private String cleanString(CharBuffer charBuffer) {
     String s = charBuffer.toString();
-    return s.toLowerCase().replaceAll("\\.", " ").replaceAll("\\;", " 
").replaceAll("\\,", " ").replaceAll("\\:", " 
").replaceAll("\\-","").replaceAll("\\\"","");
+    return s.toLowerCase().replaceAll("\\.", " ").replaceAll("\\;", " 
").replaceAll("\\,", " ").replaceAll("\\:", " ").replaceAll("\\-\\s", 
"").replaceAll("\\\"", "");
   }
 
   private List<String> getVocabulary(Collection<byte[]> sentences) {

Modified: labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt
URL: 
http://svn.apache.org/viewvc/labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt?rev=1720488&r1=1720487&r2=1720488&view=diff
==============================================================================
--- labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt (original)
+++ labs/yay/trunk/core/src/test/resources/word2vec/abstracts.txt Thu Dec 17 
07:04:12 2015
@@ -1,10 +1,10 @@
-A calculus which combined the flexible geometric structure of vector mod- els 
with the crisp efficiency of Boolean logic would be extremely beneficial for 
modelling natural language. With this goal in mind, we present a formulation 
for logical connectives in vector spaces based on standard linear algebra, 
giving ex- amples of the use of vector negation to discriminate between 
different senses of ambiguous words. It turns out that the operators developed 
in this way are pre- cisely the connectives of quantum logic (Birkhoff and von 
Neumann, 1936), which to our knowledge have not been exploited before in 
natural language processing. In quantum logic, arbitrary sets are replaced by 
linear subspaces of a vector space, and set unions, intersections and 
complements are replaced by vector sum, inter- section and orthogonal 
complements of subspaces. We demonstrate that these logi- cal connectives 
(particularly the orthogonal complement for negation) are powerful tools for 
exploring and anal
 ysing word meanings and show distinct advantages over Boolean operators in 
document retrieval experiments.
+A calculus which combined the flexible geometric structure of vector models 
with the crisp efficiency of Boolean logic would be extremely beneficial for 
modelling natural language. With this goal in mind, we present a formulation 
for logical connectives in vector spaces based on standard linear algebra, 
giving ex- amples of the use of vector negation to discriminate between 
different senses of ambiguous words. It turns out that the operators developed 
in this way are pre- cisely the connectives of quantum logic (Birkhoff and von 
Neumann, 1936), which to our knowledge have not been exploited before in 
natural language processing. In quantum logic, arbitrary sets are replaced by 
linear subspaces of a vector space, and set unions, intersections and 
complements are replaced by vector sum, inter- section and orthogonal 
complements of subspaces. We demonstrate that these logi- cal connectives 
(particularly the orthogonal complement for negation) are powerful tools for 
exploring and analys
 ing word meanings and show distinct advantages over Boolean operators in 
document retrieval experiments.
 This paper is organised as follows. In Section 1.1 we describe some of the 
ways vectors have been used to represent the meanings of terms and documents in 
natural language processing, and describe the way the WORD-SPACE used in our 
later experiments is built automatically from text corpora. In Section 1.2 we 
define the logical connectives on vector spaces, focussing particularly on 
negation and disjunction. This introduces the basic material needed to 
understand the worked examples given in Section 1.3, and the document retrieval 
experiments described in Section 1.3.1. Section 1.4 gives a much fuller outline 
of the theory of quantum logic, the natural setting for the operators of 
Section 1.2. Finally, in Section 1.5, we examine the similarities between 
quantum logic and WORD-SPACE, asking whether quantum logic is an appropriate 
framework for modelling word-meanings or if the
 initial successes we have obtained are mainly coincidental.
 To some extent, this paper may have been written backwards, in that the 
im-plementation and examples are at the beginning and most of the theory is at 
the end. This is for two reasons. Firstly, we hoped to make the paper as 
accessible as possible and were afraid that beginning with an introduction to 
the full machinery of quantum logic would defeat this goal before the reader 
has a chance to realise that the techniques and equations used in this work are 
really quite elementary. Secondly, the link with ‘quantum logic’ was itself 
only brought to our attention after the bulk of the results in this paper had 
been obtained, and since this research is very much ongoing, we deemed it 
appropriate to give an honest account of its history and current state.
-We propose two novel model architectures for computing continuous vector 
repre- sentations of words from very large data sets. The quality of these 
representations is measured in a word similarity task, and the results are 
compared to the previ- ously best performing techniques based on different 
types of neural networks. We observe large improvements in accuracy at much 
lower computational cost, i.e. it takes less than a day to learn high quality 
word vectors from a 1.6 billion words data set. Furthermore, we show that these 
vectors provide state-of-the-art perfor- mance on our test set for measuring 
syntactic and semantic word similarities.
+We propose two novel model architectures for computing continuous vector 
representations of words from very large data sets. The quality of these 
representations is measured in a word similarity task, and the results are 
compared to the previ- ously best performing techniques based on different 
types of neural networks. We observe large improvements in accuracy at much 
lower computational cost, i.e. it takes less than a day to learn high quality 
word vectors from a 1.6 billion words data set. Furthermore, we show that these 
vectors provide state-of-the-art perfor- mance on our test set for measuring 
syntactic and semantic word similarities.
 Information Retrieval (IR) models need to deal with two difficult issues, 
vocabulary mismatch and term dependencies. Vocabulary mismatch corresponds to 
the difficulty of retrieving relevant documents that do not contain exact query 
terms but semantically related terms. Term dependencies refers to the need of 
considering the relationship between the words of the query when estimating the 
relevance of a document. A multitude of solutions has been proposed to solve 
each of these two problems, but no principled model solve both. In parallel, in 
the last few years, language models based on neural networks have been used to 
cope with complex natural language processing tasks like emotion and paraphrase 
detection. Although they present good abilities to cope with both term 
dependencies and vocabulary mismatch problems, thanks to the distributed 
representation of words they are based upon, such models could not be used 
readily in IR, where the estimation of one language model per document (
 or query) is required. This is both computationally unfeasible and prone to 
over-fitting. Based on a recent work that proposed to learn a generic language 
model that can be modified through a set of document-specific parameters, we 
explore use of new neural network models that are adapted to ad-hoc IR tasks. 
Within the language model IR framework, we propose and study the use of a 
generic language model as well as a document-specific language model. Both can 
be used as a smoothing component, but the latter is more adapted to the 
document at hand and has the potential of being used as a full document 
language model. We experiment with such models and analyze their results on 
TREC-1 to 8 datasets.
-Bidirectional Long Short-Term Mem- ory Recurrent Neural Network (BLSTM- RNN) 
has been shown to be very effec- tive for modeling and predicting sequen- tial 
data, e.g. speech utterances or hand- written documents. In this study, we 
propose to use BLSTM-RNN for a uni- fied tagging solution that can be applied 
to various tagging tasks including part- of-speech tagging, chunking and named 
entity recognition. Instead of exploiting specific features carefully optimized 
for each task, our solution only uses one set of task-independent features and 
internal representations learnt from unlabeled text for all tasks. Requiring no 
task specific knowledge or sophisticated feature engi- neering, our approach 
gets nearly state-of- the-art performance in all these three tag- ging tasks.
+Bidirectional Long Short-Term Memory Recurrent Neural Network (BLSTM-RNN) has 
been shown to be very effec- tive for modeling and predicting sequen- tial 
data, e.g. speech utterances or hand- written documents. In this study, we 
propose to use BLSTM-RNN for a uni- fied tagging solution that can be applied 
to various tagging tasks including part- of-speech tagging, chunking and named 
entity recognition. Instead of exploiting specific features carefully optimized 
for each task, our solution only uses one set of task-independent features and 
internal representations learnt from unlabeled text for all tasks. Requiring no 
task specific knowledge or sophisticated feature engi- neering, our approach 
gets nearly state-of- the-art performance in all these three tag- ging tasks.
 The recently introduced continuous Skip-gram model is an efficient method for 
learning high-quality distributed vector representations that capture a large 
num- ber of precise syntactic and semantic word relationships. In this paper we 
present several extensions that improve both the quality of the vectors and the 
training speed. By subsampling of the frequent words we obtain significant 
speedup and also learn more regular word representations. We also describe a 
simple alterna- tive to the hierarchical softmax called negative sampling.
 An inherent limitation of word representations is their indifference to word 
order and their inability to represent idiomatic phrases. For example, the 
meanings of “Canada” and “Air” cannot be easily combined to obtain 
“Air Canada”. Motivated by this example, we present a simple method for 
finding phrases in text, and show that learning good vector representations for 
millions of phrases is possible.
 We extend the word2vec framework to capture meaning across languages. The 
input consists of a source text and a word-aligned parallel text in a second 
language. The joint word2vec tool then repre- sents words in both languages 
within a common “semantic” vector space. The result can be used to enrich 
lexicons of under-resourced languages, to identify ambiguities, and to perform 
clustering and classification. Experiments were conducted on a parallel 
English-Arabic corpus, as well as on English and Hebrew Biblical texts.
@@ -13,7 +13,7 @@ We report our participation in the conte
 We present a comprehensive study of eval- uation methods for unsupervised 
embed- ding techniques that obtain meaningful representations of words from 
text. Differ- ent evaluations result in different orderings of embedding 
methods, calling into ques- tion the common assumption that there is one single 
optimal vector representation. We present new evaluation techniques that 
directly compare embeddings with respect to specific queries. These methods re- 
duce bias, provide greater insight, and allow us to solicit data-driven 
relevance judgments rapidly and accurately through crowdsourcing.
 Continuous word and phrase vectors have proven useful in a number of NLP 
tasks. Here we describe our experience using them as a source of features for 
the SemEval-2015 task 3, consisting of two community question an- swering 
subtasks: Answer Selection for cate- gorizing answers as potential, good, and 
bad with regards to their corresponding questions; and YES/NO inference for 
predicting a yes, no, or unsure response to a YES/NO question us- ing all of 
its good answers. Our system ranked 6th and 1st in the English answer selection 
and YES/NO inference subtasks respectively, and 2nd in the Arabic answer 
selection subtask.
 The word2vec model and application by Mikolov et al. have attracted a great 
amount of attention in recent two years. The vector representations of words 
learned by word2vec models have been proven to be able to carry semantic 
meanings and are useful in various NLP tasks. As an increasing number of 
researchers would like to experiment with word2vec, I notice that there lacks a 
material that comprehensively explains the parameter learning process of 
word2vec in details, thus preventing many people with less neural network 
experience from understanding how exactly word2vec works.
-This note provides detailed derivations and explanations of the parameter up- 
date equations for the word2vec models, including the original continuous 
bag-of-word (CBOW) and skip-gram models, as well as advanced tricks, 
hierarchical soft-max and negative sampling. In the appendix a review is given 
on the basics of neuron network models and backpropagation.
+This note provides detailed derivations and explanations of the parameter 
update equations for the word2vec models, including the original continuous 
bag-of-word (CBOW) and skip-gram models, as well as advanced tricks, 
hierarchical soft-max and negative sampling. In the appendix a review is given 
on the basics of neuron network models and backpropagation.
 Over the past few years, neural networks have re-emerged as powerful 
machine-learning
 models, yielding state-of-the-art results in fields such as image recognition 
and speech
 processing. More recently, neural network models started to be applied also to 
textual
@@ -22,4 +22,13 @@ network models from the perspective of n
 to bring natural-language researchers up to speed with the neural techniques. 
The tutorial
 covers input encoding for natural language tasks, feed-forward networks, 
convolutional
 networks, recurrent networks and recursive networks, as well as the 
computation graph
-abstraction for automatic gradient computation
\ No newline at end of file
+abstraction for automatic gradient computation
+The development of intelligent machines is one of the biggest unsolved
+challenges in computer science. In this paper, we propose some
+fundamental properties these machines should have, focusing in particular
+on communication and learning. We discuss a simple environment
+that could be used to incrementally teach a machine the basics
+of natural-language-based communication, as a prerequisite to more
+complex interaction with human users. We also present some conjectures
+on the sort of algorithms the machine should support in order
+to profitably learn from the environment.
\ No newline at end of file



