Author: tdunning
Date: Fri Aug 20 03:23:19 2010
New Revision: 987369

URL: http://svn.apache.org/viewvc?rev=987369&view=rev
Log:
Setting up for interaction effects in vector encoding.

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java?rev=987369&r1=987368&r2=987369&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/FeatureVectorEncoder.java Fri Aug 20 03:23:19 2010
@@ -110,6 +110,29 @@ public abstract class FeatureVectorEncod
   }
 
   /**
+   * Hash four strings and an integer into the range [0..numFeatures-1].
+   *
+   * @param term1       The first string.
+   * @param term2       The second string.
+   * @param term3       The third string
+   * @param term4       And the fourth.
+   * @param probe       An integer that modifies the resulting hash.
+   * @param numFeatures The range into which the resulting hash must fit.
+   * @return An integer in the range [0..numFeatures-1] that has good spread for small changes in
+   *         term and probe.
+   */
+  protected int hash(String term1, String term2, String term3, String term4, int probe, int numFeatures) {
+    long r = MurmurHash.hash64A(term1.getBytes(Charset.forName("UTF-8")), probe);
+    r = MurmurHash.hash64A(term2.getBytes(Charset.forName("UTF-8")), (int) r) % numFeatures;
+    r = MurmurHash.hash64A(term3.getBytes(Charset.forName("UTF-8")), (int) r) % numFeatures;
+    r = MurmurHash.hash64A(term4.getBytes(Charset.forName("UTF-8")), (int) r) % numFeatures;
+    if (r < 0) {
+      r += numFeatures;
+    }
+    return (int) r;
+  }
+
+  /**
    * Converts a value into a form that would help a human understand the internals of how the value
    * is being interpreted.  For text-like things, this is likely to be a list of the terms found
    * with associated weights (if any).

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java?rev=987369&r1=987368&r2=987369&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/vectors/InteractionValueEncoder.java Fri Aug 20 03:23:19 2010
@@ -21,62 +21,56 @@ import org.apache.mahout.math.Vector;
 
 public class InteractionValueEncoder extends FeatureVectorEncoder {
 
-    protected static final int INTERACTION_VALUE_HASH_SEED_1 = 100;
-    protected static final int INTERACTION_VALUE_HASH_SEED_2 = 200;
-
-    protected InteractionValueEncoder(String name) {
-       super(name, 2);
-     }
-
-      /**
-      * Adds a value to a vector.
-      *
-      * @param originalForm The original form of the first value as a string.
-      * @param data          The vector to which the value should be added.
-      */
-     @Override
-     public void addToVector(String originalForm, double w, Vector data) {
-     }
-
-     /**
-      * Adds a value to a vector.
-      *
-      * @param originalForm1 The original form of the first value as a string.
-      * @param originalForm2 The original form of the second value as a string.
-      * @param data          The vector to which the value should be added.
-      */
-     public void addInteractionToVector(String originalForm1, String originalForm2, Vector data) {
-       int probes = getProbes();
-       String name = getName();
-       for (int i = 0; i < probes; i++) {
-         int h1 = hash1(name, originalForm1, i, data.size());
-         int h2 = hash2(name, originalForm1, i, data.size());
-         int j =  hash1(name, originalForm2, i, data.size());
-         int n = (h1 + j*h2) % data.size();
-         trace(String.format("%s:%s", originalForm1, originalForm2), n);
-         data.set(n, data.get(n) + 1);
-       }
-     }
-
-    /**
-      * Converts a value into a form that would help a human understand the internals of how the value
-      * is being interpreted.  For text-like things, this is likely to be a list of the terms found with
-      * associated weights (if any).
-      *
-      * @param originalForm The original form of the value as a string.
-      * @return A string that a human can read.
-      */
-     @Override
-     public String asString(String originalForm) {
-       return String.format("%s:%s", getName(), originalForm);
-     }
-
-     protected int hash1(String term1, String term2, int probe, int numFeatures) {
-       return hash(term1, term2, probe+INTERACTION_VALUE_HASH_SEED_1,numFeatures);
-     }
-
-     protected int hash2(String term1, String term2, int probe, int numFeatures) {
-       return hash(term1, term2, probe+INTERACTION_VALUE_HASH_SEED_2,numFeatures);
-     }
+  protected static final int INTERACTION_VALUE_HASH_SEED_1 = 100;
+  protected static final int INTERACTION_VALUE_HASH_SEED_2 = 200;
+  private String name1;
+  private String name2;
+
+  protected InteractionValueEncoder(String name1, String name2) {
+    super(name1 + ":" + name2, 2);
+    this.name1 = name1;
+    this.name2 = name2;
+  }
+
+  /**
+   * Adds a value to a vector.
+   *
+   * @param originalForm The original form of the first value as a string.
+   * @param data         The vector to which the value should be added.
+   */
+  @Override
+  public void addToVector(String originalForm, double w, Vector data) {
+    throw new UnsupportedOperationException("Must have two arguments to encode interaction");
+  }
+
+  /**
+   * Adds a value to a vector.
+   *
+   * @param originalForm1 The original form of the first value as a string.
+   * @param originalForm2 The original form of the second value as a string.
+   * @param data          The vector to which the value should be added.
+   */
+  public void addToVector(String originalForm1, String originalForm2, Vector data) {
+    int probes = getProbes();
+    String name = getName();
+    for (int i = 0; i < probes; i++) {
+      int n = hash(name1, originalForm1, name2, originalForm2, i, data.size());
+      trace(String.format("%s:%s", originalForm1, originalForm2), n);
+      data.set(n, data.get(n) + 1);
+    }
+  }
+
+  /**
+   * Converts a value into a form that would help a human understand the internals of how the
+   * value is being interpreted.  For text-like things, this is likely to be a list of the terms
+   * found with associated weights (if any).
+   *
+   * @param originalForm The original form of the value as a string.
+   * @return A string that a human can read.
+   */
+  @Override
+  public String asString(String originalForm) {
+    return String.format("%s:%s", getName(), originalForm);
+  }
 }
 


Reply via email to