Author: tdunning
Date: Fri Nov  5 03:20:32 2010
New Revision: 1031415

URL: http://svn.apache.org/viewvc?rev=1031415&view=rev
Log:
MAHOUT-539 - Fixed small bug in ConstantValueEncoder
Added tests for ConstantValueEncoder
Added more testing for ContinuousValueEncoder
Allow ContinuousValueEncoder to accept null original form and use weight 
instead.

Added:
    
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java
      - copied, changed from r1031414, 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/CachingValueEncoder.java
 Fri Nov  5 03:20:32 2010
@@ -38,9 +38,11 @@ public abstract class CachingValueEncode
   @Override
   public void setProbes(int probes) {
     super.setProbes(probes);
-    cacheProbeLocations(CONTINUOUS_VALUE_HASH_SEED);
+    cacheProbeLocations(getSeed());
   }
 
+  protected abstract int getSeed();
+
   private void cacheProbeLocations(int seed) {
     cachedProbes = new int[getProbes()];
     for (int i = 0; i < getProbes(); i++) {

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoder.java
 Fri Nov  5 03:20:32 2010
@@ -49,4 +49,9 @@ public class ConstantValueEncoder extend
   public String asString(String originalForm) {
     return getName();
   }
+
+  @Override
+  protected int getSeed() {
+    return 0;
+  }
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoder.java
 Fri Nov  5 03:20:32 2010
@@ -48,7 +48,11 @@ public class ContinuousValueEncoder exte
 
   @Override
   protected double getWeight(byte[] originalForm, double w) {
-    return w * Double.parseDouble(new String(originalForm));
+    if (originalForm!=null) {
+      return w * Double.parseDouble(new String(originalForm));
+    } else {
+      return w;
+    }
   }
 
   /**
@@ -63,4 +67,9 @@ public class ContinuousValueEncoder exte
   public String asString(String originalForm) {
     return getName() + ':' + originalForm;
   }
+
+  @Override
+  protected int getSeed() {
+    return CONTINUOUS_VALUE_HASH_SEED;
+  }
 }

Copied: 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java
 (from r1031414, 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java)
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java&r1=1031414&r2=1031415&rev=1031415&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ConstantValueEncoderTest.java
 Fri Nov  5 03:20:32 2010
@@ -22,58 +22,53 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Vector;
 import org.junit.Test;
 
-public final class ContinuousValueEncoderTest extends MahoutTestCase {
-  
+public final class ConstantValueEncoderTest extends MahoutTestCase {
+
   @Test
   public void testAddToVector() {
-    FeatureVectorEncoder enc = new ContinuousValueEncoder("foo");
+    FeatureVectorEncoder enc = new ConstantValueEncoder("foo");
     Vector v1 = new DenseVector(20);
-    enc.addToVector("-123", v1);
+    enc.addToVector((byte[]) null, -123, v1);
     assertEquals(-123, v1.minValue(), 0);
     assertEquals(0, v1.maxValue(), 0);
     assertEquals(123, v1.norm(1), 0);
 
     v1 = new DenseVector(20);
-    enc.addToVector("123", v1);
+    enc.addToVector((byte[]) null, 123, v1);
     assertEquals(123, v1.maxValue(), 0);
     assertEquals(0, v1.minValue(), 0);
     assertEquals(123, v1.norm(1), 0);
 
     Vector v2 = new DenseVector(20);
     enc.setProbes(2);
-    enc.addToVector("123", v2);
+    enc.addToVector((byte[]) null, 123, v2);
     assertEquals(123, v2.maxValue(), 0);
     assertEquals(2 * 123, v2.norm(1), 0);
 
+    // v1 has one probe, v2 has two.  The first probe in v2 should be in the same
+    // place as the only probe in v1
     v1 = v2.minus(v1);
     assertEquals(123, v1.maxValue(), 0);
     assertEquals(123, v1.norm(1), 0);
 
     Vector v3 = new DenseVector(20);
     enc.setProbes(2);
-    enc.addToVector("100", v3);
+    enc.addToVector((byte[]) null, 100, v3);
     v1 = v2.minus(v3);
     assertEquals(23, v1.maxValue(), 0);
     assertEquals(2 * 23, v1.norm(1), 0);
 
-    enc.addToVector("7", v1);
+    enc.addToVector((byte[]) null, 7, v1);
     assertEquals(30, v1.maxValue(), 0);
     assertEquals(2 * 30, v1.norm(1), 0);
+    assertEquals(30, v1.get(9), 0);
     assertEquals(30, v1.get(10), 0);
-    assertEquals(30, v1.get(18), 0);
-
-    try {
-      enc.addToVector("foobar", v1);
-      fail("Should have noticed bad numeric format");
-    } catch (NumberFormatException e) {
-      assertEquals("For input string: \"foobar\"", e.getMessage());
-    }
   }
 
   @Test
   public void testAsString() {
-    ContinuousValueEncoder enc = new ContinuousValueEncoder("foo");
-    assertEquals("foo:123", enc.asString("123"));
+    ConstantValueEncoder enc = new ConstantValueEncoder("foo");
+    assertEquals("foo", enc.asString("123"));
   }
 
 }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java?rev=1031415&r1=1031414&r2=1031415&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/encoders/ContinuousValueEncoderTest.java
 Fri Nov  5 03:20:32 2010
@@ -45,6 +45,8 @@ public final class ContinuousValueEncode
     assertEquals(123, v2.maxValue(), 0);
     assertEquals(2 * 123, v2.norm(1), 0);
 
+    // v1 has one probe, v2 has two.  The first probe in v2 should be in the same
+    // place as the only probe in v1
     v1 = v2.minus(v1);
     assertEquals(123, v1.maxValue(), 0);
     assertEquals(123, v1.norm(1), 0);
@@ -62,6 +64,13 @@ public final class ContinuousValueEncode
     assertEquals(30, v1.get(10), 0);
     assertEquals(30, v1.get(18), 0);
 
+    v2 = new DenseVector(20);
+    v3 = new DenseVector(20);
+    enc.setProbes(6);
+    enc.addToVector("145", v2);
+    enc.addToVector((byte[]) null, 145, v3);
+    assertEquals(0, v2.minus(v3).norm(1), 0);
+
     try {
       enc.addToVector("foobar", v1);
       fail("Should have noticed bad numeric format");


Reply via email to