Author: dfilimon
Date: Fri May 17 12:49:52 2013
New Revision: 1483776

URL: http://svn.apache.org/r1483776
Log:
MAHOUT-1217: Nearest neighbor searchers sometimes fail to remove points

This fixes FastProjectionSearch's searchFirst() which was not also searching
through pendingAdditions. I think I replicated the bug in the new testRemove()
in SearchSanityTest that now passes.


Modified:
    mahout/trunk/CHANGELOG
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java

Modified: mahout/trunk/CHANGELOG
URL: 
http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1483776&r1=1483775&r2=1483776&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Fri May 17 12:49:52 2013
@@ -2,6 +2,8 @@ Mahout Change Log
 
 Release 0.8 - unreleased
 
+__MAHOUT-1217: Nearest neighbor searchers sometimes fail to remove points: fix 
in FastProjectionSearch's searchFirst (dfilimon)
+
 __MAHOUT-1216: Add locality sensitive hashing and a LocalitySensitiveHash 
searcher (dfilimon)
 
 __MAHOUT-1181: Adding StreamingKMeans MapReduce classes (dfilimon)

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java?rev=1483776&r1=1483775&r2=1483776&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
 Fri May 17 12:49:52 2013
@@ -10,10 +10,10 @@ import com.google.common.collect.Abstrac
 import com.google.common.collect.Iterables;
 import com.google.common.collect.Lists;
 import com.google.common.collect.Sets;
-import org.apache.mahout.math.random.RandomProjector;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.random.RandomProjector;
 import org.apache.mahout.math.random.WeightedThing;
 
 /**
@@ -185,6 +185,14 @@ public class FastProjectionSearch extend
       }
     }
 
+    for (Vector vector : pendingAdditions) {
+      double distance = distanceMeasure.distance(vector, query);
+      if (distance < bestDistance && (!differentThanQuery || 
!vector.equals(query))) {
+        bestDistance = distance;
+        bestVector = vector;
+      }
+    }
+
     return new WeightedThing<Vector>(bestVector, bestDistance);
   }
 
@@ -206,6 +214,8 @@ public class FastProjectionSearch extend
         break;
       }
       double oldWeight = currProjections.get(middle).getWeight();
+      // Elements to be removed are kept in the sorted array until the next 
reindex, but their inner vector
+      // is set to null.
       scalarProjections.get(i).set(middle, new 
WeightedThing<Vector>(oldWeight));
     }
     if (isProjected) {

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java?rev=1483776&r1=1483775&r2=1483776&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java
 Fri May 17 12:49:52 2013
@@ -29,6 +29,7 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.MatrixSlice;
 import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.jet.math.Constants;
 import org.apache.mahout.math.random.MultiNormal;
 import org.apache.mahout.math.random.WeightedThing;
 import org.junit.Test;
@@ -199,8 +200,27 @@ public class SearchSanityTest extends Ma
       assertEquals("First isn't self", 0, first.getWeight(), 0);
       assertEquals("First isn't self", datapoint, first.getValue());
       assertEquals("First doesn't match", first, firstTwo.get(0));
-      assertEquals(String.format("Second doesn't match got %f expected %f", 
second.getWeight(), firstTwo.get(1).getWeight()),
-          second, firstTwo.get(1));
+      assertEquals("Second doesn't match", second, firstTwo.get(1));
+    }
+  }
+
+  @Test
+  public void testRemove() {
+    searcher.clear();
+    for (int i = 0; i < dataPoints.rowSize(); ++i) {
+      Vector datapoint = dataPoints.viewRow(i);
+      searcher.add(datapoint);
+      // As long as points are not searched for right after being added, in 
FastProjectionSearch, points are not
+      // merged with the main list right away, so if a search for a point 
occurs before it's merged the pendingAdditions
+      // list also needs to be looked at.
+      // This used to not be the case for searchFirst(), thereby causing 
removal failures.
+      if (i % 2 == 0) {
+        assertTrue("Failed to find self [search]",
+            searcher.search(datapoint, 1).get(0).getWeight() < 
Constants.EPSILON);
+        assertTrue("Failed to find self [searchFirst]",
+            searcher.searchFirst(datapoint, false).getWeight() < 
Constants.EPSILON);
+        assertTrue("Failed to remove self", searcher.remove(datapoint, 
Constants.EPSILON));
+      }
     }
   }
 }


Reply via email to