Author: dfilimon
Date: Fri May 17 12:49:52 2013
New Revision: 1483776
URL: http://svn.apache.org/r1483776
Log:
MAHOUT-1217: Nearest neighbor searchers sometimes fail to remove points
This fixes FastProjectionSearch's searchFirst() which was not also searching
through pendingAdditions. I think I replicated the bug in the new testRemove()
in SearchSanityTest that now passes.
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java
Modified: mahout/trunk/CHANGELOG
URL:
http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1483776&r1=1483775&r2=1483776&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Fri May 17 12:49:52 2013
@@ -2,6 +2,8 @@ Mahout Change Log
Release 0.8 - unreleased
+__MAHOUT-1217: Nearest neighbor searchers sometimes fail to remove points: fix
in FastProjectionSearch's searchFirst (dfilimon)
+
__MAHOUT-1216: Add locality sensitive hashing and a LocalitySensitiveHash
searcher (dfilimon)
__MAHOUT-1181: Adding StreamingKMeans MapReduce classes (dfilimon)
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java?rev=1483776&r1=1483775&r2=1483776&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/neighborhood/FastProjectionSearch.java
Fri May 17 12:49:52 2013
@@ -10,10 +10,10 @@ import com.google.common.collect.Abstrac
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
-import org.apache.mahout.math.random.RandomProjector;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.random.RandomProjector;
import org.apache.mahout.math.random.WeightedThing;
/**
@@ -185,6 +185,14 @@ public class FastProjectionSearch extend
}
}
+ for (Vector vector : pendingAdditions) {
+ double distance = distanceMeasure.distance(vector, query);
+ if (distance < bestDistance && (!differentThanQuery ||
!vector.equals(query))) {
+ bestDistance = distance;
+ bestVector = vector;
+ }
+ }
+
return new WeightedThing<Vector>(bestVector, bestDistance);
}
@@ -206,6 +214,8 @@ public class FastProjectionSearch extend
break;
}
double oldWeight = currProjections.get(middle).getWeight();
+ // Elements to be removed are kept in the sorted array until the next
reindex, but their inner vector
+ // is set to null.
scalarProjections.get(i).set(middle, new
WeightedThing<Vector>(oldWeight));
}
if (isProjected) {
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java?rev=1483776&r1=1483775&r2=1483776&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/math/neighborhood/SearchSanityTest.java
Fri May 17 12:49:52 2013
@@ -29,6 +29,7 @@ import org.apache.mahout.math.DenseVecto
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.MatrixSlice;
import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.jet.math.Constants;
import org.apache.mahout.math.random.MultiNormal;
import org.apache.mahout.math.random.WeightedThing;
import org.junit.Test;
@@ -199,8 +200,27 @@ public class SearchSanityTest extends Ma
assertEquals("First isn't self", 0, first.getWeight(), 0);
assertEquals("First isn't self", datapoint, first.getValue());
assertEquals("First doesn't match", first, firstTwo.get(0));
- assertEquals(String.format("Second doesn't match got %f expected %f",
second.getWeight(), firstTwo.get(1).getWeight()),
- second, firstTwo.get(1));
+ assertEquals("Second doesn't match", second, firstTwo.get(1));
+ }
+ }
+
+ @Test
+ public void testRemove() {
+ searcher.clear();
+ for (int i = 0; i < dataPoints.rowSize(); ++i) {
+ Vector datapoint = dataPoints.viewRow(i);
+ searcher.add(datapoint);
+ // As long as points are not searched for right after being added, in
FastProjectionSearch, points are not
+ // merged with the main list right away, so if a search for a point
occurs before it's merged the pendingAdditions
+ // list also needs to be looked at.
+ // This used to not be the case for searchFirst(), thereby causing
removal failures.
+ if (i % 2 == 0) {
+ assertTrue("Failed to find self [search]",
+ searcher.search(datapoint, 1).get(0).getWeight() <
Constants.EPSILON);
+ assertTrue("Failed to find self [searchFirst]",
+ searcher.searchFirst(datapoint, false).getWeight() <
Constants.EPSILON);
+ assertTrue("Failed to remove self", searcher.remove(datapoint,
Constants.EPSILON));
+ }
}
}
}