Author: srowen
Date: Mon Apr 26 14:49:37 2010
New Revision: 938070

URL: http://svn.apache.org/viewvc?rev=938070&view=rev
Log:
Remove normalization by vector length since it was possibly confusing and not necessarily desirable in all cases. Simplified version is comparably effective.

Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java?rev=938070&r1=938069&r2=938070&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarity.java Mon Apr 26 14:49:37 2010 @@ -54,17 +54,8 @@ public final class EuclideanDistanceSimi @Override double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) { - if (n == 0) { - return Double.NaN; - } - double denominator = Math.sqrt(sumX2) + Math.sqrt(sumY2); - if (denominator == 0.0) { - return Double.NaN; - } - // normalize a bit for magnitude - sumXYdiff2 /= denominator; - // divide by n below to not automatically give users with more overlap more similarity - return 1.0 / (1.0 + Math.sqrt(sumXYdiff2) / n); + // divide denominator by n below to not automatically give users with more overlap more similarity + return n / (1.0 + Math.sqrt(sumXYdiff2)); } } \ No newline at end of file Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java?rev=938070&r1=938069&r2=938070&view=diff 
============================================================================== --- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java (original) +++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/EuclideanDistanceSimilarityTest.java Mon Apr 26 14:49:37 2010 @@ -55,7 +55,7 @@ public final class EuclideanDistanceSimi {3.0, 3.0}, }); double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2); - assertTrue(Double.isNaN(correlation)); + assertEquals(1.0, correlation); } public void testNoCorrelation1() throws Exception { @@ -66,7 +66,7 @@ public final class EuclideanDistanceSimi {-3.0, 2.0}, }); double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2); - assertCorrelationEquals(0.424465381883345, correlation); + assertCorrelationEquals(0.24357264905599915, correlation); } public void testNoCorrelation1Weighted() throws Exception { @@ -77,7 +77,7 @@ public final class EuclideanDistanceSimi {-3.0, 2.0}, }); double correlation = new EuclideanDistanceSimilarity(dataModel, Weighting.WEIGHTED).userSimilarity(1, 2); - assertCorrelationEquals(0.8081551272944483, correlation); + assertCorrelationEquals(0.747857549685333, correlation); } public void testNoCorrelation2() throws Exception { @@ -88,7 +88,7 @@ public final class EuclideanDistanceSimi {null, null, 1.0}, }); double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2); - assertTrue(Double.isNaN(correlation)); + assertEquals(0.0, correlation); } public void testNoCorrelation3() throws Exception { @@ -99,7 +99,7 @@ public final class EuclideanDistanceSimi {70.0, 80.0, 90.0}, }); double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2); - assertCorrelationEquals(0.3606507916004517, correlation); + assertCorrelationEquals(0.10244407226831752, correlation); } public void testSimple() throws Exception { @@ -110,7 +110,7 
@@ public final class EuclideanDistanceSimi {2.0, 5.0, 6.0}, }); double correlation = new EuclideanDistanceSimilarity(dataModel).userSimilarity(1, 2); - assertCorrelationEquals(0.5896248568217328, correlation); + assertCorrelationEquals(0.5598164905901122, correlation); } public void testSimpleWeighted() throws Exception { @@ -121,7 +121,7 @@ public final class EuclideanDistanceSimi {2.0, 5.0, 6.0}, }); double correlation = new EuclideanDistanceSimilarity(dataModel, Weighting.WEIGHTED).userSimilarity(1, 2); - assertCorrelationEquals(0.8974062142054332, correlation); + assertCorrelationEquals(0.889954122647528, correlation); } public void testFullItemCorrelation1() throws Exception { @@ -145,8 +145,7 @@ public final class EuclideanDistanceSimi }); double correlation = new EuclideanDistanceSimilarity(dataModel).itemSimilarity(0, 1); - // Yeah, undefined in this case - assertTrue(Double.isNaN(correlation)); + assertEquals(1.0, correlation); } public void testNoItemCorrelation1() throws Exception { @@ -158,7 +157,7 @@ public final class EuclideanDistanceSimi }); double correlation = new EuclideanDistanceSimilarity(dataModel).itemSimilarity(0, 1); - assertCorrelationEquals(0.424465381883345, correlation); + assertCorrelationEquals(0.24357264905599915, correlation); } public void testNoItemCorrelation2() throws Exception { @@ -168,9 +167,8 @@ public final class EuclideanDistanceSimi {null, 1.0, null}, {null, null, 1.0}, }); - double correlation = - new EuclideanDistanceSimilarity(dataModel).itemSimilarity(1, 2); - assertTrue(Double.isNaN(correlation)); + double correlation = new EuclideanDistanceSimilarity(dataModel).itemSimilarity(1, 2); + assertEquals(0.0, correlation); } public void testNoItemCorrelation3() throws Exception { @@ -183,7 +181,7 @@ public final class EuclideanDistanceSimi }); double correlation = new EuclideanDistanceSimilarity(dataModel).itemSimilarity(0, 1); - assertCorrelationEquals(0.3606507916004517, correlation); + 
assertCorrelationEquals(0.10244407226831752, correlation); } public void testSimpleItem() throws Exception { @@ -196,7 +194,7 @@ public final class EuclideanDistanceSimi }); double correlation = new EuclideanDistanceSimilarity(dataModel).itemSimilarity(0, 1); - assertCorrelationEquals(0.5896248568217328, correlation); + assertCorrelationEquals(0.5598164905901122, correlation); } public void testSimpleItemWeighted() throws Exception { @@ -209,7 +207,7 @@ public final class EuclideanDistanceSimi }); ItemSimilarity itemSimilarity = new EuclideanDistanceSimilarity(dataModel, Weighting.WEIGHTED); double correlation = itemSimilarity.itemSimilarity(0, 1); - assertCorrelationEquals(0.8974062142054332, correlation); + assertCorrelationEquals(0.889954122647528, correlation); } public void testRefresh() throws TasteException {