Author: srowen
Date: Mon Dec 5 17:54:39 2011
New Revision: 1210544
URL: http://svn.apache.org/viewvc?rev=1210544&view=rev
Log:
MAHOUT-902 item similarity is now NaN for no overlap
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarityTest.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java?rev=1210544&r1=1210543&r2=1210544&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java Mon Dec 5 17:54:39 2011
@@ -105,6 +105,9 @@ public final class TanimotoCoefficientSi
private double doItemSimilarity(long itemID1, long itemID2, int preferring1) throws TasteException {
DataModel dataModel = getDataModel();
int preferring1and2 = dataModel.getNumUsersWithPreferenceFor(itemID1, itemID2);
+ if (preferring1and2 == 0) {
+ return Double.NaN;
+ }
int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2);
return (double) preferring1and2 / (double) (preferring1 + preferring2 - preferring1and2);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java?rev=1210544&r1=1210543&r2=1210544&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/measures/TanimotoCoefficientSimilarity.java Mon Dec 5 17:54:39 2011
@@ -21,7 +21,7 @@ public class TanimotoCoefficientSimilari
@Override
public double similarity(double dots, double normA, double normB, int numberOfColumns) {
- return dots / (normA + normB - dots);
+ return dots == 0 ? Double.NaN : dots / (normA + normB - dots);
}
@Override
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarityTest.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarityTest.java?rev=1210544&r1=1210543&r2=1210544&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarityTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarityTest.java Mon Dec 5 17:54:39 2011
@@ -89,4 +89,33 @@ public final class TanimotoCoefficientSi
new TanimotoCoefficientSimilarity(getDataModel()).refresh(null);
}
+ @Test
+ public void testReturnNaNDoubleWhenNoSimilaritiesForTwoItems() throws Exception {
+ DataModel dataModel = getDataModel(
+ new long[] {1, 2},
+ new Double[][] {
+ {null, null, 3.0},
+ {1.0, 1.0, null},
+ });
+ Double similarity = new TanimotoCoefficientSimilarity(dataModel).itemSimilarity(1, 2);
+ assertEquals(Double.NaN, similarity, EPSILON);
+ }
+
+ @Test
+ public void testItemsSimilarities() throws Exception {
+ DataModel dataModel = getDataModel(
+ new long[] {1, 2},
+ new Double[][] {
+ {2.0, null, 2.0},
+ {1.0, 1.0, 1.0},
+ });
+ TanimotoCoefficientSimilarity tCS = new TanimotoCoefficientSimilarity(dataModel);
+ assertEquals(0.5, tCS.itemSimilarity(0, 1), EPSILON);
+ assertEquals(1, tCS.itemSimilarity(0, 2), EPSILON);
+
+ double[] similarities = tCS.itemSimilarities(0, new long [] {1, 2});
+ assertEquals(0.5, similarities[0], EPSILON);
+ assertEquals(1, similarities[1], EPSILON);
+ }
+
}
\ No newline at end of file