Author: srowen
Date: Thu Mar 31 09:28:44 2011
New Revision: 1087226

URL: http://svn.apache.org/viewvc?rev=1087226&view=rev
Log:
MAHOUT-643 add city-block similarity metric

Added:
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedCityBlockVectorSimilarity.java
Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityType.java

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java?rev=1087226&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/similarity/CityBlockSimilarity.java
 Thu Mar 31 09:28:44 2011
@@ -0,0 +1,98 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.cf.taste.impl.similarity;
+
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+import org.apache.mahout.cf.taste.similarity.PreferenceInferrer;
+import org.apache.mahout.cf.taste.similarity.UserSimilarity;
+
+/**
+ * Implementation of City Block distance (also known as Manhattan distance) - 
the absolute value of the difference of
+ * each direction is summed.  The resulting unbounded distance is then mapped 
between 0 and 1.
+ */
+public final class CityBlockSimilarity implements UserSimilarity, 
ItemSimilarity {
+
+  private final DataModel dataModel;
+
+  public CityBlockSimilarity(DataModel dataModel) {
+    this.dataModel = dataModel;
+  }
+
+  /**
+   * @throws UnsupportedOperationException
+   */
+  @Override
+  public void setPreferenceInferrer(PreferenceInferrer inferrer) {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed);
+    RefreshHelper.maybeRefresh(alreadyRefreshed, dataModel);
+  }
+
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) throws 
TasteException {
+    int preferring1 = dataModel.getNumUsersWithPreferenceFor(itemID1);
+    int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2);
+    int intersection = dataModel.getNumUsersWithPreferenceFor(itemID1, 
itemID2);
+    return doSimilarity(preferring1, preferring2, intersection);
+  }
+
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws 
TasteException {
+    int preferring1 = dataModel.getNumUsersWithPreferenceFor(itemID1);
+    double[] distance = new double[itemID2s.length];
+    for (int i = 0; i < itemID2s.length; ++i) {
+      int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2s[i]);
+      int intersection = dataModel.getNumUsersWithPreferenceFor(itemID1, 
itemID2s[i]);
+      distance[i] = doSimilarity(preferring1, preferring2, intersection);
+    }
+    return distance;
+  }
+
+  @Override
+  public double userSimilarity(long userID1, long userID2) throws 
TasteException {
+    FastIDSet prefs1 = dataModel.getItemIDsFromUser(userID1);
+    FastIDSet prefs2 = dataModel.getItemIDsFromUser(userID2);
+    int prefs1Size = prefs1.size();
+    int prefs2Size = prefs2.size();
+    int intersectionSize = prefs1Size < prefs2Size ? 
prefs2.intersectionSize(prefs1) : prefs1.intersectionSize(prefs2);
+    return doSimilarity(prefs1Size, prefs2Size, intersectionSize);
+  }
+
+  /**
+   * Calculate City Block Distance from total non-zero values and 
intersections and map to a similarity value.
+   *
+   * @param pref1        number of non-zero values in left vector
+   * @param pref2        number of non-zero values in right vector
+   * @param intersection number of overlapping non-zero values
+   */
+  private static double doSimilarity(int pref1, int pref2, int intersection) {
+    int distance = pref1 + pref2 - 2 * intersection;
+    return 1.0 / (1.0 + distance);
+  }
+
+}

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityType.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityType.java?rev=1087226&r1=1087225&r2=1087226&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityType.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/SimilarityType.java
 Thu Mar 31 09:28:44 2011
@@ -19,6 +19,7 @@ package org.apache.mahout.math.hadoop.si
 
 import java.util.Arrays;
 
+import 
org.apache.mahout.math.hadoop.similarity.vector.DistributedCityBlockVectorSimilarity;
 import 
org.apache.mahout.math.hadoop.similarity.vector.DistributedCooccurrenceVectorSimilarity;
 import 
org.apache.mahout.math.hadoop.similarity.vector.DistributedEuclideanDistanceVectorSimilarity;
 import 
org.apache.mahout.math.hadoop.similarity.vector.DistributedLoglikelihoodVectorSimilarity;
@@ -36,7 +37,8 @@ public enum SimilarityType {
   
SIMILARITY_PEARSON_CORRELATION(DistributedPearsonCorrelationVectorSimilarity.class),
   
SIMILARITY_TANIMOTO_COEFFICIENT(DistributedTanimotoCoefficientVectorSimilarity.class),
   
SIMILARITY_UNCENTERED_COSINE(DistributedUncenteredCosineVectorSimilarity.class),
-  
SIMILARITY_UNCENTERED_ZERO_ASSUMING_COSINE(DistributedUncenteredZeroAssumingCosineVectorSimilarity.class);
+  
SIMILARITY_UNCENTERED_ZERO_ASSUMING_COSINE(DistributedUncenteredZeroAssumingCosineVectorSimilarity.class),
+  SIMILARITY_CITY_BLOCK(DistributedCityBlockVectorSimilarity.class);
 
   private final Class<? extends DistributedVectorSimilarity> 
similarityImplementation;
 

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedCityBlockVectorSimilarity.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedCityBlockVectorSimilarity.java?rev=1087226&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedCityBlockVectorSimilarity.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/vector/DistributedCityBlockVectorSimilarity.java
 Thu Mar 31 09:28:44 2011
@@ -0,0 +1,48 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.mahout.math.hadoop.similarity.vector;
+
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.hadoop.similarity.Cooccurrence;
+
+/**
+ * Implementation of City Block distance (also known as Manhattan distance) - 
the absolute value of the difference of
+ * each direction is summed.  The resulting unbounded distance is then mapped 
between 1 and -1.
+ */
+public final class DistributedCityBlockVectorSimilarity extends 
AbstractDistributedVectorSimilarity {
+
+  @Override
+  protected double doComputeResult(int rowA,
+                                   int rowB,
+                                   Iterable<Cooccurrence> cooccurrences,
+                                   double weightOfVectorA,
+                                   double weightOfVectorB,
+                                   int numberOfColumns) {
+    int cooccurrenceCount = countElements(cooccurrences);
+    if (cooccurrenceCount == 0) {
+      return Double.NaN;
+    }
+    double distance = weightOfVectorA + weightOfVectorB - 2 * 
cooccurrenceCount;
+    return 1.0 / (1.0 + distance);
+  }
+
+  @Override
+  public double weight(Vector v) {
+    return (double) countElements(v.iterateNonZero());
+  }
+
+}


Reply via email to