Author: srowen
Date: Wed Dec  7 10:24:47 2011
New Revision: 1211377

URL: http://svn.apache.org/viewvc?rev=1211377&view=rev
Log:
MAHOUT-910 revamp sampling candidate strategy to expose sampling of items, 
items' users, and those users' items

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategyTest.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java?rev=1211377&r1=1211376&r2=1211377&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java
 Wed Dec  7 10:24:47 2011
@@ -1,4 +1,4 @@
-/**
+/*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
@@ -20,6 +20,9 @@ package org.apache.mahout.cf.taste.impl.
 import com.google.common.base.Preconditions;
 import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator;
+import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
+import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.Preference;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
@@ -28,57 +31,89 @@ import org.apache.mahout.common.iterator
 import java.util.Iterator;
 
 /**
- * <p>returns all items that have not been rated by the user and that were 
preferred by another user
- * that has preferred at least one item that the current user has preferred 
too</p>
+ * <p>Returns all items that have not been rated by the user <em>(3)</em> and 
that were preferred by another user
+ * <em>(2)</em> that has preferred at least one item <em>(1)</em> that the 
current user has preferred too.</p>
+ *
+ * <p>This strategy uses sampling to limit the number of items that are 
considered, by sampling three different
+ * things, noted above:</p>
  *
- * <p>this strategy uses sampling in a way that only a certain amount of 
preferences per item is considered
- * <pre>
+ * <ol>
+ *   <li>The items</li>
+ * </ol>
+ * 
+ * <p><pre>
  * max(defaultMaxPrefsPerItemConsidered, userItemCountFactor * 
log(max(N_users, N_items)))
  * </pre></p>
+ * 
+ * <p>This limit is applied in two ways. First, for each item that the current 
user prefers, it limits
+ * the number of other users who preferred that item that are then considered. 
Then for each of those users,
+ * it limits the number of their preferred items that are added to the list of 
candidates.</p>
  */
 public class SamplingCandidateItemsStrategy extends 
AbstractCandidateItemsStrategy {
 
-  private final int defaultMaxPrefsPerItemConsidered;
-  private final int userItemCountMultiplier;
+  private static final int DEFAULT_FACTOR = 5;
+  
+  private final int maxItems;
+  private final int maxUsersPerItem;
+  private final int maxItemsPerUser;
 
-  /**
-   * uses defaultMaxPrefsPerItemConsidered = 100 and userItemCountMultiplier = 
20 as default values
-   * 
-   * @see SamplingCandidateItemsStrategy#SamplingCandidateItemsStrategy(int, 
int)
-   */
-  public SamplingCandidateItemsStrategy() {
-    this(100, 20);
+  public SamplingCandidateItemsStrategy(int numUsers, int numItems) {
+    this(DEFAULT_FACTOR, DEFAULT_FACTOR, DEFAULT_FACTOR, numUsers, numItems);
   }
 
-  /**
-   * <p>the maximum number of prefs considered per item will be computed like 
this:
-   * <pre>
-   *   max(defaultMaxPrefsPerItemConsidered, userItemCountFactor * 
log(max(N_users, N_items)))
-   * </pre>
-   * </p>
-   */
-  public SamplingCandidateItemsStrategy(int defaultMaxPrefsPerItemConsidered, 
int userItemCountMultiplier) {
-    Preconditions.checkArgument(defaultMaxPrefsPerItemConsidered > 0, 
"defaultMaxPrefsPerItemConsidered must be " +
-        "greater zero");
-    Preconditions.checkArgument(userItemCountMultiplier > 0, 
"userItemCountMultiplier must be greater zero");
-    this.defaultMaxPrefsPerItemConsidered = defaultMaxPrefsPerItemConsidered;
-    this.userItemCountMultiplier = userItemCountMultiplier;
+  public SamplingCandidateItemsStrategy(int itemsFactor,
+                                        int usersPerItemFactor,
+                                        int candidatesPerUserFactor,
+                                        int numUsers,
+                                        int numItems) {
+    Preconditions.checkArgument(itemsFactor > 0);
+    Preconditions.checkArgument(usersPerItemFactor > 0);
+    Preconditions.checkArgument(candidatesPerUserFactor > 0);
+    Preconditions.checkArgument(numUsers > 0);
+    Preconditions.checkArgument(numItems > 0);
+    maxItems = (int) (itemsFactor * (1.0 + Math.log(numItems)));
+    maxUsersPerItem = (int) (itemsFactor * (1.0 + Math.log(numUsers)));
+    maxItemsPerUser = (int) (itemsFactor *(1.0 + Math.log(numItems)));
   }
 
   @Override
   protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel 
dataModel) throws TasteException {
-    int maxPrefsPerItemConsidered = (int) 
Math.max(defaultMaxPrefsPerItemConsidered,
-        userItemCountMultiplier * Math.log(Math.max(dataModel.getNumUsers(), 
dataModel.getNumItems())));
     FastIDSet possibleItemsIDs = new FastIDSet();
-    for (long itemID : preferredItemIDs) {
+    LongPrimitiveIterator preferredItemIDsIterator = new 
LongPrimitiveArrayIterator(preferredItemIDs);
+    if (preferredItemIDs.length > maxItems) {
+      preferredItemIDsIterator =
+          new SamplingLongPrimitiveIterator(preferredItemIDsIterator, (double) 
maxItems / preferredItemIDs.length);
+    }
+    while (preferredItemIDsIterator.hasNext()) {
+      long itemID = preferredItemIDsIterator.nextLong();
       PreferenceArray prefs = dataModel.getPreferencesForItem(itemID);
-      int prefsConsidered = Math.min(prefs.length(), 
maxPrefsPerItemConsidered);
-      Iterator<Preference> sampledPrefs = new 
FixedSizeSamplingIterator<Preference>(prefsConsidered, prefs.iterator());
-      while (sampledPrefs.hasNext()) {
-        
possibleItemsIDs.addAll(dataModel.getItemIDsFromUser(sampledPrefs.next().getUserID()));
+      int prefsLength = prefs.length();
+      if (prefsLength > maxUsersPerItem) {
+        Iterator<Preference> sampledPrefs =
+            new FixedSizeSamplingIterator<Preference>(maxUsersPerItem, 
prefs.iterator());
+        while (sampledPrefs.hasNext()) {
+          addSomeOf(possibleItemsIDs, 
dataModel.getItemIDsFromUser(sampledPrefs.next().getUserID()));
+        }
+      } else {
+        for (int i = 0; i < prefsLength; i++) {
+          addSomeOf(possibleItemsIDs, 
dataModel.getItemIDsFromUser(prefs.getUserID(i)));
+        }
       }
     }
     possibleItemsIDs.removeAll(preferredItemIDs);
     return possibleItemsIDs;
   }
+  
+  private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs) {
+    if (itemIDs.size() > maxItemsPerUser) {
+      LongPrimitiveIterator it =
+          new SamplingLongPrimitiveIterator(itemIDs.iterator(), (double) 
maxItemsPerUser / itemIDs.size());
+      while (it.hasNext()) {
+        possibleItemIDs.add(it.nextLong());
+      }
+    } else {
+      possibleItemIDs.addAll(itemIDs);
+    }
+  }
+  
 }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategyTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategyTest.java?rev=1211377&r1=1211376&r2=1211377&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategyTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategyTest.java
 Wed Dec  7 10:24:47 2011
@@ -60,7 +60,8 @@ public final class SamplingCandidateItem
 
     DataModel dataModel = new GenericDataModel(userData);
 
-    CandidateItemsStrategy strategy = new SamplingCandidateItemsStrategy(1, 1);
+    CandidateItemsStrategy strategy =
+        new SamplingCandidateItemsStrategy(1, 1, 1, dataModel.getNumUsers(), 
dataModel.getNumItems());
 
     FastIDSet candidateItems = strategy.getCandidateItems(123L, 
prefArrayOfUser123, dataModel);
     /* result can be either item2 or item3 or empty */


Reply via email to