Author: srowen
Date: Fri May  1 07:32:27 2009
New Revision: 770553

URL: http://svn.apache.org/viewvc?rev=770553&view=rev
Log:
Added SamplingIterator and SamplingIterable and fixed up issue with sampling in 
NearestNUserNeighborhood

Added:
    
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
    
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
Modified:
    
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
    
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
    
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java

Added: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
URL: 
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java?rev=770553&view=auto
==============================================================================
--- 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
 (added)
+++ 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
 Fri May  1 07:32:27 2009
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * Wraps an {@link Iterable} whose {@link Iterable#iterator()} returns only some subset of the elements
+ * that it would, as determined by a sampling rate parameter.
+ */
+public final class SamplingIterable<T> implements Iterable<T> {
+
+  private final Iterable<? extends T> delegate; // underlying iterable whose elements are sampled
+  private final double samplingRate; // handed unchanged to every SamplingIterator created by iterator()
+
+  public SamplingIterable(Iterable<? extends T> delegate, double samplingRate) 
{ // stores both arguments as given; no validation is performed here
+    this.delegate = delegate;
+    this.samplingRate = samplingRate;
+  }
+
+  @Override
+  public Iterator<T> iterator() { // each call asks the delegate for a fresh iterator and wraps it
+    return new SamplingIterator<T>(delegate.iterator(), samplingRate);
+  }
+
+  public static <T> Iterable<T> maybeWrapIterable(Iterable<T> delegate, double 
samplingRate) { // convenience factory that avoids wrapping when no sampling is needed
+    return samplingRate >= 1.0 ? delegate : new SamplingIterable<T>(delegate, 
samplingRate); // rate >= 1.0 returns the delegate itself; anything else gets a sampling wrapper
+  }
+
+}
\ No newline at end of file

Added: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
URL: 
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java?rev=770553&view=auto
==============================================================================
--- 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
 (added)
+++ 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
 Fri May  1 07:32:27 2009
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.util.Random;
+import java.util.NoSuchElementException;
+
+/**
+ * Wraps an {@link Iterator} and returns only some subset of the elements
+ * that it would, as determined by a sampling rate parameter.
+ */
+public final class SamplingIterator<T> implements Iterator<T> {
+
+  private static final Random r = RandomUtils.getRandom(); // single static generator used by every instance
+
+  private final Iterator<? extends T> delegate; // source of candidate elements
+  private final double samplingRate; // an element is kept when r.nextDouble() is below this value
+  private T next; // look-ahead: the element the next call to next() will return; null once exhausted
+  private boolean hasNext; // false once doNext() drains the delegate without keeping an element
+
+  public SamplingIterator(Iterator<? extends T> delegate, double samplingRate) 
{ // note: construction already consumes elements from the delegate via doNext()
+    this.delegate = delegate;
+    this.samplingRate = samplingRate;
+    this.hasNext = true;
+    doNext(); // pre-fetch the first sampled element so hasNext() is accurate right away
+  }
+
+  @Override
+  public boolean hasNext() { // reports the look-ahead state only; never touches the delegate
+    return hasNext;
+  }
+
+  @Override
+  public T next() {
+    if (hasNext) {
+      T result = next; // hand out the pre-fetched element...
+      doNext(); // ...and look ahead for the one after it
+      return result;
+    }
+    throw new NoSuchElementException(); // no sampled element remains
+  }
+
+  private void doNext() { // advances the look-ahead to the next sampled element, if there is one
+    boolean found = false;
+    while (delegate.hasNext()) {
+      T delegateNext = delegate.next(); // consumed from the delegate whether or not it is kept
+      if (r.nextDouble() < samplingRate) { // random draw decides whether this element is kept
+        next = delegateNext;
+        found = true;
+        break;
+      }
+    }
+    if (!found) { // delegate ran out before any element was kept
+      hasNext = false;
+      next = null; // clear the stale look-ahead reference
+    }
+  }
+
+  /**
+   * @throws UnsupportedOperationException always; this iterator does not support removal
+   */
+  @Override
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+
+}

Modified: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
URL: 
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
--- 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
 (original)
+++ 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
 Fri May  1 07:32:27 2009
@@ -20,20 +20,16 @@
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
 
 import java.util.Collection;
-import java.util.Random;
 
 /**
  * <p>Contains methods and resources useful to all classes in this package.</p>
  */
 abstract class AbstractUserNeighborhood implements UserNeighborhood {
 
-  private static final Random random = RandomUtils.getRandom();
-
   private final UserSimilarity userSimilarity;
   private final DataModel dataModel;
   private final double samplingRate;
@@ -64,8 +60,8 @@
     return dataModel;
   }
 
-  final boolean sampleForUser() {
-    return samplingRate >= 1.0 || random.nextDouble() < samplingRate;
+  final double getSamplingRate() {
+    return samplingRate;
   }
 
   @Override

Modified: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
URL: 
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
--- 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
 (original)
+++ 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
 Fri May  1 07:32:27 2009
@@ -22,6 +22,7 @@
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.User;
 import org.apache.mahout.cf.taste.impl.recommender.TopItems;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -31,7 +32,7 @@
 
 /**
 * <p>Computes a neighborhood consisting of the nearest n {@link User}s to 
a given {@link User}.
- * "Nearest" is defined by the given {@link 
org.apache.mahout.cf.taste.similarity.UserSimilarity}.</p>
+ * "Nearest" is defined by the given {@link UserSimilarity}.</p>
  */
 public final class NearestNUserNeighborhood extends AbstractUserNeighborhood {
 
@@ -97,7 +98,8 @@
 
     TopItems.Estimator<User> estimator = new Estimator(userSimilarityImpl, 
theUser, minSimilarity);
 
-    List<User> neighborhood = TopItems.getTopUsers(n, dataModel.getUsers(), 
null, estimator);
+    Iterable<? extends User> users = 
SamplingIterable.maybeWrapIterable(dataModel.getUsers(), getSamplingRate());
+    List<User> neighborhood = TopItems.getTopUsers(n, users, null, estimator);
 
     log.trace("UserNeighborhood around user ID '{}' is: {}", userID, 
neighborhood);
 

Modified: 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
URL: 
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
--- 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
 (original)
+++ 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
 Fri May  1 07:32:27 2009
@@ -21,6 +21,7 @@
 import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -33,7 +34,7 @@
 /**
 * <p>Computes a neigbhorhood consisting of all {@link User}s whose 
similarity to the
 * given {@link User} meets or exceeds a certain threshold. Similarity is 
defined by the given
- * {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.</p>
+ * {@link UserSimilarity}.</p>
  */
 public final class ThresholdUserNeighborhood extends AbstractUserNeighborhood {
 
@@ -83,12 +84,14 @@
     DataModel dataModel = getDataModel();
     User theUser = dataModel.getUser(userID);
     List<User> neighborhood = new ArrayList<User>();
-    Iterator<? extends User> users = dataModel.getUsers().iterator();
+    Iterable<? extends User> usersIterable =
+      SamplingIterable.maybeWrapIterable(dataModel.getUsers(), 
getSamplingRate());
+    Iterator<? extends User> users = usersIterable.iterator();
     UserSimilarity userSimilarityImpl = getUserSimilarity();
 
     while (users.hasNext()) {
       User user = users.next();
-      if (sampleForUser() && !userID.equals(user.getID())) {
+      if (!userID.equals(user.getID())) {
         double theSimilarity = userSimilarityImpl.userSimilarity(theUser, 
user);
         if (!Double.isNaN(theSimilarity) && theSimilarity >= threshold) {
           neighborhood.add(user);


Reply via email to