Author: srowen
Date: Fri May 1 07:32:27 2009
New Revision: 770553
URL: http://svn.apache.org/viewvc?rev=770553&view=rev
Log:
Added SamplingIterator and SamplingIterable and fixed up issue with sampling in
NearestNUserNeighborhood
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java?rev=770553&view=auto
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
(added)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterable.java
Fri May 1 07:32:27 2009
@@ -0,0 +1,45 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+
+/**
+ * An {@link Iterable} decorator: each call to {@link #iterator()} wraps a fresh iterator from the
+ * delegate in a {@link SamplingIterator}, so only a subset of the delegate's elements is returned.
+ * Which elements survive is governed by the sampling rate given at construction.
+ */
+public final class SamplingIterable<T> implements Iterable<T> {
+
+  private final Iterable<? extends T> delegate;
+  private final double samplingRate;
+
+  /**
+   * @param delegate the {@link Iterable} whose elements are to be sampled
+   * @param samplingRate sampling rate handed to every {@link SamplingIterator} this object creates
+   */
+  public SamplingIterable(Iterable<? extends T> delegate, double samplingRate) {
+    this.samplingRate = samplingRate;
+    this.delegate = delegate;
+  }
+
+  /**
+   * Chooses between the plain delegate and a sampling wrapper around it.
+   *
+   * @param delegate the {@link Iterable} to (possibly) wrap
+   * @param samplingRate sampling rate to apply
+   * @return <code>delegate</code> itself when <code>samplingRate</code> is at least 1.0; in every
+   *  other case a new {@link SamplingIterable} around it
+   */
+  public static <T> Iterable<T> maybeWrapIterable(Iterable<T> delegate, double samplingRate) {
+    if (samplingRate >= 1.0) {
+      // No sampling requested, so skip the wrapper entirely.
+      return delegate;
+    }
+    return new SamplingIterable<T>(delegate, samplingRate);
+  }
+
+  /**
+   * @return a new {@link SamplingIterator} over a newly obtained iterator of the delegate
+   */
+  @Override
+  public Iterator<T> iterator() {
+    Iterator<? extends T> source = delegate.iterator();
+    return new SamplingIterator<T>(source, samplingRate);
+  }
+
+}
\ No newline at end of file
Added:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java?rev=770553&view=auto
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
(added)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/SamplingIterator.java
Fri May 1 07:32:27 2009
@@ -0,0 +1,83 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.impl.common;
+
+import java.util.Iterator;
+import java.util.Random;
+import java.util.NoSuchElementException;
+
+/**
+ * An {@link Iterator} decorator that returns only some of the elements the wrapped iterator would.
+ * Every element read from the delegate is kept when a freshly drawn random double is below the
+ * sampling rate, and is silently dropped otherwise.
+ *
+ * <p>The iterator reads ahead by one sampled element: the constructor and every call to
+ * {@link #next()} already consume delegate elements up to and including the next one that will
+ * be returned.</p>
+ */
+public final class SamplingIterator<T> implements Iterator<T> {
+
+  /** Random source shared by all instances of this class. */
+  private static final Random random = RandomUtils.getRandom();
+
+  private final Iterator<? extends T> delegate;
+  private final double samplingRate;
+  /** The element the following call to {@link #next()} returns; only meaningful while hasNext is set. */
+  private T next;
+  /** Cleared for good once the delegate runs out without yielding another sampled element. */
+  private boolean hasNext = true;
+
+  /**
+   * @param delegate the {@link Iterator} to draw elements from
+   * @param samplingRate threshold each random draw is compared against
+   */
+  public SamplingIterator(Iterator<? extends T> delegate, double samplingRate) {
+    this.delegate = delegate;
+    this.samplingRate = samplingRate;
+    advance();
+  }
+
+  @Override
+  public boolean hasNext() {
+    return hasNext;
+  }
+
+  /**
+   * @return the next sampled element
+   * @throws NoSuchElementException if no sampled element remains
+   */
+  @Override
+  public T next() {
+    if (!hasNext) {
+      throw new NoSuchElementException();
+    }
+    T current = next;
+    advance();
+    return current;
+  }
+
+  /**
+   * Pulls elements from the delegate until one passes the random test and stores it as the
+   * read-ahead element. If the delegate is exhausted first, marks this iterator as finished.
+   */
+  private void advance() {
+    while (delegate.hasNext()) {
+      T candidate = delegate.next();
+      if (random.nextDouble() < samplingRate) {
+        next = candidate;
+        return;
+      }
+    }
+    // Nothing left that was sampled.
+    next = null;
+    hasNext = false;
+  }
+
+  /**
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void remove() {
+    throw new UnsupportedOperationException();
+  }
+
+}
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java
Fri May 1 07:32:27 2009
@@ -20,20 +20,16 @@
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import java.util.Collection;
-import java.util.Random;
/**
* <p>Contains methods and resources useful to all classes in this package.</p>
*/
abstract class AbstractUserNeighborhood implements UserNeighborhood {
- private static final Random random = RandomUtils.getRandom();
-
private final UserSimilarity userSimilarity;
private final DataModel dataModel;
private final double samplingRate;
@@ -64,8 +60,8 @@
return dataModel;
}
- final boolean sampleForUser() {
- return samplingRate >= 1.0 || random.nextDouble() < samplingRate;
+ final double getSamplingRate() {
+ return samplingRate;
}
@Override
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java
Fri May 1 07:32:27 2009
@@ -22,6 +22,7 @@
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.User;
import org.apache.mahout.cf.taste.impl.recommender.TopItems;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -31,7 +32,7 @@
/**
* <p>Computes a neighborhood consisting of the nearest n {@link User}s to a given {@link User}.
- * "Nearest" is defined by the given {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.</p>
+ * "Nearest" is defined by the given {@link UserSimilarity}.</p>
*/
public final class NearestNUserNeighborhood extends AbstractUserNeighborhood {
@@ -97,7 +98,8 @@
TopItems.Estimator<User> estimator = new Estimator(userSimilarityImpl,
theUser, minSimilarity);
- List<User> neighborhood = TopItems.getTopUsers(n, dataModel.getUsers(),
null, estimator);
+ Iterable<? extends User> users =
SamplingIterable.maybeWrapIterable(dataModel.getUsers(), getSamplingRate());
+ List<User> neighborhood = TopItems.getTopUsers(n, users, null, estimator);
log.trace("UserNeighborhood around user ID '{}' is: {}", userID,
neighborhood);
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java?rev=770553&r1=770552&r2=770553&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java
Fri May 1 07:32:27 2009
@@ -21,6 +21,7 @@
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.User;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -33,7 +34,7 @@
/**
* <p>Computes a neigbhorhood consisting of all {@link User}s whose similarity to the
* given {@link User} meets or exceeds a certain threshold. Similarity is defined by the given
- * {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.</p>
+ * {@link UserSimilarity}.</p>
*/
public final class ThresholdUserNeighborhood extends AbstractUserNeighborhood {
@@ -83,12 +84,14 @@
DataModel dataModel = getDataModel();
User theUser = dataModel.getUser(userID);
List<User> neighborhood = new ArrayList<User>();
- Iterator<? extends User> users = dataModel.getUsers().iterator();
+ Iterable<? extends User> usersIterable =
+ SamplingIterable.maybeWrapIterable(dataModel.getUsers(),
getSamplingRate());
+ Iterator<? extends User> users = usersIterable.iterator();
UserSimilarity userSimilarityImpl = getUserSimilarity();
while (users.hasNext()) {
User user = users.next();
- if (sampleForUser() && !userID.equals(user.getID())) {
+ if (!userID.equals(user.getID())) {
double theSimilarity = userSimilarityImpl.userSimilarity(theUser,
user);
if (!Double.isNaN(theSimilarity) && theSimilarity >= threshold) {
neighborhood.add(user);