Author: srowen
Date: Fri May 1 17:38:13 2009
New Revision: 770768
URL: http://svn.apache.org/viewvc?rev=770768&view=rev
Log: (empty)
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java?rev=770768&r1=770767&r2=770768&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/FarthestNeighborClusterSimilarity.java
Fri May 1 17:38:13 2009
@@ -21,23 +21,20 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
import org.apache.mahout.cf.taste.model.User;
import java.util.Collection;
-import java.util.Random;
/**
* <p>Defines cluster similarity as the <em>smallest</em> similarity between
any two
- * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that
is, it says that clusters are close
+ * {@link User}s in the clusters -- that is, it says that clusters are close
* when <em>all pairs</em> of their members have relatively high
similarity.</p>
*/
public final class FarthestNeighborClusterSimilarity implements
ClusterSimilarity {
- private static final Random random = RandomUtils.getRandom();
-
private final UserSimilarity similarity;
- private final double samplingPercentage;
+ private final double samplingRate;
/**
* <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the
given {@link UserSimilarity}.
@@ -49,19 +46,19 @@
/**
* <p>Constructs a {@link FarthestNeighborClusterSimilarity} based on the
given {@link UserSimilarity}.
- * By setting <code>samplingPercentage</code> to a value less than 1.0, this
implementation will only examine
+ * By setting <code>samplingRate</code> to a value less than 1.0, this
implementation will only examine
* that fraction of all user-user similarities between two clusters,
increasing performance at the expense
* of accuracy.</p>
*/
- public FarthestNeighborClusterSimilarity(UserSimilarity similarity, double
samplingPercentage) {
+ public FarthestNeighborClusterSimilarity(UserSimilarity similarity, double
samplingRate) {
if (similarity == null) {
throw new IllegalArgumentException("similarity is null");
}
- if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 ||
samplingPercentage > 1.0) {
- throw new IllegalArgumentException("samplingPercentage is invalid: " +
samplingPercentage);
+ if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate >
1.0) {
+ throw new IllegalArgumentException("samplingRate is invalid: " +
samplingRate);
}
this.similarity = similarity;
- this.samplingPercentage = samplingPercentage;
+ this.samplingRate = samplingRate;
}
@Override
@@ -71,13 +68,12 @@
return Double.NaN;
}
double leastSimilarity = Double.POSITIVE_INFINITY;
- for (User user1 : cluster1) {
- if (samplingPercentage >= 1.0 || random.nextDouble() <
samplingPercentage) {
- for (User user2 : cluster2) {
- double theSimilarity = similarity.userSimilarity(user1, user2);
- if (theSimilarity < leastSimilarity) {
- leastSimilarity = theSimilarity;
- }
+ Iterable<User> someUsers = SamplingIterable.maybeWrapIterable(cluster1,
samplingRate);
+ for (User user1 : someUsers) {
+ for (User user2 : cluster2) {
+ double theSimilarity = similarity.userSimilarity(user1, user2);
+ if (theSimilarity < leastSimilarity) {
+ leastSimilarity = theSimilarity;
}
}
}
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java?rev=770768&r1=770767&r2=770768&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NearestNeighborClusterSimilarity.java
Fri May 1 17:38:13 2009
@@ -21,26 +21,23 @@
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
-import org.apache.mahout.cf.taste.impl.common.RandomUtils;
+import org.apache.mahout.cf.taste.impl.common.SamplingIterable;
import org.apache.mahout.cf.taste.model.User;
import java.util.Collection;
-import java.util.Random;
/**
* <p>Defines cluster similarity as the <em>largest</em> similarity between
any two
- * {@link org.apache.mahout.cf.taste.model.User}s in the clusters -- that
is, it says that clusters are close
+ * {@link User}s in the clusters -- that is, it says that clusters are close
* when <em>some pair</em> of their members has high similarity.</p>
*/
public final class NearestNeighborClusterSimilarity implements
ClusterSimilarity {
- private static final Random random = RandomUtils.getRandom();
-
private final UserSimilarity similarity;
- private final double samplingPercentage;
+ private final double samplingRate;
/**
- * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the
given {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.
+ * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the
given {@link UserSimilarity}.
* All user-user similarities are examined.</p>
*/
public NearestNeighborClusterSimilarity(UserSimilarity similarity) {
@@ -48,20 +45,20 @@
}
/**
- * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the
given {@link org.apache.mahout.cf.taste.similarity.UserSimilarity}.
- * By setting <code>samplingPercentage</code> to a value less than 1.0, this
implementation will only examine
+ * <p>Constructs a {@link NearestNeighborClusterSimilarity} based on the
given {@link UserSimilarity}.
+ * By setting <code>samplingRate</code> to a value less than 1.0, this
implementation will only examine
* that fraction of all user-user similarities between two clusters,
increasing performance at the expense
* of accuracy.</p>
*/
- public NearestNeighborClusterSimilarity(UserSimilarity similarity, double
samplingPercentage) {
+ public NearestNeighborClusterSimilarity(UserSimilarity similarity, double
samplingRate) {
if (similarity == null) {
throw new IllegalArgumentException("similarity is null");
}
- if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 ||
samplingPercentage > 1.0) {
- throw new IllegalArgumentException("samplingPercentage is invalid: " +
samplingPercentage);
+ if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate >
1.0) {
+ throw new IllegalArgumentException("samplingRate is invalid: " +
samplingRate);
}
this.similarity = similarity;
- this.samplingPercentage = samplingPercentage;
+ this.samplingRate = samplingRate;
}
@Override
@@ -70,14 +67,13 @@
if (cluster1.isEmpty() || cluster2.isEmpty()) {
return Double.NaN;
}
+ Iterable<User> someUsers = SamplingIterable.maybeWrapIterable(cluster1,
samplingRate);
double greatestSimilarity = Double.NEGATIVE_INFINITY;
- for (User user1 : cluster1) {
- if (samplingPercentage >= 1.0 || random.nextDouble() <
samplingPercentage) {
- for (User user2 : cluster2) {
- double theSimilarity = similarity.userSimilarity(user1, user2);
- if (theSimilarity > greatestSimilarity) {
- greatestSimilarity = theSimilarity;
- }
+ for (User user1 : someUsers) {
+ for (User user2 : cluster2) {
+ double theSimilarity = similarity.userSimilarity(user1, user2);
+ if (theSimilarity > greatestSimilarity) {
+ greatestSimilarity = theSimilarity;
}
}
}
Modified:
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
URL:
http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java?rev=770768&r1=770767&r2=770768&view=diff
==============================================================================
---
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
(original)
+++
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TreeClusteringRecommender.java
Fri May 1 17:38:13 2009
@@ -60,13 +60,15 @@
*/
public final class TreeClusteringRecommender extends AbstractRecommender
implements ClusteringRecommender {
+ private static final Random r = RandomUtils.getRandom();
+
private static final Logger log =
LoggerFactory.getLogger(TreeClusteringRecommender.class);
private final ClusterSimilarity clusterSimilarity;
private final int numClusters;
private final double clusteringThreshold;
private final boolean clusteringByThreshold;
- private final double samplingPercentage;
+ private final double samplingRate;
private Map<Object, List<RecommendedItem>> topRecsByUserID;
private Collection<Collection<User>> allClusters;
private Map<Object, Collection<User>> clustersByUserID;
@@ -91,16 +93,16 @@
* @param dataModel {@link DataModel} which provides {@link User}s
* @param clusterSimilarity {@link ClusterSimilarity} used to compute
cluster similarity
* @param numClusters desired number of clusters to create
- * @param samplingPercentage percentage of all cluster-cluster pairs to
consider when finding
+ * @param samplingRate percentage of all cluster-cluster pairs to consider
when finding
* next-most-similar clusters. Decreasing this value from 1.0 can increase
performance at the
* cost of accuracy
* @throws IllegalArgumentException if arguments are <code>null</code>, or
<code>numClusters</code> is
- * less than 2, or samplingPercentage is {@link Double#NaN} or
nonpositive or greater than 1.0
+ * less than 2, or samplingRate is {@link Double#NaN} or nonpositive or
greater than 1.0
*/
public TreeClusteringRecommender(DataModel dataModel,
ClusterSimilarity clusterSimilarity,
int numClusters,
- double samplingPercentage) {
+ double samplingRate) {
super(dataModel);
if (clusterSimilarity == null) {
throw new IllegalArgumentException("clusterSimilarity is null");
@@ -108,14 +110,14 @@
if (numClusters < 2) {
throw new IllegalArgumentException("numClusters must be at least 2");
}
- if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 ||
samplingPercentage > 1.0) {
- throw new IllegalArgumentException("samplingPercentage is invalid: " +
samplingPercentage);
+ if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate >
1.0) {
+ throw new IllegalArgumentException("samplingRate is invalid: " +
samplingRate);
}
this.clusterSimilarity = clusterSimilarity;
this.numClusters = numClusters;
this.clusteringThreshold = Double.NaN;
this.clusteringByThreshold = false;
- this.samplingPercentage = samplingPercentage;
+ this.samplingRate = samplingRate;
this.buildClustersLock = new ReentrantLock();
this.refreshHelper = new RefreshHelper(new Callable<Object>() {
@Override
@@ -147,16 +149,16 @@
* @param clusterSimilarity {@link ClusterSimilarity} used to compute
cluster similarity
* @param clusteringThreshold clustering similarity threshold; clusters will
be aggregated into larger
* clusters until the next two nearest clusters' similarity drops below this
threshold
- * @param samplingPercentage percentage of all cluster-cluster pairs to
consider when finding
+ * @param samplingRate percentage of all cluster-cluster pairs to consider
when finding
* next-most-similar clusters. Decreasing this value from 1.0 can increase
performance at the
* cost of accuracy
* @throws IllegalArgumentException if arguments are <code>null</code>, or
<code>clusteringThreshold</code> is
- * {@link Double#NaN}, or samplingPercentage is {@link Double#NaN} or
nonpositive or greater than 1.0
+ * {@link Double#NaN}, or samplingRate is {@link Double#NaN} or
nonpositive or greater than 1.0
*/
public TreeClusteringRecommender(DataModel dataModel,
ClusterSimilarity clusterSimilarity,
double clusteringThreshold,
- double samplingPercentage) {
+ double samplingRate) {
super(dataModel);
if (clusterSimilarity == null) {
throw new IllegalArgumentException("clusterSimilarity is null");
@@ -164,14 +166,14 @@
if (Double.isNaN(clusteringThreshold)) {
throw new IllegalArgumentException("clusteringThreshold must not be
NaN");
}
- if (Double.isNaN(samplingPercentage) || samplingPercentage <= 0.0 ||
samplingPercentage > 1.0) {
- throw new IllegalArgumentException("samplingPercentage is invalid: " +
samplingPercentage);
+ if (Double.isNaN(samplingRate) || samplingRate <= 0.0 || samplingRate >
1.0) {
+ throw new IllegalArgumentException("samplingRate is invalid: " +
samplingRate);
}
this.clusterSimilarity = clusterSimilarity;
this.numClusters = Integer.MIN_VALUE;
this.clusteringThreshold = clusteringThreshold;
this.clusteringByThreshold = true;
- this.samplingPercentage = samplingPercentage;
+ this.samplingRate = samplingRate;
this.buildClustersLock = new ReentrantLock();
this.refreshHelper = new RefreshHelper(new Callable<Object>() {
@Override
@@ -345,11 +347,10 @@
int size = clusters.size();
Pair<Collection<User>, Collection<User>> nearestPair = null;
double bestSimilarity = Double.NEGATIVE_INFINITY;
- Random r = RandomUtils.getRandom();
for (int i = 0; i < size; i++) {
Collection<User> cluster1 = clusters.get(i);
for (int j = i + 1; j < size; j++) {
- if (samplingPercentage >= 1.0 || r.nextDouble() < samplingPercentage) {
+ if (samplingRate >= 1.0 || r.nextDouble() < samplingRate) {
Collection<User> cluster2 = clusters.get(j);
double similarity = clusterSimilarity.getSimilarity(cluster1,
cluster2);
if (!Double.isNaN(similarity) && similarity > bestSimilarity) {