http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefUserBasedRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefUserBasedRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefUserBasedRecommender.java new file mode 100644 index 0000000..15fcc9f --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefUserBasedRecommender.java @@ -0,0 +1,82 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +/** + * A variant on {@link GenericUserBasedRecommender} which is appropriate for use when no notion of preference + * value exists in the data. + */ +public final class GenericBooleanPrefUserBasedRecommender extends GenericUserBasedRecommender { + + public GenericBooleanPrefUserBasedRecommender(DataModel dataModel, + UserNeighborhood neighborhood, + UserSimilarity similarity) { + super(dataModel, neighborhood, similarity); + } + + /** + * This computation is in a technical sense, wrong, since in the domain of "boolean preference users" where + * all preference values are 1, this method should only ever return 1.0 or NaN. This isn't terribly useful + * however since it means results can't be ranked by preference value (all are 1). So instead this returns a + * sum of similarities to any other user in the neighborhood who has also rated the item. + */ + @Override + protected float doEstimatePreference(long theUserID, long[] theNeighborhood, long itemID) throws TasteException { + if (theNeighborhood.length == 0) { + return Float.NaN; + } + DataModel dataModel = getDataModel(); + UserSimilarity similarity = getSimilarity(); + float totalSimilarity = 0.0f; + boolean foundAPref = false; + for (long userID : theNeighborhood) { + // See GenericItemBasedRecommender.doEstimatePreference() too + if (userID != theUserID && dataModel.getPreferenceValue(userID, itemID) != null) { + foundAPref = true; + totalSimilarity += (float) similarity.userSimilarity(theUserID, userID); + } + } + return foundAPref ? totalSimilarity : Float.NaN; + } + + @Override + protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID, boolean includeKnownItems) + throws TasteException { + DataModel dataModel = getDataModel(); + FastIDSet possibleItemIDs = new FastIDSet(); + for (long userID : theNeighborhood) { + possibleItemIDs.addAll(dataModel.getItemIDsFromUser(userID)); + } + if (!includeKnownItems) { + possibleItemIDs.removeAll(dataModel.getItemIDsFromUser(theUserID)); + } + return possibleItemIDs; + } + + @Override + public String toString() { + return "GenericBooleanPrefUserBasedRecommender"; + } + +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java new file mode 100644 index 0000000..413db4b --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericItemBasedRecommender.java @@ -0,0 +1,378 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.FullRunningAverage; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.impl.common.RunningAverage; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender; +import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.Rescorer; +import org.apache.mahout.cf.taste.similarity.ItemSimilarity; +import org.apache.mahout.common.LongPair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A simple {@link org.apache.mahout.cf.taste.recommender.Recommender} which uses a given + * {@link org.apache.mahout.cf.taste.model.DataModel} and + * {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} to produce recommendations. This class + * represents Taste's support for item-based recommenders. + * </p> + * + * <p> + * The {@link org.apache.mahout.cf.taste.similarity.ItemSimilarity} is the most important point to discuss + * here. Item-based recommenders are useful because they can take advantage of something to be very fast: they + * base their computations on item similarity, not user similarity, and item similarity is relatively static. + * It can be precomputed, instead of re-computed in real time. + * </p> + * + * <p> + * Thus it's strongly recommended that you use + * {@link org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity} with pre-computed similarities if + * you're going to use this class. You can use + * {@link org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity} too, which computes + * similarities in real-time, but will probably find this painfully slow for large amounts of data. + * </p> + */ +public class GenericItemBasedRecommender extends AbstractRecommender implements ItemBasedRecommender { + + private static final Logger log = LoggerFactory.getLogger(GenericItemBasedRecommender.class); + + private final ItemSimilarity similarity; + private final MostSimilarItemsCandidateItemsStrategy mostSimilarItemsCandidateItemsStrategy; + private final RefreshHelper refreshHelper; + private EstimatedPreferenceCapper capper; + + private static final boolean EXCLUDE_ITEM_IF_NOT_SIMILAR_TO_ALL_BY_DEFAULT = true; + + public GenericItemBasedRecommender(DataModel dataModel, + ItemSimilarity similarity, + CandidateItemsStrategy candidateItemsStrategy, + MostSimilarItemsCandidateItemsStrategy mostSimilarItemsCandidateItemsStrategy) { + super(dataModel, candidateItemsStrategy); + Preconditions.checkArgument(similarity != null, "similarity is null"); + this.similarity = similarity; + Preconditions.checkArgument(mostSimilarItemsCandidateItemsStrategy != null, + "mostSimilarItemsCandidateItemsStrategy is null"); + this.mostSimilarItemsCandidateItemsStrategy = mostSimilarItemsCandidateItemsStrategy; + this.refreshHelper = new RefreshHelper(new Callable<Void>() { + @Override + public Void call() { + capper = buildCapper(); + return null; + } + }); + refreshHelper.addDependency(dataModel); + refreshHelper.addDependency(similarity); + refreshHelper.addDependency(candidateItemsStrategy); + refreshHelper.addDependency(mostSimilarItemsCandidateItemsStrategy); + capper = buildCapper(); + } + + public GenericItemBasedRecommender(DataModel dataModel, ItemSimilarity similarity) { + this(dataModel, + similarity, + AbstractRecommender.getDefaultCandidateItemsStrategy(), + getDefaultMostSimilarItemsCandidateItemsStrategy()); + } + + protected static MostSimilarItemsCandidateItemsStrategy getDefaultMostSimilarItemsCandidateItemsStrategy() { + return new PreferredItemsNeighborhoodCandidateItemsStrategy(); + } + + public ItemSimilarity getSimilarity() { + return similarity; + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems) + throws TasteException { + Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1"); + log.debug("Recommending items for user ID '{}'", userID); + + PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID); + if (preferencesFromUser.length() == 0) { + return Collections.emptyList(); + } + + FastIDSet possibleItemIDs = getAllOtherItems(userID, preferencesFromUser, includeKnownItems); + + TopItems.Estimator<Long> estimator = new Estimator(userID, preferencesFromUser); + + List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer, + estimator); + + log.debug("Recommendations are: {}", topItems); + return topItems; + } + + @Override + public float estimatePreference(long userID, long itemID) throws TasteException { + PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID); + Float actualPref = getPreferenceForItem(preferencesFromUser, itemID); + if (actualPref != null) { + return actualPref; + } + return doEstimatePreference(userID, preferencesFromUser, itemID); + } + + private static Float getPreferenceForItem(PreferenceArray preferencesFromUser, long itemID) { + int size = preferencesFromUser.length(); + for (int i = 0; i < size; i++) { + if (preferencesFromUser.getItemID(i) == itemID) { + return preferencesFromUser.getValue(i); + } + } + return null; + } + + @Override + public List<RecommendedItem> mostSimilarItems(long itemID, int howMany) throws TasteException { + return mostSimilarItems(itemID, howMany, null); + } + + @Override + public List<RecommendedItem> mostSimilarItems(long itemID, int howMany, + Rescorer<LongPair> rescorer) throws TasteException { + TopItems.Estimator<Long> estimator = new MostSimilarEstimator(itemID, similarity, rescorer); + return doMostSimilarItems(new long[] {itemID}, howMany, estimator); + } + + @Override + public List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany) throws TasteException { + TopItems.Estimator<Long> estimator = new MultiMostSimilarEstimator(itemIDs, similarity, null, + EXCLUDE_ITEM_IF_NOT_SIMILAR_TO_ALL_BY_DEFAULT); + return doMostSimilarItems(itemIDs, howMany, estimator); + } + + @Override + public List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany, + Rescorer<LongPair> rescorer) throws TasteException { + TopItems.Estimator<Long> estimator = new MultiMostSimilarEstimator(itemIDs, similarity, rescorer, + EXCLUDE_ITEM_IF_NOT_SIMILAR_TO_ALL_BY_DEFAULT); + return doMostSimilarItems(itemIDs, howMany, estimator); + } + + @Override + public List<RecommendedItem> mostSimilarItems(long[] itemIDs, + int howMany, + boolean excludeItemIfNotSimilarToAll) throws TasteException { + TopItems.Estimator<Long> estimator = new MultiMostSimilarEstimator(itemIDs, similarity, null, + excludeItemIfNotSimilarToAll); + return doMostSimilarItems(itemIDs, howMany, estimator); + } + + @Override + public List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany, + Rescorer<LongPair> rescorer, + boolean excludeItemIfNotSimilarToAll) throws TasteException { + TopItems.Estimator<Long> estimator = new MultiMostSimilarEstimator(itemIDs, similarity, rescorer, + excludeItemIfNotSimilarToAll); + return doMostSimilarItems(itemIDs, howMany, estimator); + } + + @Override + public List<RecommendedItem> recommendedBecause(long userID, long itemID, int howMany) throws TasteException { + Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1"); + + DataModel model = getDataModel(); + TopItems.Estimator<Long> estimator = new RecommendedBecauseEstimator(userID, itemID); + + PreferenceArray prefs = model.getPreferencesFromUser(userID); + int size = prefs.length(); + FastIDSet allUserItems = new FastIDSet(size); + for (int i = 0; i < size; i++) { + allUserItems.add(prefs.getItemID(i)); + } + allUserItems.remove(itemID); + + return TopItems.getTopItems(howMany, allUserItems.iterator(), null, estimator); + } + + private List<RecommendedItem> doMostSimilarItems(long[] itemIDs, + int howMany, + TopItems.Estimator<Long> estimator) throws TasteException { + FastIDSet possibleItemIDs = mostSimilarItemsCandidateItemsStrategy.getCandidateItems(itemIDs, getDataModel()); + return TopItems.getTopItems(howMany, possibleItemIDs.iterator(), null, estimator); + } + + protected float doEstimatePreference(long userID, PreferenceArray preferencesFromUser, long itemID) + throws TasteException { + double preference = 0.0; + double totalSimilarity = 0.0; + int count = 0; + double[] similarities = similarity.itemSimilarities(itemID, preferencesFromUser.getIDs()); + for (int i = 0; i < similarities.length; i++) { + double theSimilarity = similarities[i]; + if (!Double.isNaN(theSimilarity)) { + // Weights can be negative! + preference += theSimilarity * preferencesFromUser.getValue(i); + totalSimilarity += theSimilarity; + count++; + } + } + // Throw out the estimate if it was based on no data points, of course, but also if based on + // just one. This is a bit of a band-aid on the 'stock' item-based algorithm for the moment. + // The reason is that in this case the estimate is, simply, the user's rating for one item + // that happened to have a defined similarity. The similarity score doesn't matter, and that + // seems like a bad situation. + if (count <= 1) { + return Float.NaN; + } + float estimate = (float) (preference / totalSimilarity); + if (capper != null) { + estimate = capper.capEstimate(estimate); + } + return estimate; + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + refreshHelper.refresh(alreadyRefreshed); + } + + @Override + public String toString() { + return "GenericItemBasedRecommender[similarity:" + similarity + ']'; + } + + private EstimatedPreferenceCapper buildCapper() { + DataModel dataModel = getDataModel(); + if (Float.isNaN(dataModel.getMinPreference()) && Float.isNaN(dataModel.getMaxPreference())) { + return null; + } else { + return new EstimatedPreferenceCapper(dataModel); + } + } + + public static class MostSimilarEstimator implements TopItems.Estimator<Long> { + + private final long toItemID; + private final ItemSimilarity similarity; + private final Rescorer<LongPair> rescorer; + + public MostSimilarEstimator(long toItemID, ItemSimilarity similarity, Rescorer<LongPair> rescorer) { + this.toItemID = toItemID; + this.similarity = similarity; + this.rescorer = rescorer; + } + + @Override + public double estimate(Long itemID) throws TasteException { + LongPair pair = new LongPair(toItemID, itemID); + if (rescorer != null && rescorer.isFiltered(pair)) { + return Double.NaN; + } + double originalEstimate = similarity.itemSimilarity(toItemID, itemID); + return rescorer == null ? originalEstimate : rescorer.rescore(pair, originalEstimate); + } + } + + private final class Estimator implements TopItems.Estimator<Long> { + + private final long userID; + private final PreferenceArray preferencesFromUser; + + private Estimator(long userID, PreferenceArray preferencesFromUser) { + this.userID = userID; + this.preferencesFromUser = preferencesFromUser; + } + + @Override + public double estimate(Long itemID) throws TasteException { + return doEstimatePreference(userID, preferencesFromUser, itemID); + } + } + + private static final class MultiMostSimilarEstimator implements TopItems.Estimator<Long> { + + private final long[] toItemIDs; + private final ItemSimilarity similarity; + private final Rescorer<LongPair> rescorer; + private final boolean excludeItemIfNotSimilarToAll; + + private MultiMostSimilarEstimator(long[] toItemIDs, ItemSimilarity similarity, Rescorer<LongPair> rescorer, + boolean excludeItemIfNotSimilarToAll) { + this.toItemIDs = toItemIDs; + this.similarity = similarity; + this.rescorer = rescorer; + this.excludeItemIfNotSimilarToAll = excludeItemIfNotSimilarToAll; + } + + @Override + public double estimate(Long itemID) throws TasteException { + RunningAverage average = new FullRunningAverage(); + double[] similarities = similarity.itemSimilarities(itemID, toItemIDs); + for (int i = 0; i < toItemIDs.length; i++) { + long toItemID = toItemIDs[i]; + LongPair pair = new LongPair(toItemID, itemID); + if (rescorer != null && rescorer.isFiltered(pair)) { + continue; + } + double estimate = similarities[i]; + if (rescorer != null) { + estimate = rescorer.rescore(pair, estimate); + } + if (excludeItemIfNotSimilarToAll || !Double.isNaN(estimate)) { + average.addDatum(estimate); + } + } + double averageEstimate = average.getAverage(); + return averageEstimate == 0 ? Double.NaN : averageEstimate; + } + } + + private final class RecommendedBecauseEstimator implements TopItems.Estimator<Long> { + + private final long userID; + private final long recommendedItemID; + + private RecommendedBecauseEstimator(long userID, long recommendedItemID) { + this.userID = userID; + this.recommendedItemID = recommendedItemID; + } + + @Override + public double estimate(Long itemID) throws TasteException { + Float pref = getDataModel().getPreferenceValue(userID, itemID); + if (pref == null) { + return Float.NaN; + } + double similarityValue = similarity.itemSimilarity(recommendedItemID, itemID); + return (1.0 + similarityValue) * pref; + } + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java new file mode 100644 index 0000000..8c8f6ce --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericRecommendedItem.java @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.io.Serializable; + +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.common.RandomUtils; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A simple implementation of {@link RecommendedItem}. + * </p> + */ +public final class GenericRecommendedItem implements RecommendedItem, Serializable { + + private final long itemID; + private final float value; + + /** + * @throws IllegalArgumentException + * if item is null or value is NaN + */ + public GenericRecommendedItem(long itemID, float value) { + Preconditions.checkArgument(!Float.isNaN(value), "value is NaN"); + this.itemID = itemID; + this.value = value; + } + + @Override + public long getItemID() { + return itemID; + } + + @Override + public float getValue() { + return value; + } + + @Override + public String toString() { + return "RecommendedItem[item:" + itemID + ", value:" + value + ']'; + } + + @Override + public int hashCode() { + return (int) itemID ^ RandomUtils.hashFloat(value); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof GenericRecommendedItem)) { + return false; + } + RecommendedItem other = (RecommendedItem) o; + return itemID == other.getItemID() && value == other.getValue(); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java new file mode 100644 index 0000000..1e2ef73 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericUserBasedRecommender.java @@ -0,0 +1,247 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.Rescorer; +import org.apache.mahout.cf.taste.recommender.UserBasedRecommender; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; +import org.apache.mahout.common.LongPair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A simple {@link org.apache.mahout.cf.taste.recommender.Recommender} + * which uses a given {@link DataModel} and {@link UserNeighborhood} to produce recommendations. + * </p> + */ +public class GenericUserBasedRecommender extends AbstractRecommender implements UserBasedRecommender { + + private static final Logger log = LoggerFactory.getLogger(GenericUserBasedRecommender.class); + + private final UserNeighborhood neighborhood; + private final UserSimilarity similarity; + private final RefreshHelper refreshHelper; + private EstimatedPreferenceCapper capper; + + public GenericUserBasedRecommender(DataModel dataModel, + UserNeighborhood neighborhood, + UserSimilarity similarity) { + super(dataModel); + Preconditions.checkArgument(neighborhood != null, "neighborhood is null"); + this.neighborhood = neighborhood; + this.similarity = similarity; + this.refreshHelper = new RefreshHelper(new Callable<Void>() { + @Override + public Void call() { + capper = buildCapper(); + return null; + } + }); + refreshHelper.addDependency(dataModel); + refreshHelper.addDependency(similarity); + refreshHelper.addDependency(neighborhood); + capper = buildCapper(); + } + + public UserSimilarity getSimilarity() { + return similarity; + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems) + throws TasteException { + Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1"); + + log.debug("Recommending items for user ID '{}'", userID); + + long[] theNeighborhood = neighborhood.getUserNeighborhood(userID); + + if (theNeighborhood.length == 0) { + return Collections.emptyList(); + } + + FastIDSet allItemIDs = getAllOtherItems(theNeighborhood, userID, includeKnownItems); + + TopItems.Estimator<Long> estimator = new Estimator(userID, theNeighborhood); + + List<RecommendedItem> topItems = TopItems + .getTopItems(howMany, allItemIDs.iterator(), rescorer, estimator); + + log.debug("Recommendations are: {}", topItems); + return topItems; + } + + @Override + public float estimatePreference(long userID, long itemID) throws TasteException { + DataModel model = getDataModel(); + Float actualPref = model.getPreferenceValue(userID, itemID); + if (actualPref != null) { + return actualPref; + } + long[] theNeighborhood = neighborhood.getUserNeighborhood(userID); + return doEstimatePreference(userID, theNeighborhood, itemID); + } + + @Override + public long[] mostSimilarUserIDs(long userID, int howMany) throws TasteException { + return mostSimilarUserIDs(userID, howMany, null); + } + + @Override + public long[] mostSimilarUserIDs(long userID, int howMany, Rescorer<LongPair> rescorer) throws TasteException { + TopItems.Estimator<Long> estimator = new MostSimilarEstimator(userID, similarity, rescorer); + return doMostSimilarUsers(howMany, estimator); + } + + private long[] doMostSimilarUsers(int howMany, TopItems.Estimator<Long> estimator) throws TasteException { + DataModel model = getDataModel(); + return TopItems.getTopUsers(howMany, model.getUserIDs(), null, estimator); + } + + protected float doEstimatePreference(long theUserID, long[] theNeighborhood, long itemID) throws TasteException { + if (theNeighborhood.length == 0) { + return Float.NaN; + } + DataModel dataModel = getDataModel(); + double preference = 0.0; + double totalSimilarity = 0.0; + int count = 0; + for (long userID : theNeighborhood) { + if (userID != theUserID) { + // See GenericItemBasedRecommender.doEstimatePreference() too + Float pref = dataModel.getPreferenceValue(userID, itemID); + if (pref != null) { + double theSimilarity = similarity.userSimilarity(theUserID, userID); + if (!Double.isNaN(theSimilarity)) { + preference += theSimilarity * pref; + totalSimilarity += theSimilarity; + count++; + } + } + } + } + // Throw out the estimate if it was based on no data points, of course, but also if based on + // just one. This is a bit of a band-aid on the 'stock' item-based algorithm for the moment. + // The reason is that in this case the estimate is, simply, the user's rating for one item + // that happened to have a defined similarity. The similarity score doesn't matter, and that + // seems like a bad situation. + if (count <= 1) { + return Float.NaN; + } + float estimate = (float) (preference / totalSimilarity); + if (capper != null) { + estimate = capper.capEstimate(estimate); + } + return estimate; + } + + protected FastIDSet getAllOtherItems(long[] theNeighborhood, long theUserID, boolean includeKnownItems) + throws TasteException { + DataModel dataModel = getDataModel(); + FastIDSet possibleItemIDs = new FastIDSet(); + for (long userID : theNeighborhood) { + possibleItemIDs.addAll(dataModel.getItemIDsFromUser(userID)); + } + if (!includeKnownItems) { + possibleItemIDs.removeAll(dataModel.getItemIDsFromUser(theUserID)); + } + return possibleItemIDs; + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + refreshHelper.refresh(alreadyRefreshed); + } + + @Override + public String toString() { + return "GenericUserBasedRecommender[neighborhood:" + neighborhood + ']'; + } + + private EstimatedPreferenceCapper buildCapper() { + DataModel dataModel = getDataModel(); + if (Float.isNaN(dataModel.getMinPreference()) && Float.isNaN(dataModel.getMaxPreference())) { + return null; + } else { + return new EstimatedPreferenceCapper(dataModel); + } + } + + private static final class MostSimilarEstimator implements TopItems.Estimator<Long> { + + private final long toUserID; + private final UserSimilarity similarity; + private final Rescorer<LongPair> rescorer; + + private MostSimilarEstimator(long toUserID, UserSimilarity similarity, Rescorer<LongPair> rescorer) { + this.toUserID = toUserID; + this.similarity = similarity; + this.rescorer = rescorer; + } + + @Override + public double estimate(Long userID) throws TasteException { + // Don't consider the user itself as a possible most similar user + if (userID == toUserID) { + return Double.NaN; + } + if (rescorer == null) { + return similarity.userSimilarity(toUserID, userID); + } else { + LongPair pair = new LongPair(toUserID, userID); + if (rescorer.isFiltered(pair)) { + return Double.NaN; + } + double originalEstimate = similarity.userSimilarity(toUserID, userID); + return rescorer.rescore(pair, originalEstimate); + } + } + } + + private final class Estimator implements TopItems.Estimator<Long> { + + private final long theUserID; + private final long[] theNeighborhood; + + Estimator(long theUserID, long[] theNeighborhood) { + this.theUserID = theUserID; + this.theNeighborhood = theNeighborhood; + } + + @Override + public double estimate(Long itemID) throws TasteException { + return doEstimatePreference(theUserID, theNeighborhood, itemID); + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java new file mode 100644 index 0000000..618c65f --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemAverageRecommender.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.util.Collection; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.mahout.cf.taste.common.NoSuchUserException; +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastByIDMap; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.FullRunningAverage; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.impl.common.RunningAverage; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A simple recommender that always estimates preference for an item to be the average of all known preference + * values for that item. No information about users is taken into account. This implementation is provided for + * experimentation; while simple and fast, it may not produce very good recommendations. + * </p> + */ +public final class ItemAverageRecommender extends AbstractRecommender { + + private static final Logger log = LoggerFactory.getLogger(ItemAverageRecommender.class); + + private final FastByIDMap<RunningAverage> itemAverages; + private final ReadWriteLock buildAveragesLock; + private final RefreshHelper refreshHelper; + + public ItemAverageRecommender(DataModel dataModel) throws TasteException { + super(dataModel); + this.itemAverages = new FastByIDMap<>(); + this.buildAveragesLock = new ReentrantReadWriteLock(); + this.refreshHelper = new RefreshHelper(new Callable<Object>() { + @Override + public Object call() throws TasteException { + buildAverageDiffs(); + return null; + } + }); + refreshHelper.addDependency(dataModel); + buildAverageDiffs(); + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems) + throws TasteException { + Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1"); + log.debug("Recommending items for user ID '{}'", userID); + + PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID); + FastIDSet possibleItemIDs = getAllOtherItems(userID, preferencesFromUser, includeKnownItems); + + TopItems.Estimator<Long> estimator = new Estimator(); + + List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer, + estimator); + + log.debug("Recommendations are: {}", topItems); + return topItems; + } + + @Override + public float estimatePreference(long userID, long itemID) throws TasteException { + DataModel dataModel = getDataModel(); + Float actualPref = dataModel.getPreferenceValue(userID, itemID); + if (actualPref != null) { + return actualPref; + } + return doEstimatePreference(itemID); + } + + private float doEstimatePreference(long itemID) { + buildAveragesLock.readLock().lock(); + try { + RunningAverage average = itemAverages.get(itemID); + return average == null ? Float.NaN : (float) average.getAverage(); + } finally { + buildAveragesLock.readLock().unlock(); + } + } + + private void buildAverageDiffs() throws TasteException { + try { + buildAveragesLock.writeLock().lock(); + DataModel dataModel = getDataModel(); + LongPrimitiveIterator it = dataModel.getUserIDs(); + while (it.hasNext()) { + PreferenceArray prefs = dataModel.getPreferencesFromUser(it.nextLong()); + int size = prefs.length(); + for (int i = 0; i < size; i++) { + long itemID = prefs.getItemID(i); + RunningAverage average = itemAverages.get(itemID); + if (average == null) { + average = new FullRunningAverage(); + itemAverages.put(itemID, average); + } + average.addDatum(prefs.getValue(i)); + } + } + } finally { + buildAveragesLock.writeLock().unlock(); + } + } + + @Override + public void setPreference(long userID, long itemID, float value) throws TasteException { + DataModel dataModel = getDataModel(); + double prefDelta; + try { + Float oldPref = dataModel.getPreferenceValue(userID, itemID); + prefDelta = oldPref == null ? value : value - oldPref; + } catch (NoSuchUserException nsee) { + prefDelta = value; + } + super.setPreference(userID, itemID, value); + try { + buildAveragesLock.writeLock().lock(); + RunningAverage average = itemAverages.get(itemID); + if (average == null) { + RunningAverage newAverage = new FullRunningAverage(); + newAverage.addDatum(prefDelta); + itemAverages.put(itemID, newAverage); + } else { + average.changeDatum(prefDelta); + } + } finally { + buildAveragesLock.writeLock().unlock(); + } + } + + @Override + public void removePreference(long userID, long itemID) throws TasteException { + DataModel dataModel = getDataModel(); + Float oldPref = dataModel.getPreferenceValue(userID, itemID); + super.removePreference(userID, itemID); + if (oldPref != null) { + try { + buildAveragesLock.writeLock().lock(); + RunningAverage average = itemAverages.get(itemID); + if (average == null) { + throw new IllegalStateException("No preferences exist for item ID: " + itemID); + } else { + average.removeDatum(oldPref); + } + } finally { + buildAveragesLock.writeLock().unlock(); + } + } + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + refreshHelper.refresh(alreadyRefreshed); + } + + @Override + public String toString() { + return "ItemAverageRecommender"; + } + + private final class Estimator implements TopItems.Estimator<Long> { + + @Override + public double estimate(Long itemID) { + return doEstimatePreference(itemID); + } + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java new file mode 100644 index 0000000..b2bcd24 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ItemUserAverageRecommender.java @@ -0,0 +1,240 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.util.Collection; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.locks.ReadWriteLock; +import java.util.concurrent.locks.ReentrantReadWriteLock; + +import org.apache.mahout.cf.taste.common.NoSuchUserException; +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastByIDMap; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.FullRunningAverage; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.impl.common.RunningAverage; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * Like {@link ItemAverageRecommender}, except that estimated preferences are adjusted for the users' average + * preference value. For example, say user X has not rated item Y. Item Y's average preference value is 3.5. + * User X's average preference value is 4.2, and the average over all preference values is 4.0. User X prefers + * items 0.2 higher on average, so, the estimated preference for user X, item Y is 3.5 + 0.2 = 3.7. + * </p> + */ +public final class ItemUserAverageRecommender extends AbstractRecommender { + + private static final Logger log = LoggerFactory.getLogger(ItemUserAverageRecommender.class); + + private final FastByIDMap<RunningAverage> itemAverages; + private final FastByIDMap<RunningAverage> userAverages; + private final RunningAverage overallAveragePrefValue; + private final ReadWriteLock buildAveragesLock; + private final RefreshHelper refreshHelper; + + public ItemUserAverageRecommender(DataModel dataModel) throws TasteException { + super(dataModel); + this.itemAverages = new FastByIDMap<>(); + this.userAverages = new FastByIDMap<>(); + this.overallAveragePrefValue = new FullRunningAverage(); + this.buildAveragesLock = new ReentrantReadWriteLock(); + this.refreshHelper = new RefreshHelper(new Callable<Object>() { + @Override + public Object call() throws TasteException { + buildAverageDiffs(); + return null; + } + }); + refreshHelper.addDependency(dataModel); + buildAverageDiffs(); + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems) + throws TasteException { + Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1"); + log.debug("Recommending items for user ID '{}'", userID); + + PreferenceArray preferencesFromUser = getDataModel().getPreferencesFromUser(userID); + FastIDSet possibleItemIDs = getAllOtherItems(userID, preferencesFromUser, includeKnownItems); + + TopItems.Estimator<Long> estimator = new Estimator(userID); + + List<RecommendedItem> topItems = TopItems.getTopItems(howMany, possibleItemIDs.iterator(), rescorer, + estimator); + + log.debug("Recommendations are: {}", topItems); + return topItems; + } + + @Override + public float estimatePreference(long userID, long itemID) throws TasteException { + DataModel dataModel = getDataModel(); + Float actualPref = dataModel.getPreferenceValue(userID, itemID); + if (actualPref != null) { + return actualPref; + } + return doEstimatePreference(userID, itemID); + } + + private float doEstimatePreference(long userID, long itemID) { + buildAveragesLock.readLock().lock(); + try { + RunningAverage itemAverage = itemAverages.get(itemID); + if (itemAverage == null) { + return Float.NaN; + } + RunningAverage userAverage = userAverages.get(userID); + if (userAverage == null) { + return Float.NaN; + } + double userDiff = userAverage.getAverage() - overallAveragePrefValue.getAverage(); + return (float) (itemAverage.getAverage() + userDiff); + } finally { + buildAveragesLock.readLock().unlock(); + } + } + + private void buildAverageDiffs() throws TasteException { + try { + buildAveragesLock.writeLock().lock(); + DataModel dataModel = getDataModel(); + LongPrimitiveIterator it = dataModel.getUserIDs(); + while (it.hasNext()) { + long userID = it.nextLong(); + PreferenceArray prefs = dataModel.getPreferencesFromUser(userID); + int size = prefs.length(); + for (int i = 0; i < size; i++) { + long itemID = prefs.getItemID(i); + float value = prefs.getValue(i); + addDatumAndCreateIfNeeded(itemID, value, itemAverages); + addDatumAndCreateIfNeeded(userID, value, userAverages); + overallAveragePrefValue.addDatum(value); + } + } + } finally { + buildAveragesLock.writeLock().unlock(); + } + } + + private static void addDatumAndCreateIfNeeded(long itemID, float value, FastByIDMap<RunningAverage> averages) { + RunningAverage itemAverage = averages.get(itemID); + if (itemAverage == null) { + itemAverage = new FullRunningAverage(); + averages.put(itemID, itemAverage); + } + itemAverage.addDatum(value); + } + + @Override + public void setPreference(long userID, long itemID, float value) throws TasteException { + DataModel dataModel = getDataModel(); + double prefDelta; + try { + Float oldPref = dataModel.getPreferenceValue(userID, itemID); + prefDelta = oldPref == null ? value : value - oldPref; + } catch (NoSuchUserException nsee) { + prefDelta = value; + } + super.setPreference(userID, itemID, value); + try { + buildAveragesLock.writeLock().lock(); + RunningAverage itemAverage = itemAverages.get(itemID); + if (itemAverage == null) { + RunningAverage newItemAverage = new FullRunningAverage(); + newItemAverage.addDatum(prefDelta); + itemAverages.put(itemID, newItemAverage); + } else { + itemAverage.changeDatum(prefDelta); + } + RunningAverage userAverage = userAverages.get(userID); + if (userAverage == null) { + RunningAverage newUserAveragae = new FullRunningAverage(); + newUserAveragae.addDatum(prefDelta); + userAverages.put(userID, newUserAveragae); + } else { + userAverage.changeDatum(prefDelta); + } + overallAveragePrefValue.changeDatum(prefDelta); + } finally { + buildAveragesLock.writeLock().unlock(); + } + } + + @Override + public void removePreference(long userID, long itemID) throws TasteException { + DataModel dataModel = getDataModel(); + Float oldPref = dataModel.getPreferenceValue(userID, itemID); + super.removePreference(userID, itemID); + if (oldPref != null) { + try { + buildAveragesLock.writeLock().lock(); + RunningAverage itemAverage = itemAverages.get(itemID); + if (itemAverage == null) { + throw new IllegalStateException("No preferences exist for item ID: " + itemID); + } + itemAverage.removeDatum(oldPref); + RunningAverage userAverage = userAverages.get(userID); + if (userAverage == null) { + throw new IllegalStateException("No preferences exist for user ID: " + userID); + } + userAverage.removeDatum(oldPref); + overallAveragePrefValue.removeDatum(oldPref); + } finally { + buildAveragesLock.writeLock().unlock(); + } + } + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + refreshHelper.refresh(alreadyRefreshed); + } + + @Override + public String toString() { + return "ItemUserAverageRecommender"; + } + + private final class Estimator implements TopItems.Estimator<Long> { + + private final long userID; + + private Estimator(long userID) { + this.userID = userID; + } + + @Override + public double estimate(Long itemID) { + return doEstimatePreference(userID, itemID); + } + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java new file mode 100644 index 0000000..14e9ec6 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/NullRescorer.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.Rescorer; +import org.apache.mahout.common.LongPair; + +/** + * <p> + * A simple {@link Rescorer} which always returns the original score. + * </p> + */ +public final class NullRescorer<T> implements Rescorer<T>, IDRescorer { + + private static final IDRescorer USER_OR_ITEM_INSTANCE = new NullRescorer<Long>(); + private static final Rescorer<LongPair> ITEM_ITEM_PAIR_INSTANCE = new NullRescorer<>(); + private static final Rescorer<LongPair> USER_USER_PAIR_INSTANCE = new NullRescorer<>(); + + private NullRescorer() { + } + + public static IDRescorer getItemInstance() { + return USER_OR_ITEM_INSTANCE; + } + + public static IDRescorer getUserInstance() { + return USER_OR_ITEM_INSTANCE; + } + + public static Rescorer<LongPair> getItemItemPairInstance() { + return ITEM_ITEM_PAIR_INSTANCE; + } + + public static Rescorer<LongPair> getUserUserPairInstance() { + return USER_USER_PAIR_INSTANCE; + } + + /** + * @param thing + * to rescore + * @param originalScore + * current score for item + * @return same originalScore as new score, always + */ + @Override + public double rescore(T thing, double originalScore) { + return originalScore; + } + + @Override + public boolean isFiltered(T thing) { + return false; + } + + @Override + public double rescore(long id, double originalScore) { + return originalScore; + } + + @Override + public boolean isFiltered(long id) { + return false; + } + + @Override + public String toString() { + return "NullRescorer"; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java new file mode 100644 index 0000000..6297d0b --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/PreferredItemsNeighborhoodCandidateItemsStrategy.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; + +public final class PreferredItemsNeighborhoodCandidateItemsStrategy extends AbstractCandidateItemsStrategy { + + /** + * returns all items that have not been rated by the user and that were preferred by another user + * that has preferred at least one item that the current user has preferred too + */ + @Override + protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems) + throws TasteException { + FastIDSet possibleItemsIDs = new FastIDSet(); + for (long itemID : preferredItemIDs) { + PreferenceArray itemPreferences = dataModel.getPreferencesForItem(itemID); + int numUsersPreferringItem = itemPreferences.length(); + for (int index = 0; index < numUsersPreferringItem; index++) { + possibleItemsIDs.addAll(dataModel.getItemIDsFromUser(itemPreferences.getUserID(index))); + } + } + if (!includeKnownItems) { + possibleItemsIDs.removeAll(preferredItemIDs); + } + return possibleItemsIDs; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java new file mode 100644 index 0000000..ef11f0d --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/RandomRecommender.java @@ -0,0 +1,97 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.util.Collection; +import java.util.List; +import java.util.Random; + +import com.google.common.collect.Lists; +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.common.RandomUtils; + +/** + * Produces random recommendations and preference estimates. This is likely only useful as a novelty and for + * benchmarking. + */ +public final class RandomRecommender extends AbstractRecommender { + + private final Random random = RandomUtils.getRandom(); + private final float minPref; + private final float maxPref; + + public RandomRecommender(DataModel dataModel) throws TasteException { + super(dataModel); + float maxPref = Float.NEGATIVE_INFINITY; + float minPref = Float.POSITIVE_INFINITY; + LongPrimitiveIterator userIterator = dataModel.getUserIDs(); + while (userIterator.hasNext()) { + long userID = userIterator.next(); + PreferenceArray prefs = dataModel.getPreferencesFromUser(userID); + for (int i = 0; i < prefs.length(); i++) { + float prefValue = prefs.getValue(i); + if (prefValue < minPref) { + minPref = prefValue; + } + if (prefValue > maxPref) { + maxPref = prefValue; + } + } + } + this.minPref = minPref; + this.maxPref = maxPref; + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems) + throws TasteException { + DataModel dataModel = getDataModel(); + int numItems = dataModel.getNumItems(); + List<RecommendedItem> result = Lists.newArrayListWithCapacity(howMany); + while (result.size() < howMany) { + LongPrimitiveIterator it = dataModel.getItemIDs(); + it.skip(random.nextInt(numItems)); + long itemID = it.next(); + if (includeKnownItems || dataModel.getPreferenceValue(userID, itemID) == null) { + result.add(new GenericRecommendedItem(itemID, randomPref())); + } + } + return result; + } + + @Override + public float estimatePreference(long userID, long itemID) { + return randomPref(); + } + + private float randomPref() { + return minPref + random.nextFloat() * (maxPref - minPref); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + getDataModel().refresh(alreadyRefreshed); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java new file mode 100644 index 0000000..623a60b --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SamplingCandidateItemsStrategy.java @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import com.google.common.base.Preconditions; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.Preference; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.common.iterator.FixedSizeSamplingIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Iterator; + +/** + * <p>Returns all items that have not been rated by the user <em>(3)</em> and that were preferred by another user + * <em>(2)</em> that has preferred at least one item <em>(1)</em> that the current user has preferred too.</p> + * + * <p>This strategy uses sampling to limit the number of items that are considered, by sampling three different + * things, noted above:</p> + * + * <ol> + * <li>The items that the user has preferred</li> + * <li>The users who also prefer each of those items</li> + * <li>The items those users also prefer</li> + * </ol> + * + * <p>There is a maximum associated with each of these three things; if the number of items or users exceeds + * that max, it is sampled so that the expected number of items or users actually used in that part of the + * computation is equal to the max.</p> + * + * <p>Three arguments control these three maxima. Each is a "factor" f, which establishes the max at + * f * log2(n), where n is the number of users or items in the data. For example if factor #2 is 5, + * which controls the number of users sampled per item, then 5 * log2(# users) is the maximum for this + * part of the computation.</p> + * + * <p>Each can be set to not do any limiting with value {@link #NO_LIMIT_FACTOR}.</p> + */ +public class SamplingCandidateItemsStrategy extends AbstractCandidateItemsStrategy { + + private static final Logger log = LoggerFactory.getLogger(SamplingCandidateItemsStrategy.class); + + /** + * Default factor used if not otherwise specified, for all limits. (30). + */ + public static final int DEFAULT_FACTOR = 30; + /** + * Specify this value as a factor to mean no limit. + */ + public static final int NO_LIMIT_FACTOR = Integer.MAX_VALUE; + private static final int MAX_LIMIT = Integer.MAX_VALUE; + private static final double LOG2 = Math.log(2.0); + + private final int maxItems; + private final int maxUsersPerItem; + private final int maxItemsPerUser; + + /** + * Defaults to using no limit ({@link #NO_LIMIT_FACTOR}) for all factors, except + * {@code candidatesPerUserFactor} which defaults to {@link #DEFAULT_FACTOR}. + * + * @see #SamplingCandidateItemsStrategy(int, int, int, int, int) + */ + public SamplingCandidateItemsStrategy(int numUsers, int numItems) { + this(DEFAULT_FACTOR, DEFAULT_FACTOR, DEFAULT_FACTOR, numUsers, numItems); + } + + /** + * @param itemsFactor factor controlling max items considered for a user + * @param usersPerItemFactor factor controlling max users considered for each of those items + * @param candidatesPerUserFactor factor controlling max candidate items considered from each of those users + * @param numUsers number of users currently in the data + * @param numItems number of items in the data + */ + public SamplingCandidateItemsStrategy(int itemsFactor, + int usersPerItemFactor, + int candidatesPerUserFactor, + int numUsers, + int numItems) { + Preconditions.checkArgument(itemsFactor > 0, "itemsFactor must be greater then 0!"); + Preconditions.checkArgument(usersPerItemFactor > 0, "usersPerItemFactor must be greater then 0!"); + Preconditions.checkArgument(candidatesPerUserFactor > 0, "candidatesPerUserFactor must be greater then 0!"); + Preconditions.checkArgument(numUsers > 0, "numUsers must be greater then 0!"); + Preconditions.checkArgument(numItems > 0, "numItems must be greater then 0!"); + maxItems = computeMaxFrom(itemsFactor, numItems); + maxUsersPerItem = computeMaxFrom(usersPerItemFactor, numUsers); + maxItemsPerUser = computeMaxFrom(candidatesPerUserFactor, numItems); + log.debug("maxItems {}, maxUsersPerItem {}, maxItemsPerUser {}", maxItems, maxUsersPerItem, maxItemsPerUser); + } + + private static int computeMaxFrom(int factor, int numThings) { + if (factor == NO_LIMIT_FACTOR) { + return MAX_LIMIT; + } + long max = (long) (factor * (1.0 + Math.log(numThings) / LOG2)); + return max > MAX_LIMIT ? MAX_LIMIT : (int) max; + } + + @Override + protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems) + throws TasteException { + LongPrimitiveIterator preferredItemIDsIterator = new LongPrimitiveArrayIterator(preferredItemIDs); + if (preferredItemIDs.length > maxItems) { + double samplingRate = (double) maxItems / preferredItemIDs.length; +// log.info("preferredItemIDs.length {}, samplingRate {}", preferredItemIDs.length, samplingRate); + preferredItemIDsIterator = + new SamplingLongPrimitiveIterator(preferredItemIDsIterator, samplingRate); + } + FastIDSet possibleItemsIDs = new FastIDSet(); + while (preferredItemIDsIterator.hasNext()) { + long itemID = preferredItemIDsIterator.nextLong(); + PreferenceArray prefs = dataModel.getPreferencesForItem(itemID); + int prefsLength = prefs.length(); + if (prefsLength > maxUsersPerItem) { + Iterator<Preference> sampledPrefs = + new FixedSizeSamplingIterator<>(maxUsersPerItem, prefs.iterator()); + while (sampledPrefs.hasNext()) { + addSomeOf(possibleItemsIDs, dataModel.getItemIDsFromUser(sampledPrefs.next().getUserID())); + } + } else { + for (int i = 0; i < prefsLength; i++) { + addSomeOf(possibleItemsIDs, dataModel.getItemIDsFromUser(prefs.getUserID(i))); + } + } + } + if (!includeKnownItems) { + possibleItemsIDs.removeAll(preferredItemIDs); + } + return possibleItemsIDs; + } + + private void addSomeOf(FastIDSet possibleItemIDs, FastIDSet itemIDs) { + if (itemIDs.size() > maxItemsPerUser) { + LongPrimitiveIterator it = + new SamplingLongPrimitiveIterator(itemIDs.iterator(), (double) maxItemsPerUser / itemIDs.size()); + while (it.hasNext()) { + possibleItemIDs.add(it.nextLong()); + } + } else { + possibleItemIDs.addAll(itemIDs); + } + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java new file mode 100644 index 0000000..c6d417f --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/SimilarUser.java @@ -0,0 +1,80 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.common.RandomUtils; + +/** Simply encapsulates a user and a similarity value. */ +public final class SimilarUser implements Comparable<SimilarUser> { + + private final long userID; + private final double similarity; + + public SimilarUser(long userID, double similarity) { + this.userID = userID; + this.similarity = similarity; + } + + long getUserID() { + return userID; + } + + double getSimilarity() { + return similarity; + } + + @Override + public int hashCode() { + return (int) userID ^ RandomUtils.hashDouble(similarity); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof SimilarUser)) { + return false; + } + SimilarUser other = (SimilarUser) o; + return userID == other.getUserID() && similarity == other.getSimilarity(); + } + + @Override + public String toString() { + return "SimilarUser[user:" + userID + ", similarity:" + similarity + ']'; + } + + /** Defines an ordering from most similar to least similar. */ + @Override + public int compareTo(SimilarUser other) { + double otherSimilarity = other.getSimilarity(); + if (similarity > otherSimilarity) { + return -1; + } + if (similarity < otherSimilarity) { + return 1; + } + long otherUserID = other.getUserID(); + if (userID < otherUserID) { + return -1; + } + if (userID > otherUserID) { + return 1; + } + return 0; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java new file mode 100644 index 0000000..3c27145 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/TopItems.java @@ -0,0 +1,212 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.PriorityQueue; +import java.util.Queue; + +import com.google.common.collect.Lists; +import org.apache.mahout.cf.taste.common.NoSuchItemException; +import org.apache.mahout.cf.taste.common.NoSuchUserException; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity; +import org.apache.mahout.cf.taste.impl.similarity.GenericUserSimilarity; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A simple class that refactors the "find top N things" logic that is used in several places. + * </p> + */ +public final class TopItems { + + private static final long[] NO_IDS = new long[0]; + + private TopItems() { } + + public static List<RecommendedItem> getTopItems(int howMany, + LongPrimitiveIterator possibleItemIDs, + IDRescorer rescorer, + Estimator<Long> estimator) throws TasteException { + Preconditions.checkArgument(possibleItemIDs != null, "possibleItemIDs is null"); + Preconditions.checkArgument(estimator != null, "estimator is null"); + + Queue<RecommendedItem> topItems = new PriorityQueue<>(howMany + 1, + Collections.reverseOrder(ByValueRecommendedItemComparator.getInstance())); + boolean full = false; + double lowestTopValue = Double.NEGATIVE_INFINITY; + while (possibleItemIDs.hasNext()) { + long itemID = possibleItemIDs.next(); + if (rescorer == null || !rescorer.isFiltered(itemID)) { + double preference; + try { + preference = estimator.estimate(itemID); + } catch (NoSuchItemException nsie) { + continue; + } + double rescoredPref = rescorer == null ? preference : rescorer.rescore(itemID, preference); + if (!Double.isNaN(rescoredPref) && (!full || rescoredPref > lowestTopValue)) { + topItems.add(new GenericRecommendedItem(itemID, (float) rescoredPref)); + if (full) { + topItems.poll(); + } else if (topItems.size() > howMany) { + full = true; + topItems.poll(); + } + lowestTopValue = topItems.peek().getValue(); + } + } + } + int size = topItems.size(); + if (size == 0) { + return Collections.emptyList(); + } + List<RecommendedItem> result = Lists.newArrayListWithCapacity(size); + result.addAll(topItems); + Collections.sort(result, ByValueRecommendedItemComparator.getInstance()); + return result; + } + + public static long[] getTopUsers(int howMany, + LongPrimitiveIterator allUserIDs, + IDRescorer rescorer, + Estimator<Long> estimator) throws TasteException { + Queue<SimilarUser> topUsers = new PriorityQueue<>(howMany + 1, Collections.reverseOrder()); + boolean full = false; + double lowestTopValue = Double.NEGATIVE_INFINITY; + while (allUserIDs.hasNext()) { + long userID = allUserIDs.next(); + if (rescorer != null && rescorer.isFiltered(userID)) { + continue; + } + double similarity; + try { + similarity = estimator.estimate(userID); + } catch (NoSuchUserException nsue) { + continue; + } + double rescoredSimilarity = rescorer == null ? similarity : rescorer.rescore(userID, similarity); + if (!Double.isNaN(rescoredSimilarity) && (!full || rescoredSimilarity > lowestTopValue)) { + topUsers.add(new SimilarUser(userID, rescoredSimilarity)); + if (full) { + topUsers.poll(); + } else if (topUsers.size() > howMany) { + full = true; + topUsers.poll(); + } + lowestTopValue = topUsers.peek().getSimilarity(); + } + } + int size = topUsers.size(); + if (size == 0) { + return NO_IDS; + } + List<SimilarUser> sorted = Lists.newArrayListWithCapacity(size); + sorted.addAll(topUsers); + Collections.sort(sorted); + long[] result = new long[size]; + int i = 0; + for (SimilarUser similarUser : sorted) { + result[i++] = similarUser.getUserID(); + } + return result; + } + + /** + * <p> + * Thanks to tsmorton for suggesting this functionality and writing part of the code. + * </p> + * + * @see GenericItemSimilarity#GenericItemSimilarity(Iterable, int) + * @see GenericItemSimilarity#GenericItemSimilarity(org.apache.mahout.cf.taste.similarity.ItemSimilarity, + * org.apache.mahout.cf.taste.model.DataModel, int) + */ + public static List<GenericItemSimilarity.ItemItemSimilarity> getTopItemItemSimilarities( + int howMany, Iterator<GenericItemSimilarity.ItemItemSimilarity> allSimilarities) { + + Queue<GenericItemSimilarity.ItemItemSimilarity> topSimilarities + = new PriorityQueue<>(howMany + 1, Collections.reverseOrder()); + boolean full = false; + double lowestTopValue = Double.NEGATIVE_INFINITY; + while (allSimilarities.hasNext()) { + GenericItemSimilarity.ItemItemSimilarity similarity = allSimilarities.next(); + double value = similarity.getValue(); + if (!Double.isNaN(value) && (!full || value > lowestTopValue)) { + topSimilarities.add(similarity); + if (full) { + topSimilarities.poll(); + } else if (topSimilarities.size() > howMany) { + full = true; + topSimilarities.poll(); + } + lowestTopValue = topSimilarities.peek().getValue(); + } + } + int size = topSimilarities.size(); + if (size == 0) { + return Collections.emptyList(); + } + List<GenericItemSimilarity.ItemItemSimilarity> result = Lists.newArrayListWithCapacity(size); + result.addAll(topSimilarities); + Collections.sort(result); + return result; + } + + public static List<GenericUserSimilarity.UserUserSimilarity> getTopUserUserSimilarities( + int howMany, Iterator<GenericUserSimilarity.UserUserSimilarity> allSimilarities) { + + Queue<GenericUserSimilarity.UserUserSimilarity> topSimilarities + = new PriorityQueue<>(howMany + 1, Collections.reverseOrder()); + boolean full = false; + double lowestTopValue = Double.NEGATIVE_INFINITY; + while (allSimilarities.hasNext()) { + GenericUserSimilarity.UserUserSimilarity similarity = allSimilarities.next(); + double value = similarity.getValue(); + if (!Double.isNaN(value) && (!full || value > lowestTopValue)) { + topSimilarities.add(similarity); + if (full) { + topSimilarities.poll(); + } else if (topSimilarities.size() > howMany) { + full = true; + topSimilarities.poll(); + } + lowestTopValue = topSimilarities.peek().getValue(); + } + } + int size = topSimilarities.size(); + if (size == 0) { + return Collections.emptyList(); + } + List<GenericUserSimilarity.UserUserSimilarity> result = Lists.newArrayListWithCapacity(size); + result.addAll(topSimilarities); + Collections.sort(result); + return result; + } + + public interface Estimator<T> { + double estimate(T thing) throws TasteException; + } + +}
