http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java new file mode 100644 index 0000000..8ea1660 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/PearsonCorrelationSimilarity.java @@ -0,0 +1,93 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.similarity; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.common.Weighting; +import org.apache.mahout.cf.taste.model.DataModel; + +import com.google.common.base.Preconditions; + +/** + * <p> + * An implementation of the Pearson correlation. 
For users X and Y, the following values are calculated: + * </p> + * + * <ul> + * <li>sumX2: sum of the square of all X's preference values</li> + * <li>sumY2: sum of the square of all Y's preference values</li> + * <li>sumXY: sum of the product of X and Y's preference value for all items for which both X and Y express a + * preference</li> + * </ul> + * + * <p> + * The correlation is then: + * </p> + * + * <p> + * {@code sumXY / sqrt(sumX2 * sumY2)} + * </p> + * + * <p> + * Note that this correlation "centers" its data, that is, it shifts each user's preference values so that their + * mean is 0. This is necessary to achieve expected behavior on all data sets. + * </p> + * + * <p> + * This correlation implementation is equivalent to the cosine similarity since the data it receives + * is assumed to be centered -- mean is 0. The correlation may be interpreted as the cosine of the angle + * between the two vectors defined by the users' preference values. + * </p> + * + * <p> + * For cosine similarity on uncentered data, see {@link UncenteredCosineSimilarity}. 
+ * </p> + */ +public final class PearsonCorrelationSimilarity extends AbstractSimilarity { + + /** + * @throws IllegalArgumentException if {@link DataModel} does not have preference values + */ + public PearsonCorrelationSimilarity(DataModel dataModel) throws TasteException { + this(dataModel, Weighting.UNWEIGHTED); + } + + /** + * @throws IllegalArgumentException if {@link DataModel} does not have preference values + */ + public PearsonCorrelationSimilarity(DataModel dataModel, Weighting weighting) throws TasteException { + super(dataModel, weighting, true); + Preconditions.checkArgument(dataModel.hasPreferenceValues(), "DataModel doesn't have preference values"); + } + + @Override + double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) { + if (n == 0) { + return Double.NaN; + } + // Note that sum of X and sum of Y don't appear here since they are assumed to be 0; + // the data is assumed to be centered. + double denominator = Math.sqrt(sumX2) * Math.sqrt(sumY2); + if (denominator == 0.0) { + // One or both parties has -all- the same ratings; + // can't really say much similarity under this measure + return Double.NaN; + } + return sumXY / denominator; + } + +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java new file mode 100644 index 0000000..1116368 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/SpearmanCorrelationSimilarity.java @@ -0,0 +1,135 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.similarity; + +import java.util.Collection; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.similarity.PreferenceInferrer; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +import com.google.common.base.Preconditions; + +/** + * <p> + * Like {@link PearsonCorrelationSimilarity}, but compares relative ranking of preference values instead of + * preference values themselves. That is, each user's preferences are sorted and then assign a rank as their + * preference value, with 1 being assigned to the least preferred item. + * </p> + */ +public final class SpearmanCorrelationSimilarity implements UserSimilarity { + + private final DataModel dataModel; + + public SpearmanCorrelationSimilarity(DataModel dataModel) { + this.dataModel = Preconditions.checkNotNull(dataModel); + } + + @Override + public double userSimilarity(long userID1, long userID2) throws TasteException { + PreferenceArray xPrefs = dataModel.getPreferencesFromUser(userID1); + PreferenceArray yPrefs = dataModel.getPreferencesFromUser(userID2); + int xLength = xPrefs.length(); + int yLength = yPrefs.length(); + + if (xLength <= 1 || yLength <= 1) { + return Double.NaN; + } + + // Copy prefs since we need to modify pref values to ranks + xPrefs = xPrefs.clone(); + yPrefs = yPrefs.clone(); + + // First sort by values from low to high + xPrefs.sortByValue(); + yPrefs.sortByValue(); + + // Assign ranks from low to high + float nextRank = 1.0f; + for (int i = 0; i < xLength; i++) { + // ... 
but only for items that are common to both pref arrays + if (yPrefs.hasPrefWithItemID(xPrefs.getItemID(i))) { + xPrefs.setValue(i, nextRank); + nextRank += 1.0f; + } + // Other values are bogus but don't matter + } + nextRank = 1.0f; + for (int i = 0; i < yLength; i++) { + if (xPrefs.hasPrefWithItemID(yPrefs.getItemID(i))) { + yPrefs.setValue(i, nextRank); + nextRank += 1.0f; + } + } + + xPrefs.sortByItem(); + yPrefs.sortByItem(); + + long xIndex = xPrefs.getItemID(0); + long yIndex = yPrefs.getItemID(0); + int xPrefIndex = 0; + int yPrefIndex = 0; + + double sumXYRankDiff2 = 0.0; + int count = 0; + + while (true) { + int compare = xIndex < yIndex ? -1 : xIndex > yIndex ? 1 : 0; + if (compare == 0) { + double diff = xPrefs.getValue(xPrefIndex) - yPrefs.getValue(yPrefIndex); + sumXYRankDiff2 += diff * diff; + count++; + } + if (compare <= 0) { + if (++xPrefIndex >= xLength) { + break; + } + xIndex = xPrefs.getItemID(xPrefIndex); + } + if (compare >= 0) { + if (++yPrefIndex >= yLength) { + break; + } + yIndex = yPrefs.getItemID(yPrefIndex); + } + } + + if (count <= 1) { + return Double.NaN; + } + + // When ranks are unique, this formula actually gives the Pearson correlation + return 1.0 - 6.0 * sumXYRankDiff2 / (count * (count * count - 1)); + } + + @Override + public void setPreferenceInferrer(PreferenceInferrer inferrer) { + throw new UnsupportedOperationException(); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed); + RefreshHelper.maybeRefresh(alreadyRefreshed, dataModel); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java 
b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java new file mode 100644 index 0000000..0c3a0a4 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/TanimotoCoefficientSimilarity.java @@ -0,0 +1,126 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.similarity; + +import java.util.Collection; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.similarity.PreferenceInferrer; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +/** + * <p> + * An implementation of a "similarity" based on the <a + * href="http://en.wikipedia.org/wiki/Jaccard_index#Tanimoto_coefficient_.28extended_Jaccard_coefficient.29"> + * Tanimoto coefficient</a>, or extended <a href="http://en.wikipedia.org/wiki/Jaccard_index">Jaccard + * coefficient</a>. 
+ * </p> + * + * <p> + * This is intended for "binary" data sets where a user either expresses a generic "yes" preference for an + * item or has no preference. The actual preference values do not matter here, only their presence or absence. + * </p> + * + * <p> + * The value returned is in [0,1]. + * </p> + */ +public final class TanimotoCoefficientSimilarity extends AbstractItemSimilarity implements UserSimilarity { + + public TanimotoCoefficientSimilarity(DataModel dataModel) { + super(dataModel); + } + + /** + * @throws UnsupportedOperationException + */ + @Override + public void setPreferenceInferrer(PreferenceInferrer inferrer) { + throw new UnsupportedOperationException(); + } + + @Override + public double userSimilarity(long userID1, long userID2) throws TasteException { + + DataModel dataModel = getDataModel(); + FastIDSet xPrefs = dataModel.getItemIDsFromUser(userID1); + FastIDSet yPrefs = dataModel.getItemIDsFromUser(userID2); + + int xPrefsSize = xPrefs.size(); + int yPrefsSize = yPrefs.size(); + if (xPrefsSize == 0 && yPrefsSize == 0) { + return Double.NaN; + } + if (xPrefsSize == 0 || yPrefsSize == 0) { + return 0.0; + } + + int intersectionSize = + xPrefsSize < yPrefsSize ? 
yPrefs.intersectionSize(xPrefs) : xPrefs.intersectionSize(yPrefs); + if (intersectionSize == 0) { + return Double.NaN; + } + + int unionSize = xPrefsSize + yPrefsSize - intersectionSize; + + return (double) intersectionSize / (double) unionSize; + } + + @Override + public double itemSimilarity(long itemID1, long itemID2) throws TasteException { + int preferring1 = getDataModel().getNumUsersWithPreferenceFor(itemID1); + return doItemSimilarity(itemID1, itemID2, preferring1); + } + + @Override + public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException { + int preferring1 = getDataModel().getNumUsersWithPreferenceFor(itemID1); + int length = itemID2s.length; + double[] result = new double[length]; + for (int i = 0; i < length; i++) { + result[i] = doItemSimilarity(itemID1, itemID2s[i], preferring1); + } + return result; + } + + private double doItemSimilarity(long itemID1, long itemID2, int preferring1) throws TasteException { + DataModel dataModel = getDataModel(); + int preferring1and2 = dataModel.getNumUsersWithPreferenceFor(itemID1, itemID2); + if (preferring1and2 == 0) { + return Double.NaN; + } + int preferring2 = dataModel.getNumUsersWithPreferenceFor(itemID2); + return (double) preferring1and2 / (double) (preferring1 + preferring2 - preferring1and2); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + alreadyRefreshed = RefreshHelper.buildRefreshed(alreadyRefreshed); + RefreshHelper.maybeRefresh(alreadyRefreshed, getDataModel()); + } + + @Override + public String toString() { + return "TanimotoCoefficientSimilarity[dataModel:" + getDataModel() + ']'; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java ---------------------------------------------------------------------- diff --git 
a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java new file mode 100644 index 0000000..6260606 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/UncenteredCosineSimilarity.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.similarity; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.common.Weighting; +import org.apache.mahout.cf.taste.model.DataModel; + +import com.google.common.base.Preconditions; + +/** + * <p> + * An implementation of the cosine similarity. The result is the cosine of the angle formed between + * the two preference vectors. + * </p> + * + * <p> + * Note that this similarity does not "center" its data, that is, it does not shift each user's preference values + * so that their mean is 0. For this behavior, use {@link PearsonCorrelationSimilarity}, which actually is + * mathematically equivalent for centered data. 
+ * </p> + */ +public final class UncenteredCosineSimilarity extends AbstractSimilarity { + + /** + * @throws IllegalArgumentException if {@link DataModel} does not have preference values + */ + public UncenteredCosineSimilarity(DataModel dataModel) throws TasteException { + this(dataModel, Weighting.UNWEIGHTED); + } + + /** + * @throws IllegalArgumentException if {@link DataModel} does not have preference values + */ + public UncenteredCosineSimilarity(DataModel dataModel, Weighting weighting) throws TasteException { + super(dataModel, weighting, false); + Preconditions.checkArgument(dataModel.hasPreferenceValues(), "DataModel doesn't have preference values"); + } + + @Override + double computeResult(int n, double sumXY, double sumX2, double sumY2, double sumXYdiff2) { + if (n == 0) { + return Double.NaN; + } + double denominator = Math.sqrt(sumX2) * Math.sqrt(sumY2); + if (denominator == 0.0) { + // One or both parties has -all- the same ratings; + // can't really say much similarity under this measure + return Double.NaN; + } + return sumXY / denominator; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java new file mode 100644 index 0000000..1ae45c2 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterable.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.mahout.cf.taste.impl.similarity.file;

import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;

import java.io.File;
import java.io.IOException;
import java.util.Iterator;

/**
 * {@link Iterable} over the item-item similarities stored in a file; every call to
 * {@link #iterator()} opens a new {@link FileItemItemSimilarityIterator} on that file.
 */
final class FileItemItemSimilarityIterable implements Iterable<GenericItemSimilarity.ItemItemSimilarity> {

  private final File source;

  /**
   * @param similaritiesFile file that each iterator will be opened on
   */
  FileItemItemSimilarityIterable(File similaritiesFile) {
    source = similaritiesFile;
  }

  /**
   * @return a fresh iterator over the file
   * @throws IllegalStateException wrapping the {@link IOException} if the iterator cannot be created
   */
  @Override
  public Iterator<GenericItemSimilarity.ItemItemSimilarity> iterator() {
    Iterator<GenericItemSimilarity.ItemItemSimilarity> similarities;
    try {
      similarities = new FileItemItemSimilarityIterator(source);
    } catch (IOException e) {
      throw new IllegalStateException("Can't read " + source, e);
    }
    return similarities;
  }

}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java 
b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java new file mode 100644 index 0000000..c071159 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemItemSimilarityIterator.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.similarity.file; + +import com.google.common.base.Function; +import com.google.common.collect.ForwardingIterator; +import com.google.common.collect.Iterators; +import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity; +import org.apache.mahout.common.iterator.FileLineIterator; + +import java.io.File; +import java.io.IOException; +import java.util.Iterator; +import java.util.regex.Pattern; + +/** + * a simple iterator using a {@link FileLineIterator} internally, parsing each + * line into an {@link GenericItemSimilarity.ItemItemSimilarity}. 
+ */ +final class FileItemItemSimilarityIterator extends ForwardingIterator<GenericItemSimilarity.ItemItemSimilarity> { + + private static final Pattern SEPARATOR = Pattern.compile("[,\t]"); + + private final Iterator<GenericItemSimilarity.ItemItemSimilarity> delegate; + + FileItemItemSimilarityIterator(File similaritiesFile) throws IOException { + delegate = Iterators.transform( + new FileLineIterator(similaritiesFile), + new Function<String, GenericItemSimilarity.ItemItemSimilarity>() { + @Override + public GenericItemSimilarity.ItemItemSimilarity apply(String from) { + String[] tokens = SEPARATOR.split(from); + return new GenericItemSimilarity.ItemItemSimilarity(Long.parseLong(tokens[0]), + Long.parseLong(tokens[1]), + Double.parseDouble(tokens[2])); + } + }); + } + + @Override + protected Iterator<GenericItemSimilarity.ItemItemSimilarity> delegate() { + return delegate; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java new file mode 100644 index 0000000..712b96a --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.similarity.file; + +import java.io.File; +import java.util.Collection; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity; +import org.apache.mahout.cf.taste.similarity.ItemSimilarity; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * An {@link ItemSimilarity} backed by a comma-delimited file. This class typically expects a file where each line + * contains an item ID, followed by another item ID, followed by a similarity value, separated by commas. You may also + * use tabs. + * </p> + * + * <p> + * The similarity value is assumed to be parseable as a {@code double} having a value between -1 and 1. The + * item IDs are parsed as {@code long}s. Similarities are symmetric so for a pair of items you do not have to + * include 2 lines in the file. + * </p> + * + * <p> + * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file + * has been reloaded very recently already. + * </p> + * + * <p> + * This class is not intended for use with very large amounts of data. For that, a JDBC-backed {@link ItemSimilarity} + * and a database are more appropriate. 
+ * </p> + */ +public class FileItemSimilarity implements ItemSimilarity { + + public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute? + + private ItemSimilarity delegate; + private final ReentrantLock reloadLock; + private final File dataFile; + private long lastModified; + private final long minReloadIntervalMS; + + private static final Logger log = LoggerFactory.getLogger(FileItemSimilarity.class); + + /** + * @param dataFile + * file containing the similarity data + */ + public FileItemSimilarity(File dataFile) { + this(dataFile, DEFAULT_MIN_RELOAD_INTERVAL_MS); + } + + /** + * @param minReloadIntervalMS + * the minimum interval in milliseconds after which a full reload of the original datafile is done + * when refresh() is called + * @see #FileItemSimilarity(File) + */ + public FileItemSimilarity(File dataFile, long minReloadIntervalMS) { + Preconditions.checkArgument(dataFile != null, "dataFile is null"); + Preconditions.checkArgument(dataFile.exists() && !dataFile.isDirectory(), + "dataFile is missing or a directory: %s", dataFile); + + log.info("Creating FileItemSimilarity for file {}", dataFile); + + this.dataFile = dataFile.getAbsoluteFile(); + this.lastModified = dataFile.lastModified(); + this.minReloadIntervalMS = minReloadIntervalMS; + this.reloadLock = new ReentrantLock(); + + reload(); + } + + @Override + public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException { + return delegate.itemSimilarities(itemID1, itemID2s); + } + + @Override + public long[] allSimilarItemIDs(long itemID) throws TasteException { + return delegate.allSimilarItemIDs(itemID); + } + + @Override + public double itemSimilarity(long itemID1, long itemID2) throws TasteException { + return delegate.itemSimilarity(itemID1, itemID2); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + if (dataFile.lastModified() > lastModified + minReloadIntervalMS) { + log.debug("File has changed; 
reloading..."); + reload(); + } + } + + protected void reload() { + if (reloadLock.tryLock()) { + try { + long newLastModified = dataFile.lastModified(); + delegate = new GenericItemSimilarity(new FileItemItemSimilarityIterable(dataFile)); + lastModified = newLastModified; + } finally { + reloadLock.unlock(); + } + } + } + + @Override + public String toString() { + return "FileItemSimilarity[dataFile:" + dataFile + ']'; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java new file mode 100644 index 0000000..631ec9b --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.similarity.precompute; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; + +import com.google.common.io.Closeables; +import org.apache.commons.io.Charsets; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter; + +/** + * Persist the precomputed item similarities to a file that can later be used + * by a {@link org.apache.mahout.cf.taste.impl.similarity.file.FileItemSimilarity} + */ +public class FileSimilarItemsWriter implements SimilarItemsWriter { + + private final File file; + private BufferedWriter writer; + + public FileSimilarItemsWriter(File file) { + this.file = file; + } + + @Override + public void open() throws IOException { + writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)); + } + + @Override + public void add(SimilarItems similarItems) throws IOException { + String itemID = String.valueOf(similarItems.getItemID()); + for (SimilarItem similarItem : similarItems.getSimilarItems()) { + writer.write(itemID); + writer.write(','); + writer.write(String.valueOf(similarItem.getItemID())); + writer.write(','); + writer.write(String.valueOf(similarItem.getSimilarity())); + writer.newLine(); + } + } + + @Override + public void close() throws IOException { + Closeables.close(writer, false); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java 
b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java new file mode 100644 index 0000000..b7b52cf --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java @@ -0,0 +1,230 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.similarity.precompute; + +import com.google.common.io.Closeables; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Precompute item similarities in parallel on a single machine. 
The recommender given to this class must use a + * DataModel that holds the interactions in memory (such as + * {@link org.apache.mahout.cf.taste.impl.model.GenericDataModel} or + * {@link org.apache.mahout.cf.taste.impl.model.file.FileDataModel}) as fast random access to the data is required + */ +public class MultithreadedBatchItemSimilarities extends BatchItemSimilarities { + + private int batchSize; + + private static final int DEFAULT_BATCH_SIZE = 100; + + private static final Logger log = LoggerFactory.getLogger(MultithreadedBatchItemSimilarities.class); + + /** + * @param recommender recommender to use + * @param similarItemsPerItem number of similar items to compute per item + */ + public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem) { + this(recommender, similarItemsPerItem, DEFAULT_BATCH_SIZE); + } + + /** + * @param recommender recommender to use + * @param similarItemsPerItem number of similar items to compute per item + * @param batchSize size of item batches sent to worker threads + */ + public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem, int batchSize) { + super(recommender, similarItemsPerItem); + this.batchSize = batchSize; + } + + @Override + public int computeItemSimilarities(int degreeOfParallelism, int maxDurationInHours, SimilarItemsWriter writer) + throws IOException { + + ExecutorService executorService = Executors.newFixedThreadPool(degreeOfParallelism + 1); + + Output output = null; + try { + writer.open(); + + DataModel dataModel = getRecommender().getDataModel(); + + BlockingQueue<long[]> itemsIDsInBatches = queueItemIDsInBatches(dataModel, batchSize, degreeOfParallelism); + BlockingQueue<List<SimilarItems>> results = new LinkedBlockingQueue<>(); + + AtomicInteger numActiveWorkers = new AtomicInteger(degreeOfParallelism); + for (int n = 0; n < degreeOfParallelism; n++) { + executorService.execute(new SimilarItemsWorker(n, itemsIDsInBatches, 
results, numActiveWorkers)); + } + + output = new Output(results, writer, numActiveWorkers); + executorService.execute(output); + + } catch (Exception e) { + throw new IOException(e); + } finally { + executorService.shutdown(); + try { + boolean succeeded = executorService.awaitTermination(maxDurationInHours, TimeUnit.HOURS); + if (!succeeded) { + throw new RuntimeException("Unable to complete the computation in " + maxDurationInHours + " hours!"); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + Closeables.close(writer, false); + } + + return output.getNumSimilaritiesProcessed(); + } + + private static BlockingQueue<long[]> queueItemIDsInBatches(DataModel dataModel, int batchSize, + int degreeOfParallelism) + throws TasteException { + + LongPrimitiveIterator itemIDs = dataModel.getItemIDs(); + int numItems = dataModel.getNumItems(); + + BlockingQueue<long[]> itemIDBatches = new LinkedBlockingQueue<>((numItems / batchSize) + 1); + + long[] batch = new long[batchSize]; + int pos = 0; + while (itemIDs.hasNext()) { + batch[pos] = itemIDs.nextLong(); + pos++; + if (pos == batchSize) { + itemIDBatches.add(batch.clone()); + pos = 0; + } + } + + if (pos > 0) { + long[] lastBatch = new long[pos]; + System.arraycopy(batch, 0, lastBatch, 0, pos); + itemIDBatches.add(lastBatch); + } + + if (itemIDBatches.size() < degreeOfParallelism) { + throw new IllegalStateException("Degree of parallelism [" + degreeOfParallelism + "] " + + " is larger than number of batches [" + itemIDBatches.size() +"]."); + } + + log.info("Queued {} items in {} batches", numItems, itemIDBatches.size()); + + return itemIDBatches; + } + + + private static class Output implements Runnable { + + private final BlockingQueue<List<SimilarItems>> results; + private final SimilarItemsWriter writer; + private final AtomicInteger numActiveWorkers; + private int numSimilaritiesProcessed = 0; + + Output(BlockingQueue<List<SimilarItems>> results, SimilarItemsWriter writer, AtomicInteger 
numActiveWorkers) { + this.results = results; + this.writer = writer; + this.numActiveWorkers = numActiveWorkers; + } + + private int getNumSimilaritiesProcessed() { + return numSimilaritiesProcessed; + } + + @Override + public void run() { + while (numActiveWorkers.get() != 0 || !results.isEmpty()) { + try { + List<SimilarItems> similarItemsOfABatch = results.poll(10, TimeUnit.MILLISECONDS); + if (similarItemsOfABatch != null) { + for (SimilarItems similarItems : similarItemsOfABatch) { + writer.add(similarItems); + numSimilaritiesProcessed += similarItems.numSimilarItems(); + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + } + + private class SimilarItemsWorker implements Runnable { + + private final int number; + private final BlockingQueue<long[]> itemIDBatches; + private final BlockingQueue<List<SimilarItems>> results; + private final AtomicInteger numActiveWorkers; + + SimilarItemsWorker(int number, BlockingQueue<long[]> itemIDBatches, BlockingQueue<List<SimilarItems>> results, + AtomicInteger numActiveWorkers) { + this.number = number; + this.itemIDBatches = itemIDBatches; + this.results = results; + this.numActiveWorkers = numActiveWorkers; + } + + @Override + public void run() { + + int numBatchesProcessed = 0; + while (!itemIDBatches.isEmpty()) { + try { + long[] itemIDBatch = itemIDBatches.take(); + + List<SimilarItems> similarItemsOfBatch = new ArrayList<>(itemIDBatch.length); + for (long itemID : itemIDBatch) { + List<RecommendedItem> similarItems = getRecommender().mostSimilarItems(itemID, getSimilarItemsPerItem()); + similarItemsOfBatch.add(new SimilarItems(itemID, similarItems)); + } + + results.offer(similarItemsOfBatch); + + if (++numBatchesProcessed % 5 == 0) { + log.info("worker {} processed {} batches", number, numBatchesProcessed); + } + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + log.info("worker {} processed {} batches. 
done.", number, numBatchesProcessed); + numActiveWorkers.decrementAndGet(); + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java new file mode 100644 index 0000000..022d02d --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.model; + +import java.io.Serializable; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; + +/** + * <p> + * Implementations represent a repository of information about users and their associated {@link Preference}s + * for items. 
+ * </p> + */ +public interface DataModel extends Refreshable, Serializable { + + /** + * @return all user IDs in the model, in order + * @throws TasteException + * if an error occurs while accessing the data + */ + LongPrimitiveIterator getUserIDs() throws TasteException; + + /** + * @param userID + * ID of user to get prefs for + * @return user's preferences, ordered by item ID + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + PreferenceArray getPreferencesFromUser(long userID) throws TasteException; + + /** + * @param userID + * ID of user to get prefs for + * @return IDs of items user expresses a preference for + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + FastIDSet getItemIDsFromUser(long userID) throws TasteException; + + /** + * @return a {@link LongPrimitiveIterator} of all item IDs in the model, in order + * @throws TasteException + * if an error occurs while accessing the data + */ + LongPrimitiveIterator getItemIDs() throws TasteException; + + /** + * @param itemID + * item ID + * @return all existing {@link Preference}s expressed for that item, ordered by user ID, as an array + * @throws org.apache.mahout.cf.taste.common.NoSuchItemException + * if the item does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + PreferenceArray getPreferencesForItem(long itemID) throws TasteException; + + /** + * Retrieves the preference value for a single user and item. 
+ * + * @param userID + * user ID to get pref value from + * @param itemID + * item ID to get pref value for + * @return preference value from the given user for the given item or null if none exists + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + Float getPreferenceValue(long userID, long itemID) throws TasteException; + + /** + * Retrieves the time at which a preference value from a user and item was set, if known. + * Time is expressed in the usual way, as a number of milliseconds since the epoch. + * + * @param userID user ID for preference in question + * @param itemID item ID for preference in question + * @return time at which preference was set or null if no preference exists or its time is not known + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException if the user does not exist + * @throws TasteException if an error occurs while accessing the data + */ + Long getPreferenceTime(long userID, long itemID) throws TasteException; + + /** + * @return total number of items known to the model. This is generally the union of all items preferred by + * at least one user but could include more. + * @throws TasteException + * if an error occurs while accessing the data + */ + int getNumItems() throws TasteException; + + /** + * @return total number of users known to the model. 
+ * @throws TasteException + * if an error occurs while accessing the data + */ + int getNumUsers() throws TasteException; + + /** + * @param itemID item ID to check for + * @return the number of users who have expressed a preference for the item + * @throws TasteException if an error occurs while accessing the data + */ + int getNumUsersWithPreferenceFor(long itemID) throws TasteException; + + /** + * @param itemID1 first item ID to check for + * @param itemID2 second item ID to check for + * @return the number of users who have expressed a preference for the items + * @throws TasteException if an error occurs while accessing the data + */ + int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException; + + /** + * <p> + * Sets a particular preference (item plus rating) for a user. + * </p> + * + * @param userID + * user to set preference for + * @param itemID + * item to set preference for + * @param value + * preference value + * @throws org.apache.mahout.cf.taste.common.NoSuchItemException + * if the item does not exist + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + void setPreference(long userID, long itemID, float value) throws TasteException; + + /** + * <p> + * Removes a particular preference for a user. 
+ * </p> + * + * @param userID + * user from which to remove preference + * @param itemID + * item to remove preference for + * @throws org.apache.mahout.cf.taste.common.NoSuchItemException + * if the item does not exist + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + void removePreference(long userID, long itemID) throws TasteException; + + /** + * @return true if this implementation actually stores and returns distinct preference values; + * that is, if it is not a 'boolean' DataModel + */ + boolean hasPreferenceValues(); + + /** + * @return the maximum preference value that is possible in the current problem domain being evaluated. For + * example, if the domain is movie ratings on a scale of 1 to 5, this should be 5. While a + * {@link org.apache.mahout.cf.taste.recommender.Recommender} may estimate a preference value above 5.0, it + * isn't "fair" to consider that the system is actually suggesting an impossible rating of, say, 5.4 stars. + * In practice the application would cap this estimate to 5.0. 
Since evaluators evaluate + * the difference between estimated and actual value, this at least prevents this effect from unfairly + * penalizing a {@link org.apache.mahout.cf.taste.recommender.Recommender} + */ + float getMaxPreference(); + + /** + * @see #getMaxPreference() + */ + float getMinPreference(); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java new file mode 100644 index 0000000..cc477fe --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.model; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; + +/** + * <p> + * Mahout 0.2 changed the framework to operate only in terms of numeric (long) ID values for users and items. 
+ * This is, obviously, not compatible with applications that used other key types -- most commonly + * {@link String}. Implementations of this interface provide support for mapping Strings to longs and vice versa in + * order to provide a smoother migration path to applications that must still use strings as IDs. + * </p> + * + * <p> + * The mapping from strings to 64-bit numeric values is fixed here, to provide a standard implementation that + * is 'portable' or reproducible outside the framework easily. See {@link #toLongID(String)}. + * </p> + * + * <p> + * Because this mapping is deterministically computable, it does not need to be stored. Indeed, subclasses' + * job is to store the reverse mapping. There are an infinite number of strings but only a fixed number of + * longs, so, it is possible for two strings to map to the same value. Subclasses do not treat this as an + * error but rather retain only the most recent mapping, overwriting a previous mapping. The probability of + * collision in a 64-bit space is quite small, but not zero. However, in the context of a collaborative + * filtering problem, the consequence of a collision is small, at worst -- perhaps one user receives another's + * recommendations. + * </p> + * + * @since 0.2 + */ +public interface IDMigrator extends Refreshable { + + /** + * @return the top 8 bytes of the MD5 hash of the bytes of the given {@link String}'s UTF-8 encoding as a + * long.
+ */ + long toLongID(String stringID); + + /** + * @return the string ID most recently associated with the given long ID, or null if doesn't exist + * @throws TasteException + * if an error occurs while retrieving the mapping + */ + String toStringID(long longID) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java new file mode 100644 index 0000000..e91ed48 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import javax.sql.DataSource;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
+import org.apache.mahout.cf.taste.impl.common.FastIDSet;
+
+/**
+ * A {@link DataModel} that gives access to the {@link DataSource} underlying it and that can export its data as
+ * {@link FastByIDMap}s.
+ */
+public interface JDBCDataModel extends DataModel {
+
+  /**
+   * @return {@link DataSource} underlying this model
+   */
+  DataSource getDataSource();
+
+  /**
+   * Hmm, should this exist elsewhere? seems like most relevant for a DB implementation, which is not in
+   * memory, which might want to export to memory.
+   *
+   * @return all user preference data
+   * @throws TasteException
+   *           declared by the method; presumably raised when the underlying data cannot be read (TODO confirm
+   *           against the implementations)
+   */
+  FastByIDMap<PreferenceArray> exportWithPrefs() throws TasteException;
+
+  /**
+   * Counterpart of {@link #exportWithPrefs()} whose map values are {@link FastIDSet}s instead of
+   * {@link PreferenceArray}s.
+   *
+   * @return NOTE(review): presumably a map from user ID to the IDs of the items that user has a preference
+   *         for, without the preference values -- confirm against the implementations
+   * @throws TasteException
+   *           declared by the method; presumably raised when the underlying data cannot be read (TODO confirm
+   *           against the implementations)
+   */
+  FastByIDMap<FastIDSet> exportWithIDsOnly() throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java new file mode 100644 index 0000000..fe0150a --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+/**
+ * <p>
+ * A {@link Preference} encapsulates an item and a preference value, which indicates the strength of the
+ * preference for it. {@link Preference}s are associated with users.
+ * </p>
+ */
+public interface Preference {
+
+  /** @return ID of user who prefers the item */
+  long getUserID();
+
+  /** @return item ID that is preferred */
+  long getItemID();
+
+  /**
+   * @return strength of the preference for that item. Zero should indicate "no preference either way";
+   *         positive values indicate preference and negative values indicate dislike
+   */
+  float getValue();
+
+  /**
+   * Sets the strength of the preference for this item.
+   *
+   * @param value
+   *          new preference value
+   */
+  void setValue(float value);
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java new file mode 100644 index 0000000..3886bc6 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import java.io.Serializable;
+
+/**
+ * An alternate representation of an array of {@link Preference}. Implementations, in theory, can produce a
+ * more memory-efficient representation.
+ */
+public interface PreferenceArray extends Cloneable, Serializable, Iterable<Preference> {
+
+  /**
+   * @return size or length of the "array"
+   */
+  int length();
+
+  /**
+   * @param i
+   *          index
+   * @return a materialized {@link Preference} representation of the preference at i
+   */
+  Preference get(int i);
+
+  /**
+   * Sets preference at i from information in the given {@link Preference}
+   *
+   * @param i
+   *          index
+   * @param pref
+   *          {@link Preference} to take the information from
+   */
+  void set(int i, Preference pref);
+
+  /**
+   * @param i
+   *          index
+   * @return user ID from preference at i
+   */
+  long getUserID(int i);
+
+  /**
+   * Sets user ID for preference at i.
+   *
+   * @param i
+   *          index
+   * @param userID
+   *          new user ID
+   */
+  void setUserID(int i, long userID);
+
+  /**
+   * @param i
+   *          index
+   * @return item ID from preference at i
+   */
+  long getItemID(int i);
+
+  /**
+   * Sets item ID for preference at i.
+ * + * @param i + * index + * @param itemID + * new item ID + */ + void setItemID(int i, long itemID); + + /** + * @return all user or item IDs + */ + long[] getIDs(); + + /** + * @param i + * index + * @return preference value from preference at i + */ + float getValue(int i); + + /** + * Sets preference value for preference at i. + * + * @param i + * index + * @param value + * new preference value + */ + void setValue(int i, float value); + + /** + * @return independent copy of this object + */ + PreferenceArray clone(); + + /** + * Sorts underlying array by user ID, ascending. + */ + void sortByUser(); + + /** + * Sorts underlying array by item ID, ascending. + */ + void sortByItem(); + + /** + * Sorts underlying array by preference value, ascending. + */ + void sortByValue(); + + /** + * Sorts underlying array by preference value, descending. + */ + void sortByValueReversed(); + + /** + * @param userID + * user ID + * @return true if array contains a preference with given user ID + */ + boolean hasPrefWithUserID(long userID); + + /** + * @param itemID + * item ID + * @return true if array contains a preference with given item ID + */ + boolean hasPrefWithItemID(long itemID); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java new file mode 100644 index 0000000..ff29a34 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.model;
+
+import org.apache.mahout.cf.taste.common.TasteException;
+
+/**
+ * An {@link IDMigrator} whose reverse long-to-String mappings can be stored explicitly, either one at a time via
+ * {@link #storeMapping(long, String)} or for a whole set of string IDs via {@link #initialize(Iterable)}.
+ */
+public interface UpdatableIDMigrator extends IDMigrator {
+
+  /**
+   * Stores the reverse long-to-String mapping in some kind of backing store. Note that this must be called
+   * directly (or indirectly through {@link #initialize(Iterable)}) for every String that might be encountered
+   * in the application, or else the mapping will not be known.
+   *
+   * @param longID
+   *          long ID
+   * @param stringID
+   *          string ID that maps to/from that long ID
+   * @throws TasteException
+   *           if an error occurs while saving the mapping
+   */
+  void storeMapping(long longID, String stringID) throws TasteException;
+
+  /**
+   * Make the mapping aware of the given string IDs. This must be called initially before the implementation
+   * is used, or else it will not be aware of reverse long-to-String mappings.
+   *
+   * @param stringIDs
+   *          string IDs the mapping should become aware of
+   * @throws TasteException
+   *           if an error occurs while storing the mappings
+   */
+  void initialize(Iterable<String> stringIDs) throws TasteException;
+
+}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java new file mode 100644 index 0000000..2a143e1 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.neighborhood; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; + +/** + * <p> + * Implementations of this interface compute a "neighborhood" of users like a given user. This neighborhood + * can be used to compute recommendations then.
+ * </p> + */ +public interface UserNeighborhood extends Refreshable { + + /** + * @param userID + * ID of user for which a neighborhood will be computed + * @return IDs of users in the neighborhood + * @throws TasteException + * if an error occurs while accessing data + */ + long[] getUserNeighborhood(long userID) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java new file mode 100644 index 0000000..ada1949 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.recommender; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; + +/** + * Used to retrieve all items that could possibly be recommended to the user + */ +public interface CandidateItemsStrategy extends Refreshable { + + /** + * @return IDs of all items that could be recommended to the user + */ + FastIDSet getCandidateItems(long userID, PreferenceArray preferencesFromUser, DataModel dataModel, + boolean includeKnownItems) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java new file mode 100644 index 0000000..d9a9cf7 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +/** + * <p> + * A {@link Rescorer} which operates on {@code long} primitive IDs, rather than arbitrary {@link Object}s. + * This is provided since most uses of this interface in the framework take IDs (as {@code long}) as an + * argument, and so this can be used to avoid unnecessary boxing/unboxing. + * </p> + */ +public interface IDRescorer { + + /** + * @param id + * ID of thing (user, item, etc.) to rescore + * @param originalScore + * original score + * @return modified score, or {@link Double#NaN} to indicate that this should be excluded entirely + */ + double rescore(long id, double originalScore); + + /** + * Returns {@code true} to exclude the given thing. + * + * @param id + * ID of thing (user, item, etc.) 
to rescore + * @return {@code true} to exclude, {@code false} otherwise + */ + boolean isFiltered(long id); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java new file mode 100644 index 0000000..570f851 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +import java.util.List; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.common.LongPair; + +/** + * <p> + * Interface implemented by "item-based" recommenders. 
+ * </p> + */ +public interface ItemBasedRecommender extends Recommender { + + /** + * @param itemID + * ID of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @return items most similar to the given item, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long itemID, int howMany) throws TasteException; + + /** + * @param itemID + * ID of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @param rescorer + * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar + * items + * @return items most similar to the given item, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long itemID, int howMany, Rescorer<LongPair> rescorer) throws TasteException; + + /** + * @param itemIDs + * IDs of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @return items most similar to the given items, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany) throws TasteException; + + /** + * @param itemIDs + * IDs of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @param rescorer + * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar + * items + * @return items most similar to the given items, ordered from most
similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long[] itemIDs, + int howMany, + Rescorer<LongPair> rescorer) throws TasteException; + + /** + * @param itemIDs + * IDs of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @param excludeItemIfNotSimilarToAll + * exclude an item if it is not similar to each of the input items + * @return items most similar to the given items, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long[] itemIDs, + int howMany, + boolean excludeItemIfNotSimilarToAll) throws TasteException; + + /** + * @param itemIDs + * IDs of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @param rescorer + * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar + * items + * @param excludeItemIfNotSimilarToAll + * exclude an item if it is not similar to each of the input items + * @return items most similar to the given items, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long[] itemIDs, + int howMany, + Rescorer<LongPair> rescorer, + boolean excludeItemIfNotSimilarToAll) throws TasteException; + + /** + * <p> + * Lists the items that were most influential in recommending a given item to a given user. Exactly how this + * is determined is left to the implementation, but, generally this will return items that the user prefers + * and that are similar to the given item. 
+ * </p> + * + * <p> + * This returns a {@link List} of {@link RecommendedItem} which is a little misleading since it's returning + * recommend<strong>ing</strong> items, but, I thought it more natural to just reuse this class since it + * encapsulates an item and value. The value here does not necessarily have a consistent interpretation or + * expected range; it will be higher the more influential the item was in the recommendation. + * </p> + * + * @param userID + * ID of user who was recommended the item + * @param itemID + * ID of item that was recommended + * @param howMany + * maximum number of items to return + * @return {@link List} of {@link RecommendedItem}, ordered from most influential in recommending the given + * item to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> recommendedBecause(long userID, long itemID, int howMany) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java new file mode 100644 index 0000000..282ceff --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; + +/** + * Used to retrieve all items that could possibly be similar + */ +public interface MostSimilarItemsCandidateItemsStrategy extends Refreshable { + + FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel) throws TasteException; +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java new file mode 100644 index 0000000..1fcece8 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +/** + * <p> + * Implementations encapsulate items that are recommended, and include the item recommended and a value + * expressing the strength of the preference. + * </p> + */ +public interface RecommendedItem { + + /** @return the recommended item ID */ + long getItemID(); + + /** + * <p> + * A value expressing the strength of the preference for the recommended item. The range of the values + * depends on the implementation. Implementations must use larger values to express stronger preference. + * </p> + * + * @return strength of the preference + */ + float getValue(); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java new file mode 100644 index 0000000..4135aff --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +import java.util.List; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.model.DataModel; + +/** + * <p> + * Implementations of this interface can recommend items for a user. Implementations will likely take + * advantage of several classes in other packages here to compute this. 
+ * </p> + */ +public interface Recommender extends Refreshable { + + /** + * @param userID + * user for which recommendations are to be computed + * @param howMany + * desired number of recommendations + * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to + * least + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + List<RecommendedItem> recommend(long userID, int howMany) throws TasteException; + + /** + * @param userID + * user for which recommendations are to be computed + * @param howMany + * desired number of recommendations + * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to + * least + * @param includeKnownItems + * whether to include items already known by the user in recommendations + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException; + + /** + * @param userID + * user for which recommendations are to be computed + * @param howMany + * desired number of recommendations + * @param rescorer + * rescoring function to apply before final list of recommendations is determined + * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to + * least + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer) throws TasteException; + + /** + * @param userID + * user for which recommendations are to be computed + * @param howMany + * desired number of recommendations + * @param rescorer + * rescoring function to apply before final list of recommendations is determined + * @param includeKnownItems + * whether to include items already known by the user in recommendations + * @return {@link List} of recommended {@link
RecommendedItem}s, ordered from most strongly recommended to + * least + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + + List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems) + throws TasteException; + + /** + * @param userID + * user ID whose preference is to be estimated + * @param itemID + * item ID to estimate preference for + * @return an estimated preference if the user has not expressed a preference for the item, or else the + * user's actual preference for the item. If a preference cannot be estimated, returns + * {@link Float#NaN} + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + float estimatePreference(long userID, long itemID) throws TasteException; + + /** + * @param userID + * user to set preference for + * @param itemID + * item to set preference for + * @param value + * preference value + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + void setPreference(long userID, long itemID, float value) throws TasteException; + + /** + * @param userID + * user from which to remove preference + * @param itemID + * item for which to remove preference + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + void removePreference(long userID, long itemID) throws TasteException; + + /** + * @return underlying {@link DataModel} used by this {@link Recommender} implementation + */ + DataModel getDataModel(); + +}
