http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java new file mode 100644 index 0000000..ea4df85 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/PlusAnonymousUserLongPrimitiveIterator.java @@ -0,0 +1,90 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/

package org.apache.mahout.cf.taste.impl.model;

import org.apache.mahout.cf.taste.impl.common.AbstractLongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;

/**
 * A {@link LongPrimitiveIterator} that yields every value of a delegate iterator plus one extra value.
 * The extra value is emitted as soon as it is less than or equal to the delegate's next value, or once
 * the delegate is exhausted, whichever comes first.
 *
 * <p>NOTE(review): this only produces ordered output if the delegate itself iterates in ascending
 * order -- presumably it does; confirm against callers. No de-duplication is performed: if the delegate
 * also contains the extra value, it is returned twice.</p>
 */
final class PlusAnonymousUserLongPrimitiveIterator extends AbstractLongPrimitiveIterator {

  private final LongPrimitiveIterator delegate;
  private final long extraDatum;
  private boolean datumConsumed;

  /**
   * @param delegate iterator supplying the regular values
   * @param extraDatum additional value to merge into the delegate's output
   */
  PlusAnonymousUserLongPrimitiveIterator(LongPrimitiveIterator delegate, long extraDatum) {
    this.delegate = delegate;
    this.extraDatum = extraDatum;
    datumConsumed = false;
  }

  /**
   * @return true if the extra value has not been returned yet and is the element that must be returned
   *  next, i.e. the delegate is exhausted or its next value is not smaller than the extra value
   */
  private boolean extraDatumIsNext() {
    return !datumConsumed && (!delegate.hasNext() || extraDatum <= delegate.peek());
  }

  @Override
  public long nextLong() {
    if (extraDatumIsNext()) {
      datumConsumed = true;
      return extraDatum;
    }
    // Use the primitive accessor; next() would box the value only for it to be unboxed again.
    return delegate.nextLong();
  }

  @Override
  public long peek() {
    if (datumConsumed) {
      return delegate.peek();
    }
    if (!delegate.hasNext()) {
      return extraDatum;
    }
    // Ties go to the extra value, exactly as in nextLong().
    return Math.min(extraDatum, delegate.peek());
  }

  @Override
  public boolean hasNext() {
    return !datumConsumed || delegate.hasNext();
  }

  /**
   * @throws UnsupportedOperationException always; removal is not supported
   */
  @Override
  public void remove() {
    throw new UnsupportedOperationException();
  }

  /**
   * Skips up to {@code n} elements. Stops quietly at the end of the iteration rather than failing when
   * fewer than {@code n} elements remain.
   */
  @Override
  public void skip(int n) {
    for (int i = 0; i < n && hasNext(); i++) {
      nextLong();
    }
  }

}
http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java new file mode 100644 index 0000000..da6845e --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java @@ -0,0 +1,759 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.model.file; + +import java.io.File; +import java.io.FileFilter; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collection; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastByIDMap; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.model.AbstractDataModel; +import org.apache.mahout.cf.taste.impl.model.GenericBooleanPrefDataModel; +import org.apache.mahout.cf.taste.impl.model.GenericDataModel; +import org.apache.mahout.cf.taste.impl.model.GenericPreference; +import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.Preference; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.common.iterator.FileLineIterator; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; +import com.google.common.base.Splitter; +import com.google.common.collect.Lists; +import com.google.common.io.Closeables; + +/** + * <p> + * A {@link DataModel} backed by a delimited file. This class expects a file where each line + * contains a user ID, followed by item ID, followed by optional preference value, followed by + * optional timestamp. 
Commas or tabs delimit fields: + * </p> + * + * <p>{@code userID,itemID[,preference[,timestamp]]}</p> + * + * <p> + * Preference value is optional to accommodate applications that have no notion of a + * preference value (that is, the user simply expresses a + * preference for an item, but no degree of preference). + * </p> + * + * <p> + * The preference value is assumed to be parseable as a {@code float}. The user IDs and item IDs are + * parsed as {@code long}s. The timestamp, if present, is assumed to be parseable as a + * {@code long}, though this can be overridden via {@link #readTimestampFromString(String)}. + * The preference value may be empty, to indicate "no preference value", but its field cannot be omitted + * when a timestamp follows. That is, this is legal: + * </p> + * + * <p>{@code 123,456,,129050099059}</p> + * + * <p>But this isn't:</p> + * + * <p>{@code 123,456,129050099059}</p> + * + * <p> + * It is also acceptable for the lines to contain additional fields. Fields beyond the fourth will be ignored. + * An empty line, or one that begins with '#', will be ignored as a comment. + * </p> + * + * <p> + * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file + * has been reloaded very recently already. + * </p> + * + * <p> + * This class will also look for update "delta" files in the same directory, with file names that start the + * same way (up to the first period). These files have the same format, and provide updated data that + * supersedes what is in the main data file. This is a mechanism that allows an application to push updates to + * {@link FileDataModel} without re-copying the entire data file. + * </p> + * + * <p> + * One small format difference exists. Update files must also be able to express deletes. + * This is done by ending with a blank preference value, as in "123,456,". + * </p> + * + * <p> + * Note that it's all-or-nothing -- all of the items in the file must express no preference, or they all must. 
+ * These cannot be mixed. Put another way there will always be the same number of delimiters on every line of + * the file! + * </p> + * + * <p> + * This class is not intended for use with very large amounts of data (over, say, tens of millions of rows). + * For that, a JDBC-backed {@link DataModel} and a database are more appropriate. + * </p> + * + * <p> + * It is possible and likely useful to subclass this class and customize its behavior to accommodate + * application-specific needs and input formats. See {@link #processLine(String, FastByIDMap, FastByIDMap, boolean)} and + * {@link #processLineWithoutID(String, FastByIDMap, FastByIDMap)} + */ +public class FileDataModel extends AbstractDataModel { + + private static final Logger log = LoggerFactory.getLogger(FileDataModel.class); + + public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute? + private static final char COMMENT_CHAR = '#'; + private static final char[] DELIMIETERS = {',', '\t'}; + + private final File dataFile; + private long lastModified; + private long lastUpdateFileModified; + private final Splitter delimiterPattern; + private final boolean hasPrefValues; + private DataModel delegate; + private final ReentrantLock reloadLock; + private final boolean transpose; + private final long minReloadIntervalMS; + + /** + * @param dataFile + * file containing preferences data. If file is compressed (and name ends in .gz or .zip + * accordingly) it will be decompressed as it is read) + * @throws FileNotFoundException + * if dataFile does not exist + * @throws IOException + * if file can't be read + */ + public FileDataModel(File dataFile) throws IOException { + this(dataFile, false, DEFAULT_MIN_RELOAD_INTERVAL_MS); + } + + /** + * @param delimiterRegex If your data file don't use '\t' or ',' as delimiter, you can specify + * a custom regex pattern. 
+ */ + public FileDataModel(File dataFile, String delimiterRegex) throws IOException { + this(dataFile, false, DEFAULT_MIN_RELOAD_INTERVAL_MS, delimiterRegex); + } + + /** + * @param transpose + * transposes user IDs and item IDs -- convenient for 'flipping' the data model this way + * @param minReloadIntervalMS + * the minimum interval in milliseconds after which a full reload of the original datafile is done + * when refresh() is called + * @see #FileDataModel(File) + */ + public FileDataModel(File dataFile, boolean transpose, long minReloadIntervalMS) throws IOException { + this(dataFile, transpose, minReloadIntervalMS, null); + } + + /** + * @param delimiterRegex If your data file don't use '\t' or ',' as delimiters, you can specify + * user own using regex pattern. + * @throws IOException + */ + public FileDataModel(File dataFile, boolean transpose, long minReloadIntervalMS, String delimiterRegex) + throws IOException { + + this.dataFile = Preconditions.checkNotNull(dataFile.getAbsoluteFile()); + if (!dataFile.exists() || dataFile.isDirectory()) { + throw new FileNotFoundException(dataFile.toString()); + } + Preconditions.checkArgument(dataFile.length() > 0L, "dataFile is empty"); + Preconditions.checkArgument(minReloadIntervalMS >= 0L, "minReloadIntervalMs must be non-negative"); + + log.info("Creating FileDataModel for file {}", dataFile); + + this.lastModified = dataFile.lastModified(); + this.lastUpdateFileModified = readLastUpdateFileModified(); + + FileLineIterator iterator = new FileLineIterator(dataFile, false); + String firstLine = iterator.peek(); + while (firstLine.isEmpty() || firstLine.charAt(0) == COMMENT_CHAR) { + iterator.next(); + firstLine = iterator.peek(); + } + Closeables.close(iterator, true); + + char delimiter; + if (delimiterRegex == null) { + delimiter = determineDelimiter(firstLine); + delimiterPattern = Splitter.on(delimiter); + } else { + delimiter = '\0'; + delimiterPattern = Splitter.onPattern(delimiterRegex); + if 
(!delimiterPattern.split(firstLine).iterator().hasNext()) { + throw new IllegalArgumentException("Did not find a delimiter(pattern) in first line"); + } + } + List<String> firstLineSplit = Lists.newArrayList(); + for (String token : delimiterPattern.split(firstLine)) { + firstLineSplit.add(token); + } + // If preference value exists and isn't empty then the file is specifying pref values + hasPrefValues = firstLineSplit.size() >= 3 && !firstLineSplit.get(2).isEmpty(); + + this.reloadLock = new ReentrantLock(); + this.transpose = transpose; + this.minReloadIntervalMS = minReloadIntervalMS; + + reload(); + } + + public File getDataFile() { + return dataFile; + } + + protected void reload() { + if (reloadLock.tryLock()) { + try { + delegate = buildModel(); + } catch (IOException ioe) { + log.warn("Exception while reloading", ioe); + } finally { + reloadLock.unlock(); + } + } + } + + protected DataModel buildModel() throws IOException { + + long newLastModified = dataFile.lastModified(); + long newLastUpdateFileModified = readLastUpdateFileModified(); + + boolean loadFreshData = delegate == null || newLastModified > lastModified + minReloadIntervalMS; + + long oldLastUpdateFileModifieid = lastUpdateFileModified; + lastModified = newLastModified; + lastUpdateFileModified = newLastUpdateFileModified; + + FastByIDMap<FastByIDMap<Long>> timestamps = new FastByIDMap<>(); + + if (hasPrefValues) { + + if (loadFreshData) { + + FastByIDMap<Collection<Preference>> data = new FastByIDMap<>(); + FileLineIterator iterator = new FileLineIterator(dataFile, false); + processFile(iterator, data, timestamps, false); + + for (File updateFile : findUpdateFilesAfter(newLastModified)) { + processFile(new FileLineIterator(updateFile, false), data, timestamps, false); + } + + return new GenericDataModel(GenericDataModel.toDataMap(data, true), timestamps); + + } else { + + FastByIDMap<PreferenceArray> rawData = ((GenericDataModel) delegate).getRawUserData(); + + for (File updateFile : 
findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) { + processFile(new FileLineIterator(updateFile, false), rawData, timestamps, true); + } + + return new GenericDataModel(rawData, timestamps); + + } + + } else { + + if (loadFreshData) { + + FastByIDMap<FastIDSet> data = new FastByIDMap<>(); + FileLineIterator iterator = new FileLineIterator(dataFile, false); + processFileWithoutID(iterator, data, timestamps); + + for (File updateFile : findUpdateFilesAfter(newLastModified)) { + processFileWithoutID(new FileLineIterator(updateFile, false), data, timestamps); + } + + return new GenericBooleanPrefDataModel(data, timestamps); + + } else { + + FastByIDMap<FastIDSet> rawData = ((GenericBooleanPrefDataModel) delegate).getRawUserData(); + + for (File updateFile : findUpdateFilesAfter(Math.max(oldLastUpdateFileModifieid, newLastModified))) { + processFileWithoutID(new FileLineIterator(updateFile, false), rawData, timestamps); + } + + return new GenericBooleanPrefDataModel(rawData, timestamps); + + } + + } + } + + /** + * Finds update delta files in the same directory as the data file. This finds any file whose name starts + * the same way as the data file (up to first period) but isn't the data file itself. For example, if the + * data file is /foo/data.txt.gz, you might place update files at /foo/data.1.txt.gz, /foo/data.2.txt.gz, + * etc. + */ + private Iterable<File> findUpdateFilesAfter(long minimumLastModified) { + String dataFileName = dataFile.getName(); + int period = dataFileName.indexOf('.'); + String startName = period < 0 ? 
dataFileName : dataFileName.substring(0, period); + File parentDir = dataFile.getParentFile(); + Map<Long, File> modTimeToUpdateFile = new TreeMap<>(); + FileFilter onlyFiles = new FileFilter() { + @Override + public boolean accept(File file) { + return !file.isDirectory(); + } + }; + for (File updateFile : parentDir.listFiles(onlyFiles)) { + String updateFileName = updateFile.getName(); + if (updateFileName.startsWith(startName) + && !updateFileName.equals(dataFileName) + && updateFile.lastModified() >= minimumLastModified) { + modTimeToUpdateFile.put(updateFile.lastModified(), updateFile); + } + } + return modTimeToUpdateFile.values(); + } + + private long readLastUpdateFileModified() { + long mostRecentModification = Long.MIN_VALUE; + for (File updateFile : findUpdateFilesAfter(0L)) { + mostRecentModification = Math.max(mostRecentModification, updateFile.lastModified()); + } + return mostRecentModification; + } + + public static char determineDelimiter(String line) { + for (char possibleDelimieter : DELIMIETERS) { + if (line.indexOf(possibleDelimieter) >= 0) { + return possibleDelimieter; + } + } + throw new IllegalArgumentException("Did not find a delimiter in first line"); + } + + protected void processFile(FileLineIterator dataOrUpdateFileIterator, + FastByIDMap<?> data, + FastByIDMap<FastByIDMap<Long>> timestamps, + boolean fromPriorData) { + log.info("Reading file info..."); + int count = 0; + while (dataOrUpdateFileIterator.hasNext()) { + String line = dataOrUpdateFileIterator.next(); + if (!line.isEmpty()) { + processLine(line, data, timestamps, fromPriorData); + if (++count % 1000000 == 0) { + log.info("Processed {} lines", count); + } + } + } + log.info("Read lines: {}", count); + } + + /** + * <p> + * Reads one line from the input file and adds the data to a {@link FastByIDMap} data structure which maps user IDs + * to preferences. This assumes that each line of the input file corresponds to one preference. 
After + * reading a line and determining which user and item the preference pertains to, the method should look to + * see if the data contains a mapping for the user ID already, and if not, add an empty data structure of preferences + * as appropriate to the data. + * </p> + * + * <p> + * Note that if the line is empty or begins with '#' it will be ignored as a comment. + * </p> + * + * @param line + * line from input data file + * @param data + * all data read so far, as a mapping from user IDs to preferences + * @param fromPriorData an implementation detail -- if true, data will map IDs to + * {@link PreferenceArray} since the framework is attempting to read and update raw + * data that is already in memory. Otherwise it maps to {@link Collection}s of + * {@link Preference}s, since it's reading fresh data. Subclasses must be prepared + * to handle this wrinkle. + */ + protected void processLine(String line, + FastByIDMap<?> data, + FastByIDMap<FastByIDMap<Long>> timestamps, + boolean fromPriorData) { + + // Ignore empty lines and comments + if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) { + return; + } + + Iterator<String> tokens = delimiterPattern.split(line).iterator(); + String userIDString = tokens.next(); + String itemIDString = tokens.next(); + String preferenceValueString = tokens.next(); + boolean hasTimestamp = tokens.hasNext(); + String timestampString = hasTimestamp ? 
tokens.next() : null; + + long userID = readUserIDFromString(userIDString); + long itemID = readItemIDFromString(itemIDString); + + if (transpose) { + long tmp = userID; + userID = itemID; + itemID = tmp; + } + + // This is kind of gross but need to handle two types of storage + Object maybePrefs = data.get(userID); + if (fromPriorData) { + // Data are PreferenceArray + + PreferenceArray prefs = (PreferenceArray) maybePrefs; + if (!hasTimestamp && preferenceValueString.isEmpty()) { + // Then line is of form "userID,itemID,", meaning remove + if (prefs != null) { + boolean exists = false; + int length = prefs.length(); + for (int i = 0; i < length; i++) { + if (prefs.getItemID(i) == itemID) { + exists = true; + break; + } + } + if (exists) { + if (length == 1) { + data.remove(userID); + } else { + PreferenceArray newPrefs = new GenericUserPreferenceArray(length - 1); + for (int i = 0, j = 0; i < length; i++, j++) { + if (prefs.getItemID(i) == itemID) { + j--; + } else { + newPrefs.set(j, prefs.get(i)); + } + } + ((FastByIDMap<PreferenceArray>) data).put(userID, newPrefs); + } + } + } + + removeTimestamp(userID, itemID, timestamps); + + } else { + + float preferenceValue = Float.parseFloat(preferenceValueString); + + boolean exists = false; + if (prefs != null) { + for (int i = 0; i < prefs.length(); i++) { + if (prefs.getItemID(i) == itemID) { + exists = true; + prefs.setValue(i, preferenceValue); + break; + } + } + } + + if (!exists) { + if (prefs == null) { + prefs = new GenericUserPreferenceArray(1); + } else { + PreferenceArray newPrefs = new GenericUserPreferenceArray(prefs.length() + 1); + for (int i = 0, j = 1; i < prefs.length(); i++, j++) { + newPrefs.set(j, prefs.get(i)); + } + prefs = newPrefs; + } + prefs.setUserID(0, userID); + prefs.setItemID(0, itemID); + prefs.setValue(0, preferenceValue); + ((FastByIDMap<PreferenceArray>) data).put(userID, prefs); + } + } + + addTimestamp(userID, itemID, timestampString, timestamps); + + } else { + // Data are 
Collection<Preference> + + Collection<Preference> prefs = (Collection<Preference>) maybePrefs; + + if (!hasTimestamp && preferenceValueString.isEmpty()) { + // Then line is of form "userID,itemID,", meaning remove + if (prefs != null) { + // remove pref + Iterator<Preference> prefsIterator = prefs.iterator(); + while (prefsIterator.hasNext()) { + Preference pref = prefsIterator.next(); + if (pref.getItemID() == itemID) { + prefsIterator.remove(); + break; + } + } + } + + removeTimestamp(userID, itemID, timestamps); + + } else { + + float preferenceValue = Float.parseFloat(preferenceValueString); + + boolean exists = false; + if (prefs != null) { + for (Preference pref : prefs) { + if (pref.getItemID() == itemID) { + exists = true; + pref.setValue(preferenceValue); + break; + } + } + } + + if (!exists) { + if (prefs == null) { + prefs = Lists.newArrayListWithCapacity(2); + ((FastByIDMap<Collection<Preference>>) data).put(userID, prefs); + } + prefs.add(new GenericPreference(userID, itemID, preferenceValue)); + } + + addTimestamp(userID, itemID, timestampString, timestamps); + + } + + } + } + + protected void processFileWithoutID(FileLineIterator dataOrUpdateFileIterator, + FastByIDMap<FastIDSet> data, + FastByIDMap<FastByIDMap<Long>> timestamps) { + log.info("Reading file info..."); + int count = 0; + while (dataOrUpdateFileIterator.hasNext()) { + String line = dataOrUpdateFileIterator.next(); + if (!line.isEmpty()) { + processLineWithoutID(line, data, timestamps); + if (++count % 100000 == 0) { + log.info("Processed {} lines", count); + } + } + } + log.info("Read lines: {}", count); + } + + protected void processLineWithoutID(String line, + FastByIDMap<FastIDSet> data, + FastByIDMap<FastByIDMap<Long>> timestamps) { + + if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) { + return; + } + + Iterator<String> tokens = delimiterPattern.split(line).iterator(); + String userIDString = tokens.next(); + String itemIDString = tokens.next(); + boolean hasPreference = 
tokens.hasNext(); + String preferenceValueString = hasPreference ? tokens.next() : ""; + boolean hasTimestamp = tokens.hasNext(); + String timestampString = hasTimestamp ? tokens.next() : null; + + long userID = readUserIDFromString(userIDString); + long itemID = readItemIDFromString(itemIDString); + + if (transpose) { + long tmp = userID; + userID = itemID; + itemID = tmp; + } + + if (hasPreference && !hasTimestamp && preferenceValueString.isEmpty()) { + // Then line is of form "userID,itemID,", meaning remove + + FastIDSet itemIDs = data.get(userID); + if (itemIDs != null) { + itemIDs.remove(itemID); + } + + removeTimestamp(userID, itemID, timestamps); + + } else { + + FastIDSet itemIDs = data.get(userID); + if (itemIDs == null) { + itemIDs = new FastIDSet(2); + data.put(userID, itemIDs); + } + itemIDs.add(itemID); + + addTimestamp(userID, itemID, timestampString, timestamps); + + } + } + + private void addTimestamp(long userID, + long itemID, + String timestampString, + FastByIDMap<FastByIDMap<Long>> timestamps) { + if (timestampString != null) { + FastByIDMap<Long> itemTimestamps = timestamps.get(userID); + if (itemTimestamps == null) { + itemTimestamps = new FastByIDMap<>(); + timestamps.put(userID, itemTimestamps); + } + long timestamp = readTimestampFromString(timestampString); + itemTimestamps.put(itemID, timestamp); + } + } + + private static void removeTimestamp(long userID, + long itemID, + FastByIDMap<FastByIDMap<Long>> timestamps) { + FastByIDMap<Long> itemTimestamps = timestamps.get(userID); + if (itemTimestamps != null) { + itemTimestamps.remove(itemID); + } + } + + /** + * Subclasses may wish to override this if ID values in the file are not numeric. This provides a hook by + * which subclasses can inject an {@link org.apache.mahout.cf.taste.model.IDMigrator} to perform + * translation. 
+ */ + protected long readUserIDFromString(String value) { + return Long.parseLong(value); + } + + /** + * Subclasses may wish to override this if ID values in the file are not numeric. This provides a hook by + * which subclasses can inject an {@link org.apache.mahout.cf.taste.model.IDMigrator} to perform + * translation. + */ + protected long readItemIDFromString(String value) { + return Long.parseLong(value); + } + + /** + * Subclasses may wish to override this to change how time values in the input file are parsed. + * By default they are expected to be numeric, expressing a time as milliseconds since the epoch. + */ + protected long readTimestampFromString(String value) { + return Long.parseLong(value); + } + + @Override + public LongPrimitiveIterator getUserIDs() throws TasteException { + return delegate.getUserIDs(); + } + + @Override + public PreferenceArray getPreferencesFromUser(long userID) throws TasteException { + return delegate.getPreferencesFromUser(userID); + } + + @Override + public FastIDSet getItemIDsFromUser(long userID) throws TasteException { + return delegate.getItemIDsFromUser(userID); + } + + @Override + public LongPrimitiveIterator getItemIDs() throws TasteException { + return delegate.getItemIDs(); + } + + @Override + public PreferenceArray getPreferencesForItem(long itemID) throws TasteException { + return delegate.getPreferencesForItem(itemID); + } + + @Override + public Float getPreferenceValue(long userID, long itemID) throws TasteException { + return delegate.getPreferenceValue(userID, itemID); + } + + @Override + public Long getPreferenceTime(long userID, long itemID) throws TasteException { + return delegate.getPreferenceTime(userID, itemID); + } + + @Override + public int getNumItems() throws TasteException { + return delegate.getNumItems(); + } + + @Override + public int getNumUsers() throws TasteException { + return delegate.getNumUsers(); + } + + @Override + public int getNumUsersWithPreferenceFor(long itemID) throws 
TasteException { + return delegate.getNumUsersWithPreferenceFor(itemID); + } + + @Override + public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException { + return delegate.getNumUsersWithPreferenceFor(itemID1, itemID2); + } + + /** + * Note that this method only updates the in-memory preference data that this {@link FileDataModel} + * maintains; it does not modify any data on disk. Therefore any updates from this method are only + * temporary, and lost when data is reloaded from a file. This method should also be considered relatively + * slow. + */ + @Override + public void setPreference(long userID, long itemID, float value) throws TasteException { + delegate.setPreference(userID, itemID, value); + } + + /** See the warning at {@link #setPreference(long, long, float)}. */ + @Override + public void removePreference(long userID, long itemID) throws TasteException { + delegate.removePreference(userID, itemID); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + if (dataFile.lastModified() > lastModified + minReloadIntervalMS + || readLastUpdateFileModified() > lastUpdateFileModified + minReloadIntervalMS) { + log.debug("File has changed; reloading..."); + reload(); + } + } + + @Override + public boolean hasPreferenceValues() { + return delegate.hasPreferenceValues(); + } + + @Override + public float getMaxPreference() { + return delegate.getMaxPreference(); + } + + @Override + public float getMinPreference() { + return delegate.getMinPreference(); + } + + @Override + public String toString() { + return "FileDataModel[dataFile:" + dataFile + ']'; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java 
b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java new file mode 100644 index 0000000..1bcb4ef --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileIDMigrator.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model.file; + +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.util.Collection; +import java.util.concurrent.locks.ReentrantLock; +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.impl.common.FastByIDMap; +import org.apache.mahout.cf.taste.impl.model.AbstractIDMigrator; +import org.apache.mahout.common.iterator.FileLineIterable; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * An {@link org.apache.mahout.cf.taste.model.IDMigrator} backed by a file. + * This class typically expects a file where each line + * contains a single stringID to be stored in this migrator. 
+ * </p> + * + * <p> + * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file + * has been reloaded very recently already. + * </p> + */ +public class FileIDMigrator extends AbstractIDMigrator { + + public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute? + + private final File dataFile; + private FastByIDMap<String> longToString; + private final ReentrantLock reloadLock; + + private long lastModified; + private final long minReloadIntervalMS; + + private static final Logger log = LoggerFactory.getLogger(FileIDMigrator.class); + + public FileIDMigrator(File dataFile) throws FileNotFoundException { + this(dataFile, DEFAULT_MIN_RELOAD_INTERVAL_MS); + } + + public FileIDMigrator(File dataFile, long minReloadIntervalMS) throws FileNotFoundException { + longToString = new FastByIDMap<>(100); + this.dataFile = Preconditions.checkNotNull(dataFile); + if (!dataFile.exists() || dataFile.isDirectory()) { + throw new FileNotFoundException(dataFile.toString()); + } + + log.info("Creating FileReadonlyIDMigrator for file {}", dataFile); + + this.reloadLock = new ReentrantLock(); + this.lastModified = dataFile.lastModified(); + this.minReloadIntervalMS = minReloadIntervalMS; + + reload(); + } + + @Override + public String toStringID(long longID) { + return longToString.get(longID); + } + + private void reload() { + if (reloadLock.tryLock()) { + try { + longToString = buildMapping(); + } catch (IOException ioe) { + throw new IllegalStateException(ioe); + } finally { + reloadLock.unlock(); + } + } + } + + private FastByIDMap<String> buildMapping() throws IOException { + FastByIDMap<String> mapping = new FastByIDMap<>(); + for (String line : new FileLineIterable(dataFile)) { + mapping.put(toLongID(line), line); + } + lastModified = dataFile.lastModified(); + return mapping; + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + if (dataFile.lastModified() > lastModified + 
minReloadIntervalMS) { + log.debug("File has changed; reloading..."); + reload(); + } + } + + @Override + public String toString() { + return "FileIDMigrator[dataFile:" + dataFile + ']'; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java new file mode 100644 index 0000000..8d33f60 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/AbstractUserNeighborhood.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.neighborhood; + +import java.util.Collection; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +import com.google.common.base.Preconditions; + +/** + * <p> + * Base class for the neighborhoods in this package: holds the validated similarity, data model and sampling rate. + * </p> + */ +abstract class AbstractUserNeighborhood implements UserNeighborhood { + + private final UserSimilarity userSimilarity; + private final DataModel dataModel; + private final double samplingRate; + private final RefreshHelper refreshHelper; + + AbstractUserNeighborhood(UserSimilarity userSimilarity, DataModel dataModel, double samplingRate) { + Preconditions.checkArgument(userSimilarity != null, "userSimilarity is null"); + Preconditions.checkArgument(dataModel != null, "dataModel is null"); + Preconditions.checkArgument(samplingRate > 0.0 && samplingRate <= 1.0, "samplingRate must be in (0,1]"); + this.userSimilarity = userSimilarity; + this.dataModel = dataModel; + this.samplingRate = samplingRate; + this.refreshHelper = new RefreshHelper(null); + this.refreshHelper.addDependency(this.dataModel); + this.refreshHelper.addDependency(this.userSimilarity); + } + + final UserSimilarity getUserSimilarity() { + return userSimilarity; + } + + final DataModel getDataModel() { + return dataModel; + } + + final double getSamplingRate() { + return samplingRate; + } + + @Override + public final void refresh(Collection<Refreshable> alreadyRefreshed) { + refreshHelper.refresh(alreadyRefreshed); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java ---------------------------------------------------------------------- diff --git
a/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java new file mode 100644 index 0000000..998e476 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/CachingUserNeighborhood.java @@ -0,0 +1,69 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.neighborhood; + +import java.util.Collection; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.Cache; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.impl.common.Retriever; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; + +import com.google.common.base.Preconditions; + +/** A caching wrapper around an underlying {@link UserNeighborhood} implementation; the cache is keyed by user ID and cleared on refresh.
*/ +public final class CachingUserNeighborhood implements UserNeighborhood { + + private final UserNeighborhood neighborhood; + private final Cache<Long,long[]> neighborhoodCache; + + public CachingUserNeighborhood(UserNeighborhood neighborhood, DataModel dataModel) throws TasteException { + Preconditions.checkArgument(neighborhood != null, "neighborhood is null"); + this.neighborhood = neighborhood; + int maxCacheSize = dataModel.getNumUsers(); // just a dumb heuristic for sizing + this.neighborhoodCache = new Cache<>(new NeighborhoodRetriever(neighborhood), maxCacheSize); + } + + @Override + public long[] getUserNeighborhood(long userID) throws TasteException { + return neighborhoodCache.get(userID); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + neighborhoodCache.clear(); + Collection<Refreshable> refreshed = RefreshHelper.buildRefreshed(alreadyRefreshed); + RefreshHelper.maybeRefresh(refreshed, neighborhood); + } + + private static final class NeighborhoodRetriever implements Retriever<Long,long[]> { + private final UserNeighborhood neighborhood; + + private NeighborhoodRetriever(UserNeighborhood neighborhood) { + this.neighborhood = neighborhood; + } + + @Override + public long[] get(Long key) throws TasteException { + return neighborhood.getUserNeighborhood(key); + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java new file mode 100644 index 0000000..7f3a98a --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/NearestNUserNeighborhood.java @@ -0,0 +1,122 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.neighborhood; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.recommender.TopItems; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +import com.google.common.base.Preconditions; + +/** + * <p> + * Computes a neighborhood consisting of the nearest n users to a given user. "Nearest" is defined by the + * given {@link UserSimilarity}. 
+ * </p> + */ +public final class NearestNUserNeighborhood extends AbstractUserNeighborhood { + + private final int n; + private final double minSimilarity; + + /** + * @param n neighborhood size; capped at the number of users in the data model + * @throws IllegalArgumentException + * if {@code n < 1}, or userSimilarity or dataModel are {@code null} + */ + public NearestNUserNeighborhood(int n, UserSimilarity userSimilarity, DataModel dataModel) throws TasteException { + this(n, Double.NEGATIVE_INFINITY, userSimilarity, dataModel, 1.0); + } + + /** + * @param n neighborhood size; capped at the number of users in the data model + * @param minSimilarity minimal similarity required for neighbors + * @throws IllegalArgumentException + * if {@code n < 1}, or userSimilarity or dataModel are {@code null} + */ + public NearestNUserNeighborhood(int n, + double minSimilarity, + UserSimilarity userSimilarity, + DataModel dataModel) throws TasteException { + this(n, minSimilarity, userSimilarity, dataModel, 1.0); + } + + /** + * @param n neighborhood size; capped at the number of users in the data model + * @param minSimilarity minimal similarity required for neighbors + * @param samplingRate fraction of users, in (0,1], to consider when building neighborhood -- decrease to trade quality for + * performance + * @throws IllegalArgumentException + * if {@code n < 1} or samplingRate is NaN or not in (0,1], or userSimilarity or dataModel are + * {@code null} + */ + public NearestNUserNeighborhood(int n, + double minSimilarity, + UserSimilarity userSimilarity, + DataModel dataModel, + double samplingRate) throws TasteException { + super(userSimilarity, dataModel, samplingRate); + Preconditions.checkArgument(n >= 1, "n must be at least 1"); + int numUsers = dataModel.getNumUsers(); + this.n = n > numUsers ?
numUsers : n; + this.minSimilarity = minSimilarity; + } + + @Override + public long[] getUserNeighborhood(long userID) throws TasteException { + + DataModel dataModel = getDataModel(); + UserSimilarity userSimilarityImpl = getUserSimilarity(); + + TopItems.Estimator<Long> estimator = new Estimator(userSimilarityImpl, userID, minSimilarity); + + LongPrimitiveIterator userIDs = SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel.getUserIDs(), + getSamplingRate()); + + return TopItems.getTopUsers(n, userIDs, null, estimator); + } + + @Override + public String toString() { + return "NearestNUserNeighborhood"; + } + + private static final class Estimator implements TopItems.Estimator<Long> { + private final UserSimilarity userSimilarityImpl; + private final long theUserID; + private final double minSim; + + private Estimator(UserSimilarity userSimilarityImpl, long theUserID, double minSim) { + this.userSimilarityImpl = userSimilarityImpl; + this.theUserID = theUserID; + this.minSim = minSim; + } + + @Override + public double estimate(Long userID) throws TasteException { + if (userID == theUserID) { + return Double.NaN; + } + double sim = userSimilarityImpl.userSimilarity(theUserID, userID); + return sim >= minSim ? sim : Double.NaN; + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java new file mode 100644 index 0000000..d5246e4 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/neighborhood/ThresholdUserNeighborhood.java @@ -0,0 +1,104 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements.
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.neighborhood; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.common.SamplingLongPrimitiveIterator; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.similarity.UserSimilarity; + +import com.google.common.base.Preconditions; + +/** + * <p> + * Computes a neighborhood consisting of all users whose similarity to the given user meets or exceeds a + * certain threshold. Similarity is defined by the given {@link UserSimilarity}.
+ * </p> + */ +public final class ThresholdUserNeighborhood extends AbstractUserNeighborhood { + + private final double threshold; + + /** + * @param threshold + * similarity threshold + * @param userSimilarity + * similarity metric + * @param dataModel + * data model + * @throws IllegalArgumentException + * if threshold is {@link Double#NaN}, or if userSimilarity or dataModel are + * {@code null} + */ + public ThresholdUserNeighborhood(double threshold, UserSimilarity userSimilarity, DataModel dataModel) { + this(threshold, userSimilarity, dataModel, 1.0); + } + + /** + * @param threshold + * similarity threshold + * @param userSimilarity + * similarity metric + * @param dataModel + * data model + * @param samplingRate + * fraction of users, in (0,1], to consider when building neighborhood -- decrease to trade quality for + * performance + * @throws IllegalArgumentException + * if threshold or samplingRate is {@link Double#NaN}, or if samplingRate is not in (0,1], or if + * userSimilarity or dataModel are {@code null} + */ + public ThresholdUserNeighborhood(double threshold, + UserSimilarity userSimilarity, + DataModel dataModel, + double samplingRate) { + super(userSimilarity, dataModel, samplingRate); + Preconditions.checkArgument(!Double.isNaN(threshold), "threshold must not be NaN"); + this.threshold = threshold; + } + + @Override + public long[] getUserNeighborhood(long userID) throws TasteException { + + DataModel dataModel = getDataModel(); + FastIDSet neighborhood = new FastIDSet(); + LongPrimitiveIterator usersIterable = SamplingLongPrimitiveIterator.maybeWrapIterator(dataModel + .getUserIDs(), getSamplingRate()); + UserSimilarity userSimilarityImpl = getUserSimilarity(); + + while (usersIterable.hasNext()) { + long otherUserID = usersIterable.next(); + if (userID != otherUserID) { + double theSimilarity = userSimilarityImpl.userSimilarity(userID, otherUserID); + if
(!Double.isNaN(theSimilarity) && theSimilarity >= threshold) { + neighborhood.add(otherUserID); + } + } + } + + return neighborhood.toArray(); + } + + @Override + public String toString() { + return "ThresholdUserNeighborhood"; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java new file mode 100644 index 0000000..d24ea6a --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractCandidateItemsStrategy.java @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy; +import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy; + +import java.util.Collection; + +/** + * Abstract base for candidate item strategies: the public methods delegate to {@code doGetCandidateItems}; refresh is a no-op. + */ +public abstract class AbstractCandidateItemsStrategy implements CandidateItemsStrategy, + MostSimilarItemsCandidateItemsStrategy { + + protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel) throws TasteException{ + return doGetCandidateItems(preferredItemIDs, dataModel, false); + } + + @Override + public FastIDSet getCandidateItems(long userID, PreferenceArray preferencesFromUser, DataModel dataModel, + boolean includeKnownItems) throws TasteException { + return doGetCandidateItems(preferencesFromUser.getIDs(), dataModel, includeKnownItems); + } + + @Override + public FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel) + throws TasteException { + return doGetCandidateItems(itemIDs, dataModel, false); + } + + protected abstract FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, + boolean includeKnownItems) throws TasteException; + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) {} +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java
b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java new file mode 100644 index 0000000..3a62b08 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AbstractRecommender.java @@ -0,0 +1,140 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy; + +import java.util.List; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.Recommender; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +public abstract class AbstractRecommender implements Recommender { + + private static final Logger log = LoggerFactory.getLogger(AbstractRecommender.class); + + private final DataModel dataModel; + private final CandidateItemsStrategy candidateItemsStrategy; + + protected AbstractRecommender(DataModel dataModel, CandidateItemsStrategy candidateItemsStrategy) { + this.dataModel = Preconditions.checkNotNull(dataModel); + this.candidateItemsStrategy = Preconditions.checkNotNull(candidateItemsStrategy); + } + + protected AbstractRecommender(DataModel dataModel) { + this(dataModel, getDefaultCandidateItemsStrategy()); + } + + protected static CandidateItemsStrategy getDefaultCandidateItemsStrategy() { + return new PreferredItemsNeighborhoodCandidateItemsStrategy(); + } + + + /** + * <p> + * Default implementation which just calls + * {@link Recommender#recommend(long, int, IDRescorer, boolean)} with a {@code null} + * rescorer and {@code includeKnownItems} set to {@code false}.
+ * </p> + */ + @Override + public List<RecommendedItem> recommend(long userID, int howMany) throws TasteException { + return recommend(userID, howMany, null, false); + } + + /** + * <p> + * Default implementation which just calls + * {@link Recommender#recommend(long, int, IDRescorer, boolean)} with a {@code null} + * rescorer, passing {@code includeKnownItems} through. + * </p> + */ + @Override + public List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException { + return recommend(userID, howMany, null, includeKnownItems); + } + + /** + * Delegates to {@link Recommender#recommend(long, int, IDRescorer, boolean)} with {@code includeKnownItems} set to {@code false}. + */ + @Override + public List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer) throws TasteException{ + return recommend(userID, howMany,rescorer, false); + } + + /** + * <p> + * Default implementation which just calls {@link DataModel#setPreference(long, long, float)}. + * </p> + * + * @throws IllegalArgumentException + * if value is {@link Float#NaN} + */ + @Override + public void setPreference(long userID, long itemID, float value) throws TasteException { + Preconditions.checkArgument(!Float.isNaN(value), "NaN value"); + log.debug("Setting preference for user {}, item {}", userID, itemID); + dataModel.setPreference(userID, itemID, value); + } + + /** + * <p> + * Default implementation which just calls {@link DataModel#removePreference(long, long)}.
+ * </p> + * + * @throws TasteException + * if propagated from {@link DataModel#removePreference(long, long)} + */ + @Override + public void removePreference(long userID, long itemID) throws TasteException { + log.debug("Remove preference for user '{}', item '{}'", userID, itemID); + dataModel.removePreference(userID, itemID); + } + + @Override + public DataModel getDataModel() { + return dataModel; + } + + /** + * @param userID + * ID of user being evaluated + * @param preferencesFromUser + * the preferences from the user + * @param includeKnownItems + * whether to include items already known by the user in recommendations + * @return the candidate items returned by the configured {@link CandidateItemsStrategy} for this user, + * its preferences and the given {@code includeKnownItems} setting + * @throws TasteException + * if an error occurs while listing items + */ + protected FastIDSet getAllOtherItems(long userID, PreferenceArray preferencesFromUser, boolean includeKnownItems) + throws TasteException { + return candidateItemsStrategy.getCandidateItems(userID, preferencesFromUser, dataModel, includeKnownItems); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java new file mode 100644 index 0000000..37389a7 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllSimilarItemsCandidateItemsStrategy.java @@ -0,0 +1,50 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import com.google.common.base.Preconditions; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.similarity.ItemSimilarity; + +/** + * Candidate items are the union of {@link ItemSimilarity#allSimilarItemIDs(long)} over the preferred items (which are removed unless known items are requested). + */ +public class AllSimilarItemsCandidateItemsStrategy extends AbstractCandidateItemsStrategy { + + private final ItemSimilarity similarity; + + public AllSimilarItemsCandidateItemsStrategy(ItemSimilarity similarity) { + Preconditions.checkArgument(similarity != null, "similarity is null"); + this.similarity = similarity; + } + + @Override + protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems) + throws TasteException { + FastIDSet candidateItemIDs = new FastIDSet(); + for (long itemID : preferredItemIDs) { + candidateItemIDs.addAll(similarity.allSimilarItemIDs(itemID)); + } + if (!includeKnownItems) { + candidateItemIDs.removeAll(preferredItemIDs); + } + return candidateItemIDs; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java
---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java new file mode 100644 index 0000000..929eddd --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/AllUnknownItemsCandidateItemsStrategy.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.model.DataModel; + +public final class AllUnknownItemsCandidateItemsStrategy extends AbstractCandidateItemsStrategy { + + /** return all items in the data model, minus the preferred items unless {@code includeKnownItems} is set */ + @Override + protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems) + throws TasteException { + FastIDSet possibleItemIDs = new FastIDSet(dataModel.getNumItems()); + LongPrimitiveIterator allItemIDs = dataModel.getItemIDs(); + while (allItemIDs.hasNext()) { + possibleItemIDs.add(allItemIDs.nextLong()); + } + if (!includeKnownItems) { + possibleItemIDs.removeAll(preferredItemIDs); + } + return possibleItemIDs; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java new file mode 100644 index 0000000..1677ea8 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByRescoreComparator.java @@ -0,0 +1,65 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.io.Serializable; +import java.util.Comparator; + +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; + +/** + * <p> + * Defines ordering on {@link RecommendedItem} by the rescored value of the recommendations' estimated + * preference value, from high to low. With a {@code null} rescorer the raw value is used; a NaN score compares as equal to any other. + * </p> + */ +final class ByRescoreComparator implements Comparator<RecommendedItem>, Serializable { + + private final IDRescorer rescorer; + + ByRescoreComparator(IDRescorer rescorer) { + this.rescorer = rescorer; + } + + @Override + public int compare(RecommendedItem o1, RecommendedItem o2) { + double rescored1; + double rescored2; + if (rescorer == null) { + rescored1 = o1.getValue(); + rescored2 = o2.getValue(); + } else { + rescored1 = rescorer.rescore(o1.getItemID(), o1.getValue()); + rescored2 = rescorer.rescore(o2.getItemID(), o2.getValue()); + } + if (rescored1 < rescored2) { + return 1; + } else if (rescored1 > rescored2) { + return -1; + } else { + return 0; + } + } + + @Override + public String toString() { + return "ByRescoreComparator[rescorer:" + rescorer + ']'; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java
b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java new file mode 100644 index 0000000..57c5f3d --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/ByValueRecommendedItemComparator.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.io.Serializable; +import java.util.Comparator; + +import org.apache.mahout.cf.taste.recommender.RecommendedItem; + +/** + * Defines a natural ordering from most-preferred item (highest value) to least-preferred. + */ +public final class ByValueRecommendedItemComparator implements Comparator<RecommendedItem>, Serializable { + + private static final Comparator<RecommendedItem> INSTANCE = new ByValueRecommendedItemComparator(); + + public static Comparator<RecommendedItem> getInstance() { + return INSTANCE; + } + + @Override + public int compare(RecommendedItem o1, RecommendedItem o2) { + float value1 = o1.getValue(); + float value2 = o2.getValue(); + return value1 > value2 ? -1 : value1 < value2 ? 
1 : 0; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java new file mode 100644 index 0000000..7ed8cc3 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/CachingRecommender.java @@ -0,0 +1,251 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.concurrent.Callable; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.Cache; +import org.apache.mahout.cf.taste.impl.common.RefreshHelper; +import org.apache.mahout.cf.taste.impl.common.Retriever; +import org.apache.mahout.cf.taste.impl.model.PlusAnonymousUserDataModel; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.recommender.IDRescorer; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.Recommender; +import org.apache.mahout.common.LongPair; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A {@link Recommender} which caches the results from another {@link Recommender} in memory. + * + * TODO: Should be checked for thread safety + * </p> + */ +public final class CachingRecommender implements Recommender { + + private static final Logger log = LoggerFactory.getLogger(CachingRecommender.class); + + private final Recommender recommender; + private final int[] maxHowMany; + private final Retriever<Long,Recommendations> recommendationsRetriever; + private final Cache<Long,Recommendations> recommendationCache; + private final Cache<LongPair,Float> estimatedPrefCache; + private final RefreshHelper refreshHelper; + private IDRescorer currentRescorer; + private boolean currentlyIncludeKnownItems; + + public CachingRecommender(Recommender recommender) throws TasteException { + Preconditions.checkArgument(recommender != null, "recommender is null"); + this.recommender = recommender; + maxHowMany = new int[]{1}; + // Use "num users" as an upper limit on cache size. Rough guess. 
+ int numUsers = recommender.getDataModel().getNumUsers(); + recommendationsRetriever = new RecommendationRetriever(); + recommendationCache = new Cache<>(recommendationsRetriever, numUsers); + estimatedPrefCache = new Cache<>(new EstimatedPrefRetriever(), numUsers); + refreshHelper = new RefreshHelper(new Callable<Object>() { + @Override + public Object call() { + clear(); + return null; + } + }); + refreshHelper.addDependency(recommender); + } + + private void setCurrentRescorer(IDRescorer rescorer) { + if (rescorer == null) { + if (currentRescorer != null) { + currentRescorer = null; + clear(); + } + } else { + if (!rescorer.equals(currentRescorer)) { + currentRescorer = rescorer; + clear(); + } + } + } + + public void setCurrentlyIncludeKnownItems(boolean currentlyIncludeKnownItems) { + this.currentlyIncludeKnownItems = currentlyIncludeKnownItems; + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany) throws TasteException { + return recommend(userID, howMany, null, false); + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException { + return recommend(userID, howMany, null, includeKnownItems); + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany,IDRescorer rescorer) throws TasteException { + return recommend(userID, howMany, rescorer, false); + } + + @Override + public List<RecommendedItem> recommend(long userID, int howMany,IDRescorer rescorer, boolean includeKnownItems) + throws TasteException { + Preconditions.checkArgument(howMany >= 1, "howMany must be at least 1"); + synchronized (maxHowMany) { + if (howMany > maxHowMany[0]) { + maxHowMany[0] = howMany; + } + } + + // Special case, avoid caching an anonymous user + if (userID == PlusAnonymousUserDataModel.TEMP_USER_ID) { + return recommendationsRetriever.get(PlusAnonymousUserDataModel.TEMP_USER_ID).getItems(); + } + + setCurrentRescorer(rescorer); + 
setCurrentlyIncludeKnownItems(includeKnownItems); + + Recommendations recommendations = recommendationCache.get(userID); + if (recommendations.getItems().size() < howMany && !recommendations.isNoMoreRecommendableItems()) { + clear(userID); + recommendations = recommendationCache.get(userID); + if (recommendations.getItems().size() < howMany) { + recommendations.setNoMoreRecommendableItems(true); + } + } + + List<RecommendedItem> recommendedItems = recommendations.getItems(); + return recommendedItems.size() > howMany ? recommendedItems.subList(0, howMany) : recommendedItems; + } + + @Override + public float estimatePreference(long userID, long itemID) throws TasteException { + return estimatedPrefCache.get(new LongPair(userID, itemID)); + } + + @Override + public void setPreference(long userID, long itemID, float value) throws TasteException { + recommender.setPreference(userID, itemID, value); + clear(userID); + } + + @Override + public void removePreference(long userID, long itemID) throws TasteException { + recommender.removePreference(userID, itemID); + clear(userID); + } + + @Override + public DataModel getDataModel() { + return recommender.getDataModel(); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + refreshHelper.refresh(alreadyRefreshed); + } + + /** + * <p> + * Clears cached recommendations for the given user. + * </p> + * + * @param userID + * clear cached data associated with this user ID + */ + public void clear(final long userID) { + log.debug("Clearing recommendations for user ID '{}'", userID); + recommendationCache.remove(userID); + estimatedPrefCache.removeKeysMatching(new Cache.MatchPredicate<LongPair>() { + @Override + public boolean matches(LongPair userItemPair) { + return userItemPair.getFirst() == userID; + } + }); + } + + /** + * <p> + * Clears all cached recommendations. 
+ * </p> + */ + public void clear() { + log.debug("Clearing all recommendations..."); + recommendationCache.clear(); + estimatedPrefCache.clear(); + } + + @Override + public String toString() { + return "CachingRecommender[recommender:" + recommender + ']'; + } + + private final class RecommendationRetriever implements Retriever<Long,Recommendations> { + @Override + public Recommendations get(Long key) throws TasteException { + log.debug("Retrieving new recommendations for user ID '{}'", key); + int howMany = maxHowMany[0]; + IDRescorer rescorer = currentRescorer; + List<RecommendedItem> recommendations = + rescorer == null ? recommender.recommend(key, howMany, null, currentlyIncludeKnownItems) : + recommender.recommend(key, howMany, rescorer, currentlyIncludeKnownItems); + return new Recommendations(Collections.unmodifiableList(recommendations)); + } + } + + private final class EstimatedPrefRetriever implements Retriever<LongPair,Float> { + @Override + public Float get(LongPair key) throws TasteException { + long userID = key.getFirst(); + long itemID = key.getSecond(); + log.debug("Retrieving estimated preference for user ID '{}' and item ID '{}'", userID, itemID); + return recommender.estimatePreference(userID, itemID); + } + } + + private static final class Recommendations { + + private final List<RecommendedItem> items; + private boolean noMoreRecommendableItems; + + private Recommendations(List<RecommendedItem> items) { + this.items = items; + } + + List<RecommendedItem> getItems() { + return items; + } + + boolean isNoMoreRecommendableItems() { + return noMoreRecommendableItems; + } + + void setNoMoreRecommendableItems(boolean noMoreRecommendableItems) { + this.noMoreRecommendableItems = noMoreRecommendableItems; + } + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/EstimatedPreferenceCapper.java ---------------------------------------------------------------------- diff --git 
a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/EstimatedPreferenceCapper.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/EstimatedPreferenceCapper.java new file mode 100644 index 0000000..f0f389f --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/EstimatedPreferenceCapper.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.model.DataModel; + +/** + * Simple class which encapsulates restricting a preference value + * to a predefined range. The simple logic is wrapped up here for + * performance reasons. 
+ */ +public final class EstimatedPreferenceCapper { + + private final float min; + private final float max; + + public EstimatedPreferenceCapper(DataModel model) { + min = model.getMinPreference(); + max = model.getMaxPreference(); + } + + public float capEstimate(float estimate) { + if (estimate > max) { + estimate = max; + } else if (estimate < min) { + estimate = min; + } + return estimate; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/b988c493/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java ---------------------------------------------------------------------- diff --git a/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java new file mode 100644 index 0000000..40e21a3 --- /dev/null +++ b/mr/src/main/java/org/apache/mahout/cf/taste/impl/recommender/GenericBooleanPrefItemBasedRecommender.java @@ -0,0 +1,71 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.recommender; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.recommender.CandidateItemsStrategy; +import org.apache.mahout.cf.taste.recommender.MostSimilarItemsCandidateItemsStrategy; +import org.apache.mahout.cf.taste.similarity.ItemSimilarity; + +/** + * A variant on {@link GenericItemBasedRecommender} which is appropriate for use when no notion of preference + * value exists in the data. + * + * @see org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefUserBasedRecommender + */ +public final class GenericBooleanPrefItemBasedRecommender extends GenericItemBasedRecommender { + + public GenericBooleanPrefItemBasedRecommender(DataModel dataModel, ItemSimilarity similarity) { + super(dataModel, similarity); + } + + public GenericBooleanPrefItemBasedRecommender(DataModel dataModel, ItemSimilarity similarity, + CandidateItemsStrategy candidateItemsStrategy, MostSimilarItemsCandidateItemsStrategy + mostSimilarItemsCandidateItemsStrategy) { + super(dataModel, similarity, candidateItemsStrategy, mostSimilarItemsCandidateItemsStrategy); + } + + /** + * This computation is in a technical sense, wrong, since in the domain of "boolean preference users" where + * all preference values are 1, this method should only ever return 1.0 or NaN. This isn't terribly useful + * however since it means results can't be ranked by preference value (all are 1). So instead this returns a + * sum of similarities. 
+ */ + @Override + protected float doEstimatePreference(long userID, PreferenceArray preferencesFromUser, long itemID) + throws TasteException { + double[] similarities = getSimilarity().itemSimilarities(itemID, preferencesFromUser.getIDs()); + boolean foundAPref = false; + double totalSimilarity = 0.0; + for (double theSimilarity : similarities) { + if (!Double.isNaN(theSimilarity)) { + foundAPref = true; + totalSimilarity += theSimilarity; + } + } + return foundAPref ? (float) totalSimilarity : Float.NaN; + } + + @Override + public String toString() { + return "GenericBooleanPrefItemBasedRecommender"; + } + +}
