http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java new file mode 100644 index 0000000..712b96a --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/file/FileItemSimilarity.java @@ -0,0 +1,137 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.similarity.file; + +import java.io.File; +import java.util.Collection; +import java.util.concurrent.locks.ReentrantLock; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity; +import org.apache.mahout.cf.taste.similarity.ItemSimilarity; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * An {@link ItemSimilarity} backed by a comma-delimited file. This class typically expects a file where each line + * contains an item ID, followed by another item ID, followed by a similarity value, separated by commas. You may also + * use tabs. + * </p> + * + * <p> + * The similarity value is assumed to be parseable as a {@code double} having a value between -1 and 1. The + * item IDs are parsed as {@code long}s. Similarities are symmetric so for a pair of items you do not have to + * include 2 lines in the file. + * </p> + * + * <p> + * This class will reload data from the data file when {@link #refresh(Collection)} is called, unless the file + * has been reloaded very recently already. + * </p> + * + * <p> + * This class is not intended for use with very large amounts of data. For that, a JDBC-backed {@link ItemSimilarity} + * and a database are more appropriate. + * </p> + */ +public class FileItemSimilarity implements ItemSimilarity { + + public static final long DEFAULT_MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute? 
+ + private ItemSimilarity delegate; + private final ReentrantLock reloadLock; + private final File dataFile; + private long lastModified; + private final long minReloadIntervalMS; + + private static final Logger log = LoggerFactory.getLogger(FileItemSimilarity.class); + + /** + * @param dataFile + * file containing the similarity data + */ + public FileItemSimilarity(File dataFile) { + this(dataFile, DEFAULT_MIN_RELOAD_INTERVAL_MS); + } + + /** + * @param minReloadIntervalMS + * the minimum interval in milliseconds after which a full reload of the original datafile is done + * when refresh() is called + * @see #FileItemSimilarity(File) + */ + public FileItemSimilarity(File dataFile, long minReloadIntervalMS) { + Preconditions.checkArgument(dataFile != null, "dataFile is null"); + Preconditions.checkArgument(dataFile.exists() && !dataFile.isDirectory(), + "dataFile is missing or a directory: %s", dataFile); + + log.info("Creating FileItemSimilarity for file {}", dataFile); + + this.dataFile = dataFile.getAbsoluteFile(); + this.lastModified = dataFile.lastModified(); + this.minReloadIntervalMS = minReloadIntervalMS; + this.reloadLock = new ReentrantLock(); + + reload(); + } + + @Override + public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException { + return delegate.itemSimilarities(itemID1, itemID2s); + } + + @Override + public long[] allSimilarItemIDs(long itemID) throws TasteException { + return delegate.allSimilarItemIDs(itemID); + } + + @Override + public double itemSimilarity(long itemID1, long itemID2) throws TasteException { + return delegate.itemSimilarity(itemID1, itemID2); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + if (dataFile.lastModified() > lastModified + minReloadIntervalMS) { + log.debug("File has changed; reloading..."); + reload(); + } + } + + protected void reload() { + if (reloadLock.tryLock()) { + try { + long newLastModified = dataFile.lastModified(); + delegate = new 
GenericItemSimilarity(new FileItemItemSimilarityIterable(dataFile)); + lastModified = newLastModified; + } finally { + reloadLock.unlock(); + } + } + } + + @Override + public String toString() { + return "FileItemSimilarity[dataFile:" + dataFile + ']'; + } + +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java new file mode 100644 index 0000000..631ec9b --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/FileSimilarItemsWriter.java @@ -0,0 +1,67 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.similarity.precompute; + +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.OutputStreamWriter; + +import com.google.common.io.Closeables; +import org.apache.commons.io.Charsets; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter; + +/** + * Persist the precomputed item similarities to a file that can later be used + * by a {@link org.apache.mahout.cf.taste.impl.similarity.file.FileItemSimilarity} + */ +public class FileSimilarItemsWriter implements SimilarItemsWriter { + + private final File file; + private BufferedWriter writer; + + public FileSimilarItemsWriter(File file) { + this.file = file; + } + + @Override + public void open() throws IOException { + writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(file), Charsets.UTF_8)); + } + + @Override + public void add(SimilarItems similarItems) throws IOException { + String itemID = String.valueOf(similarItems.getItemID()); + for (SimilarItem similarItem : similarItems.getSimilarItems()) { + writer.write(itemID); + writer.write(','); + writer.write(String.valueOf(similarItem.getItemID())); + writer.write(','); + writer.write(String.valueOf(similarItem.getSimilarity())); + writer.newLine(); + } + } + + @Override + public void close() throws IOException { + Closeables.close(writer, false); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java 
b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java new file mode 100644 index 0000000..b7b52cf --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/impl/similarity/precompute/MultithreadedBatchItemSimilarities.java @@ -0,0 +1,230 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.impl.similarity.precompute; + +import com.google.common.io.Closeables; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.similarity.precompute.BatchItemSimilarities; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItems; +import org.apache.mahout.cf.taste.similarity.precompute.SimilarItemsWriter; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Precompute item similarities in parallel on a single machine. 
The recommender given to this class must use a + * DataModel that holds the interactions in memory (such as + * {@link org.apache.mahout.cf.taste.impl.model.GenericDataModel} or + * {@link org.apache.mahout.cf.taste.impl.model.file.FileDataModel}) as fast random access to the data is required + */ +public class MultithreadedBatchItemSimilarities extends BatchItemSimilarities { + + private int batchSize; + + private static final int DEFAULT_BATCH_SIZE = 100; + + private static final Logger log = LoggerFactory.getLogger(MultithreadedBatchItemSimilarities.class); + + /** + * @param recommender recommender to use + * @param similarItemsPerItem number of similar items to compute per item + */ + public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem) { + this(recommender, similarItemsPerItem, DEFAULT_BATCH_SIZE); + } + + /** + * @param recommender recommender to use + * @param similarItemsPerItem number of similar items to compute per item + * @param batchSize size of item batches sent to worker threads + */ + public MultithreadedBatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem, int batchSize) { + super(recommender, similarItemsPerItem); + this.batchSize = batchSize; + } + + @Override + public int computeItemSimilarities(int degreeOfParallelism, int maxDurationInHours, SimilarItemsWriter writer) + throws IOException { + + ExecutorService executorService = Executors.newFixedThreadPool(degreeOfParallelism + 1); + + Output output = null; + try { + writer.open(); + + DataModel dataModel = getRecommender().getDataModel(); + + BlockingQueue<long[]> itemsIDsInBatches = queueItemIDsInBatches(dataModel, batchSize, degreeOfParallelism); + BlockingQueue<List<SimilarItems>> results = new LinkedBlockingQueue<>(); + + AtomicInteger numActiveWorkers = new AtomicInteger(degreeOfParallelism); + for (int n = 0; n < degreeOfParallelism; n++) { + executorService.execute(new SimilarItemsWorker(n, itemsIDsInBatches, 
results, numActiveWorkers)); + } + + output = new Output(results, writer, numActiveWorkers); + executorService.execute(output); + + } catch (Exception e) { + throw new IOException(e); + } finally { + executorService.shutdown(); + try { + boolean succeeded = executorService.awaitTermination(maxDurationInHours, TimeUnit.HOURS); + if (!succeeded) { + throw new RuntimeException("Unable to complete the computation in " + maxDurationInHours + " hours!"); + } + } catch (InterruptedException e) { + throw new RuntimeException(e); + } + Closeables.close(writer, false); + } + + return output.getNumSimilaritiesProcessed(); + } + + private static BlockingQueue<long[]> queueItemIDsInBatches(DataModel dataModel, int batchSize, + int degreeOfParallelism) + throws TasteException { + + LongPrimitiveIterator itemIDs = dataModel.getItemIDs(); + int numItems = dataModel.getNumItems(); + + BlockingQueue<long[]> itemIDBatches = new LinkedBlockingQueue<>((numItems / batchSize) + 1); + + long[] batch = new long[batchSize]; + int pos = 0; + while (itemIDs.hasNext()) { + batch[pos] = itemIDs.nextLong(); + pos++; + if (pos == batchSize) { + itemIDBatches.add(batch.clone()); + pos = 0; + } + } + + if (pos > 0) { + long[] lastBatch = new long[pos]; + System.arraycopy(batch, 0, lastBatch, 0, pos); + itemIDBatches.add(lastBatch); + } + + if (itemIDBatches.size() < degreeOfParallelism) { + throw new IllegalStateException("Degree of parallelism [" + degreeOfParallelism + "] " + + " is larger than number of batches [" + itemIDBatches.size() +"]."); + } + + log.info("Queued {} items in {} batches", numItems, itemIDBatches.size()); + + return itemIDBatches; + } + + + private static class Output implements Runnable { + + private final BlockingQueue<List<SimilarItems>> results; + private final SimilarItemsWriter writer; + private final AtomicInteger numActiveWorkers; + private int numSimilaritiesProcessed = 0; + + Output(BlockingQueue<List<SimilarItems>> results, SimilarItemsWriter writer, AtomicInteger 
numActiveWorkers) { + this.results = results; + this.writer = writer; + this.numActiveWorkers = numActiveWorkers; + } + + private int getNumSimilaritiesProcessed() { + return numSimilaritiesProcessed; + } + + @Override + public void run() { + while (numActiveWorkers.get() != 0 || !results.isEmpty()) { + try { + List<SimilarItems> similarItemsOfABatch = results.poll(10, TimeUnit.MILLISECONDS); + if (similarItemsOfABatch != null) { + for (SimilarItems similarItems : similarItemsOfABatch) { + writer.add(similarItems); + numSimilaritiesProcessed += similarItems.numSimilarItems(); + } + } + } catch (Exception e) { + throw new RuntimeException(e); + } + } + } + } + + private class SimilarItemsWorker implements Runnable { + + private final int number; + private final BlockingQueue<long[]> itemIDBatches; + private final BlockingQueue<List<SimilarItems>> results; + private final AtomicInteger numActiveWorkers; + + SimilarItemsWorker(int number, BlockingQueue<long[]> itemIDBatches, BlockingQueue<List<SimilarItems>> results, + AtomicInteger numActiveWorkers) { + this.number = number; + this.itemIDBatches = itemIDBatches; + this.results = results; + this.numActiveWorkers = numActiveWorkers; + } + + @Override + public void run() { + + int numBatchesProcessed = 0; + while (!itemIDBatches.isEmpty()) { + try { + long[] itemIDBatch = itemIDBatches.take(); + + List<SimilarItems> similarItemsOfBatch = new ArrayList<>(itemIDBatch.length); + for (long itemID : itemIDBatch) { + List<RecommendedItem> similarItems = getRecommender().mostSimilarItems(itemID, getSimilarItemsPerItem()); + similarItemsOfBatch.add(new SimilarItems(itemID, similarItems)); + } + + results.offer(similarItemsOfBatch); + + if (++numBatchesProcessed % 5 == 0) { + log.info("worker {} processed {} batches", number, numBatchesProcessed); + } + + } catch (Exception e) { + throw new RuntimeException(e); + } + } + log.info("worker {} processed {} batches. 
done.", number, numBatchesProcessed); + numActiveWorkers.decrementAndGet(); + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java new file mode 100644 index 0000000..022d02d --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/DataModel.java @@ -0,0 +1,199 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.model; + +import java.io.Serializable; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; + +/** + * <p> + * Implementations represent a repository of information about users and their associated {@link Preference}s + * for items. 
+ * </p> + */ +public interface DataModel extends Refreshable, Serializable { + + /** + * @return a {@link LongPrimitiveIterator} of all user IDs in the model, in order + * @throws TasteException + * if an error occurs while accessing the data + */ + LongPrimitiveIterator getUserIDs() throws TasteException; + + /** + * @param userID + * ID of user to get prefs for + * @return user's preferences, ordered by item ID + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + PreferenceArray getPreferencesFromUser(long userID) throws TasteException; + + /** + * @param userID + * ID of user to get prefs for + * @return IDs of the items the user has expressed a preference for + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + FastIDSet getItemIDsFromUser(long userID) throws TasteException; + + /** + * @return a {@link LongPrimitiveIterator} of all item IDs in the model, in order + * @throws TasteException + * if an error occurs while accessing the data + */ + LongPrimitiveIterator getItemIDs() throws TasteException; + + /** + * @param itemID + * item ID + * @return all existing {@link Preference}s expressed for that item, ordered by user ID, as an array + * @throws org.apache.mahout.cf.taste.common.NoSuchItemException + * if the item does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + PreferenceArray getPreferencesForItem(long itemID) throws TasteException; + + /** + * Retrieves the preference value for a single user and item.
+ * + * @param userID + * user ID to get pref value from + * @param itemID + * item ID to get pref value for + * @return preference value from the given user for the given item, or {@code null} if none exists + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + Float getPreferenceValue(long userID, long itemID) throws TasteException; + + /** + * Retrieves the time at which a preference value from a user and item was set, if known. + * Time is expressed in the usual way, as a number of milliseconds since the epoch. + * + * @param userID user ID for preference in question + * @param itemID item ID for preference in question + * @return time at which preference was set, or {@code null} if no preference exists or its time is not known + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException if the user does not exist + * @throws TasteException if an error occurs while accessing the data + */ + Long getPreferenceTime(long userID, long itemID) throws TasteException; + + /** + * @return total number of items known to the model. This is generally the union of all items preferred by + * at least one user but could include more. + * @throws TasteException + * if an error occurs while accessing the data + */ + int getNumItems() throws TasteException; + + /** + * @return total number of users known to the model.
+ * @throws TasteException + * if an error occurs while accessing the data + */ + int getNumUsers() throws TasteException; + + /** + * @param itemID item ID to check for + * @return the number of users who have expressed a preference for the item + * @throws TasteException if an error occurs while accessing the data + */ + int getNumUsersWithPreferenceFor(long itemID) throws TasteException; + + /** + * @param itemID1 first item ID to check for + * @param itemID2 second item ID to check for + * @return the number of users who have expressed a preference for the items (presumably for both of them; + * confirm against implementations) + * @throws TasteException if an error occurs while accessing the data + */ + int getNumUsersWithPreferenceFor(long itemID1, long itemID2) throws TasteException; + + /** + * <p> + * Sets a particular preference (item plus rating) for a user. + * </p> + * + * @param userID + * user to set preference for + * @param itemID + * item to set preference for + * @param value + * preference value + * @throws org.apache.mahout.cf.taste.common.NoSuchItemException + * if the item does not exist + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + void setPreference(long userID, long itemID, float value) throws TasteException; + + /** + * <p> + * Removes a particular preference for a user.
+ * </p> + * + * @param userID + * user from which to remove preference + * @param itemID + * item to remove preference for + * @throws org.apache.mahout.cf.taste.common.NoSuchItemException + * if the item does not exist + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if the user does not exist + * @throws TasteException + * if an error occurs while accessing the data + */ + void removePreference(long userID, long itemID) throws TasteException; + + /** + * @return {@code true} if this implementation actually stores and returns distinct preference values; + * that is, if it is not a 'boolean' DataModel + */ + boolean hasPreferenceValues(); + + /** + * @return the maximum preference value that is possible in the current problem domain being evaluated. For + * example, if the domain is movie ratings on a scale of 1 to 5, this should be 5. While a + * {@link org.apache.mahout.cf.taste.recommender.Recommender} may estimate a preference value above 5.0, it + * isn't "fair" to consider that the system is actually suggesting an impossible rating of, say, 5.4 stars. + * In practice the application would cap this estimate to 5.0.
Since evaluators evaluate + * the difference between estimated and actual value, this at least prevents this effect from unfairly + * penalizing a {@link org.apache.mahout.cf.taste.recommender.Recommender}. + */ + float getMaxPreference(); + + /** + * @return the minimum preference value that is possible in the current problem domain being evaluated; the + * counterpart of {@link #getMaxPreference()} + * @see #getMaxPreference() + */ + float getMinPreference(); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java new file mode 100644 index 0000000..cc477fe --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/IDMigrator.java @@ -0,0 +1,63 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.model; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; + +/** + * <p> + * Mahout 0.2 changed the framework to operate only in terms of numeric (long) ID values for users and items.
+ * This is, obviously, not compatible with applications that used other key types -- most commonly + * {@link String}. Implementations of this interface provide support for mapping Strings to longs and vice versa in + * order to provide a smoother migration path to applications that must still use strings as IDs. + * </p> + * + * <p> + * The mapping from strings to 64-bit numeric values is fixed here, to provide a standard implementation that + * is 'portable' or reproducible outside the framework easily. See {@link #toLongID(String)}. + * </p> + * + * <p> + * Because this mapping is deterministically computable, it does not need to be stored. Indeed, subclasses' + * job is to store the reverse mapping. There are an infinite number of strings but only a fixed number of + * longs, so it is possible for two strings to map to the same value. Subclasses do not treat this as an + * error but rather retain only the most recent mapping, overwriting a previous mapping. The probability of + * collision in a 64-bit space is quite small, but not zero. However, in the context of a collaborative + * filtering problem, the consequence of a collision is small, at worst -- perhaps one user receives another + * user's recommendations. + * </p> + * + * @since 0.2 + */ +public interface IDMigrator extends Refreshable { + + /** + * @param stringID + * string ID to map to a long ID + * @return the top 8 bytes of the MD5 hash of the bytes of the given {@link String}'s UTF-8 encoding as a + * long.
+ */ + long toLongID(String stringID); + + /** + * @param longID + * long ID to look up + * @return the string ID most recently associated with the given long ID, or {@code null} if none exists + * @throws TasteException + * if an error occurs while retrieving the mapping + */ + String toStringID(long longID) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java new file mode 100644 index 0000000..e91ed48 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/JDBCDataModel.java @@ -0,0 +1,43 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +package org.apache.mahout.cf.taste.model; + +import javax.sql.DataSource; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastByIDMap; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; + +public interface JDBCDataModel extends DataModel { + + /** + * @return the {@link DataSource} underlying this model + */ + DataSource getDataSource(); + + /** + * Exports all user preference data held by this model. + * + * <p> + * Open design question from the original author: should this exist elsewhere? It seems most relevant for a DB + * implementation, which is not in memory and which might want to export to memory. + * </p> + * + * @return all user preference data + * @throws TasteException + * if the export fails (the exact conditions are not specified here) + */ + FastByIDMap<PreferenceArray> exportWithPrefs() throws TasteException; + + FastByIDMap<FastIDSet> exportWithIDsOnly() throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java new file mode 100644 index 0000000..fe0150a --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/Preference.java @@ -0,0 +1,48 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.model; + +/** + * <p> + * A {@link Preference} encapsulates an item and a preference value, which indicates the strength of the + * preference for it. {@link Preference}s are associated with users. + * </p> + */ +public interface Preference { + + /** @return ID of the user who prefers the item */ + long getUserID(); + + /** @return ID of the item that is preferred */ + long getItemID(); + + /** + * @return strength of the preference for that item. Zero should indicate "no preference either way"; + * positive values indicate preference and negative values indicate dislike + */ + float getValue(); + + /** + * Sets the strength of the preference for this item. + * + * @param value + * new preference value + */ + void setValue(float value); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java new file mode 100644 index 0000000..3886bc6 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/PreferenceArray.java @@ -0,0 +1,143 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.model; + +import java.io.Serializable; + +/** + * An alternate representation of an array of {@link Preference}. Implementations, in theory, can produce a + * more memory-efficient representation. + */ +public interface PreferenceArray extends Cloneable, Serializable, Iterable<Preference> { + + /** + * @return length of the "array" + */ + int length(); + + /** + * @param i + * index + * @return a materialized {@link Preference} representation of the preference at i + */ + Preference get(int i); + + /** + * Sets preference at i from information in the given {@link Preference} + * + * @param i index + * @param pref preference to take the information from + */ + void set(int i, Preference pref); + + /** + * @param i + * index + * @return user ID from preference at i + */ + long getUserID(int i); + + /** + * Sets user ID for preference at i. + * + * @param i + * index + * @param userID + * new user ID + */ + void setUserID(int i, long userID); + + /** + * @param i + * index + * @return item ID from preference at i + */ + long getItemID(int i); + + /** + * Sets item ID for preference at i.
+ * + * @param i + * index + * @param itemID + * new item ID + */ + void setItemID(int i, long itemID); + + /** + * @return all user or item IDs + */ + long[] getIDs(); + + /** + * @param i + * index + * @return preference value from preference at i + */ + float getValue(int i); + + /** + * Sets preference value for preference at i. + * + * @param i + * index + * @param value + * new preference value + */ + void setValue(int i, float value); + + /** + * @return independent copy of this object + */ + PreferenceArray clone(); + + /** + * Sorts underlying array by user ID, ascending. + */ + void sortByUser(); + + /** + * Sorts underlying array by item ID, ascending. + */ + void sortByItem(); + + /** + * Sorts underlying array by preference value, ascending. + */ + void sortByValue(); + + /** + * Sorts underlying array by preference value, descending. + */ + void sortByValueReversed(); + + /** + * @param userID + * user ID + * @return true if array contains a preference with given user ID + */ + boolean hasPrefWithUserID(long userID); + + /** + * @param itemID + * item ID + * @return true if array contains a preference with given item ID + */ + boolean hasPrefWithItemID(long itemID); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java new file mode 100644 index 0000000..ff29a34 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/model/UpdatableIDMigrator.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.model; + +import org.apache.mahout.cf.taste.common.TasteException; + +public interface UpdatableIDMigrator extends IDMigrator { + + /** + * Stores the reverse long-to-String mapping in some kind of backing store. Note that this must be called + * directly (or indirectly through {@link #initialize(Iterable)}) for every String that might be encountered + * in the application, or else the mapping will not be known. + * + * @param longID + * long ID + * @param stringID + * string ID that maps to/from that long ID + * @throws TasteException + * if an error occurs while saving the mapping + */ + void storeMapping(long longID, String stringID) throws TasteException; + + /** + * Make the mapping aware of the given string IDs. This must be called initially before the implementation + * is used, or else it will not be aware of reverse long-to-String mappings. 
+ * + * @throws TasteException + * if an error occurs while storing the mappings + */ + void initialize(Iterable<String> stringIDs) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java new file mode 100644 index 0000000..2a143e1 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/neighborhood/UserNeighborhood.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.neighborhood; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; + +/** + * <p> + * Implementations of this interface compute a "neighborhood" of users like a given user. This neighborhood + * can be used to compute recommendations then. 
+ * </p> + */ +public interface UserNeighborhood extends Refreshable { + + /** + * @param userID + * ID of user for which a neighborhood will be computed + * @return IDs of users in the neighborhood + * @throws TasteException + * if an error occurs while accessing data + */ + long[] getUserNeighborhood(long userID) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java new file mode 100644 index 0000000..ada1949 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/CandidateItemsStrategy.java @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.recommender; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; + +/** + * Used to retrieve all items that could possibly be recommended to the user + */ +public interface CandidateItemsStrategy extends Refreshable { + + /** + * @return IDs of all items that could be recommended to the user + */ + FastIDSet getCandidateItems(long userID, PreferenceArray preferencesFromUser, DataModel dataModel, + boolean includeKnownItems) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java new file mode 100644 index 0000000..d9a9cf7 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/IDRescorer.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +/** + * <p> + * A {@link Rescorer} which operates on {@code long} primitive IDs, rather than arbitrary {@link Object}s. + * This is provided since most uses of this interface in the framework take IDs (as {@code long}) as an + * argument, and so this can be used to avoid unnecessary boxing/unboxing. + * </p> + */ +public interface IDRescorer { + + /** + * @param id + * ID of thing (user, item, etc.) to rescore + * @param originalScore + * original score + * @return modified score, or {@link Double#NaN} to indicate that this should be excluded entirely + */ + double rescore(long id, double originalScore); + + /** + * Returns {@code true} to exclude the given thing. + * + * @param id + * ID of thing (user, item, etc.) 
to rescore + * @return {@code true} to exclude, {@code false} otherwise + */ + boolean isFiltered(long id); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java new file mode 100644 index 0000000..570f851 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/ItemBasedRecommender.java @@ -0,0 +1,145 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +import java.util.List; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.common.LongPair; + +/** + * <p> + * Interface implemented by "item-based" recommenders. 
+ * </p> + */ +public interface ItemBasedRecommender extends Recommender { + + /** + * @param itemID + * ID of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @return items most similar to the given item, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long itemID, int howMany) throws TasteException; + + /** + * @param itemID + * ID of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @param rescorer + * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar + * items + * @return items most similar to the given item, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long itemID, int howMany, Rescorer<LongPair> rescorer) throws TasteException; + + /** + * @param itemIDs + * IDs of items for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @return items most similar to the given items, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long[] itemIDs, int howMany) throws TasteException; + + /** + * @param itemIDs + * IDs of items for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @param rescorer + * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar + * items + * @return items most similar to the given items, ordered from most
similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long[] itemIDs, + int howMany, + Rescorer<LongPair> rescorer) throws TasteException; + + /** + * @param itemIDs + * IDs of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @param excludeItemIfNotSimilarToAll + * exclude an item if it is not similar to each of the input items + * @return items most similar to the given items, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long[] itemIDs, + int howMany, + boolean excludeItemIfNotSimilarToAll) throws TasteException; + + /** + * @param itemIDs + * IDs of item for which to find most similar other items + * @param howMany + * desired number of most similar items to find + * @param rescorer + * {@link Rescorer} which can adjust item-item similarity estimates used to determine most similar + * items + * @param excludeItemIfNotSimilarToAll + * exclude an item if it is not similar to each of the input items + * @return items most similar to the given items, ordered from most similar to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> mostSimilarItems(long[] itemIDs, + int howMany, + Rescorer<LongPair> rescorer, + boolean excludeItemIfNotSimilarToAll) throws TasteException; + + /** + * <p> + * Lists the items that were most influential in recommending a given item to a given user. Exactly how this + * is determined is left to the implementation, but, generally this will return items that the user prefers + * and that are similar to the given item. 
+ * </p> + * + * <p> + * This returns a {@link List} of {@link RecommendedItem} which is a little misleading since it's returning + * recommend<strong>ing</strong> items, but, I thought it more natural to just reuse this class since it + * encapsulates an item and value. The value here does not necessarily have a consistent interpretation or + * expected range; it will be higher the more influential the item was in the recommendation. + * </p> + * + * @param userID + * ID of user who was recommended the item + * @param itemID + * ID of item that was recommended + * @param howMany + * maximum number of items to return + * @return {@link List} of {@link RecommendedItem}, ordered from most influential in recommending the given + * item to least + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + List<RecommendedItem> recommendedBecause(long userID, long itemID, int howMany) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java new file mode 100644 index 0000000..282ceff --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/MostSimilarItemsCandidateItemsStrategy.java @@ -0,0 +1,31 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.model.DataModel; + +/** + * Used to retrieve all items that could possibly be similar + */ +public interface MostSimilarItemsCandidateItemsStrategy extends Refreshable { + + FastIDSet getCandidateItems(long[] itemIDs, DataModel dataModel) throws TasteException; +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java new file mode 100644 index 0000000..1fcece8 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/RecommendedItem.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +/** + * <p> + * Implementations encapsulate items that are recommended, and include the item recommended and a value + * expressing the strength of the preference. + * </p> + */ +public interface RecommendedItem { + + /** @return the recommended item ID */ + long getItemID(); + + /** + * <p> + * A value expressing the strength of the preference for the recommended item. The range of the values + * depends on the implementation. Implementations must use larger values to express stronger preference. + * </p> + * + * @return strength of the preference + */ + float getValue(); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java new file mode 100644 index 0000000..4135aff --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/Recommender.java @@ -0,0 +1,132 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +import java.util.List; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.model.DataModel; + +/** + * <p> + * Implementations of this interface can recommend items for a user. Implementations will likely take + * advantage of several classes in other packages here to compute this. 
+ * </p> + */ +public interface Recommender extends Refreshable { + + /** + * @param userID + * user for which recommendations are to be computed + * @param howMany + * desired number of recommendations + * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to + * least + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + List<RecommendedItem> recommend(long userID, int howMany) throws TasteException; + + /** + * @param userID + * user for which recommendations are to be computed + * @param howMany + * desired number of recommendations + * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to + * least + * @param includeKnownItems + * whether to include items already known by the user in recommendations + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + List<RecommendedItem> recommend(long userID, int howMany, boolean includeKnownItems) throws TasteException; + + /** + * @param userID + * user for which recommendations are to be computed + * @param howMany + * desired number of recommendations + * @param rescorer + * rescoring function to apply before final list of recommendations is determined + * @return {@link List} of recommended {@link RecommendedItem}s, ordered from most strongly recommended to + * least + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer) throws TasteException; + + /** + * @param userID + * user for which recommendations are to be computed + * @param howMany + * desired number of recommendations + * @param rescorer + * rescoring function to apply before final list of recommendations is determined + * @param includeKnownItems + * whether to include items already known by the user in recommendations + * @return {@link List} of recommended {@link
RecommendedItem}s, ordered from most strongly recommended to + * least + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + + List<RecommendedItem> recommend(long userID, int howMany, IDRescorer rescorer, boolean includeKnownItems) + throws TasteException; + + /** + * @param userID + * user ID whose preference is to be estimated + * @param itemID + * item ID to estimate preference for + * @return an estimated preference if the user has not expressed a preference for the item, or else the + * user's actual preference for the item. If a preference cannot be estimated, returns + * {@link Float#NaN} + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + float estimatePreference(long userID, long itemID) throws TasteException; + + /** + * @param userID + * user to set preference for + * @param itemID + * item to set preference for + * @param value + * preference value + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + void setPreference(long userID, long itemID, float value) throws TasteException; + + /** + * @param userID + * user from which to remove preference + * @param itemID + * item for which to remove preference + * @throws TasteException + * if an error occurs while accessing the {@link DataModel} + */ + void removePreference(long userID, long itemID) throws TasteException; + + /** + * @return underlying {@link DataModel} used by this {@link Recommender} implementation + */ + DataModel getDataModel(); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java new file mode 100644 index
0000000..1490761 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/Rescorer.java @@ -0,0 +1,52 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.recommender; + +/** + * <p> + * A {@link Rescorer} simply assigns a new "score" to a thing like an ID of an item or user which a + * {@link Recommender} is considering returning as a top recommendation. It may be used to arbitrarily re-rank + * the results according to application-specific logic before returning recommendations. For example, an + * application may want to boost the score of items in a certain category just for one request. + * </p> + * + * <p> + * A {@link Rescorer} can also exclude a thing from consideration entirely by returning {@code true} from + * {@link #isFiltered(Object)}. + * </p> + */ +public interface Rescorer<T> { + + /** + * @param thing + * thing to rescore + * @param originalScore + * original score + * @return modified score, or {@link Double#NaN} to indicate that this should be excluded entirely + */ + double rescore(T thing, double originalScore); + + /** + * Returns {@code true} to exclude the given thing. 
+ * + * @param thing + * the thing to filter + * @return {@code true} to exclude, {@code false} otherwise + */ + boolean isFiltered(T thing); +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java new file mode 100644 index 0000000..b48593a --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/recommender/UserBasedRecommender.java @@ -0,0 +1,54 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.mahout.cf.taste.recommender; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.common.LongPair; + +/** + * <p> + * Interface implemented by "user-based" recommenders. 
+ * </p> + */ +public interface UserBasedRecommender extends Recommender { + + /** + * @param userID + * ID of user for which to find most similar other users + * @param howMany + * desired number of most similar users to find + * @return users most similar to the given user + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + long[] mostSimilarUserIDs(long userID, int howMany) throws TasteException; + + /** + * @param userID + * ID of user for which to find most similar other users + * @param howMany + * desired number of most similar users to find + * @param rescorer + * {@link Rescorer} which can adjust user-user similarity estimates used to determine most similar + * users + * @return IDs of users most similar to the given user + * @throws TasteException + * if an error occurs while accessing the {@link org.apache.mahout.cf.taste.model.DataModel} + */ + long[] mostSimilarUserIDs(long userID, int howMany, Rescorer<LongPair> rescorer) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java new file mode 100644 index 0000000..814610b --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/ItemSimilarity.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.similarity; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; + +/** + * <p> + * Implementations of this interface define a notion of similarity between two items. Implementations should + * return values in the range -1.0 to 1.0, with 1.0 representing perfect similarity. + * </p> + * + * @see UserSimilarity + */ +public interface ItemSimilarity extends Refreshable { + + /** + * <p> + * Returns the degree of similarity, of two items, based on the preferences that users have expressed for + * the items. 
+ * </p> + * + * @param itemID1 first item ID + * @param itemID2 second item ID + * @return similarity between the items, in [-1,1] or {@link Double#NaN} if similarity is unknown + * @throws org.apache.mahout.cf.taste.common.NoSuchItemException + * if either item is known to be non-existent in the data + * @throws TasteException if an error occurs while accessing the data + */ + double itemSimilarity(long itemID1, long itemID2) throws TasteException; + + /** + * <p>A bulk-get version of {@link #itemSimilarity(long, long)}.</p> + * + * @param itemID1 first item ID + * @param itemID2s second item IDs to compute similarity with + * @return similarity between itemID1 and other items + * @throws org.apache.mahout.cf.taste.common.NoSuchItemException + * if any item is known to be non-existent in the data + * @throws TasteException if an error occurs while accessing the data + */ + double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException; + + /** + * @return all IDs of similar items, in no particular order + */ + long[] allSimilarItemIDs(long itemID) throws TasteException; +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java new file mode 100644 index 0000000..76bb328 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/PreferenceInferrer.java @@ -0,0 +1,47 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.similarity; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; + +/** + * <p> + * Implementations of this interface compute an inferred preference for a user and an item that the user has + * not expressed any preference for. This might be an average of other preference scores from that user, for + * example. This technique is sometimes called "default voting". + * </p> + */ +public interface PreferenceInferrer extends Refreshable { + + /** + * <p> + * Infers the given user's preference value for an item. 
+ * </p> + * + * @param userID + * ID of user to infer preference for + * @param itemID + * item ID to infer preference for + * @return inferred preference + * @throws TasteException + * if an error occurs while inferring + */ + float inferPreference(long userID, long itemID) throws TasteException; + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java new file mode 100644 index 0000000..bd53c51 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/UserSimilarity.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.similarity; + +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; + +/** + * <p> + * Implementations of this interface define a notion of similarity between two users. 
Implementations should + * return values in the range -1.0 to 1.0, with 1.0 representing perfect similarity. + * </p> + * + * @see ItemSimilarity + */ +public interface UserSimilarity extends Refreshable { + + /** + * <p> + * Returns the degree of similarity, of two users, based on their preferences. + * </p> + * + * @param userID1 first user ID + * @param userID2 second user ID + * @return similarity between the users, in [-1,1] or {@link Double#NaN} if similarity is unknown + * @throws org.apache.mahout.cf.taste.common.NoSuchUserException + * if either user is known to be non-existent in the data + * @throws TasteException if an error occurs while accessing the data + */ + double userSimilarity(long userID1, long userID2) throws TasteException; + + // Should we implement userSimilarities() like ItemSimilarity.itemSimilarities()? + + /** + * <p> + * Attaches a {@link PreferenceInferrer} to the {@link UserSimilarity} implementation. + * </p> + * + * @param inferrer {@link PreferenceInferrer} + */ + void setPreferenceInferrer(PreferenceInferrer inferrer); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java new file mode 100644 index 0000000..b934d0c --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/BatchItemSimilarities.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.similarity.precompute; + +import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender; + +import java.io.IOException; + +public abstract class BatchItemSimilarities { + + private final ItemBasedRecommender recommender; + private final int similarItemsPerItem; + + /** + * @param recommender recommender to use + * @param similarItemsPerItem number of similar items to compute per item + */ + protected BatchItemSimilarities(ItemBasedRecommender recommender, int similarItemsPerItem) { + this.recommender = recommender; + this.similarItemsPerItem = similarItemsPerItem; + } + + protected ItemBasedRecommender getRecommender() { + return recommender; + } + + protected int getSimilarItemsPerItem() { + return similarItemsPerItem; + } + + /** + * @param degreeOfParallelism number of threads to use for the computation + * @param maxDurationInHours maximum duration of the computation + * @param writer {@link SimilarItemsWriter} used to persist the results + * @return the number of similarities precomputed + * @throws IOException + * @throws RuntimeException if the computation takes longer than maxDurationInHours + */ + public abstract int computeItemSimilarities(int degreeOfParallelism, int maxDurationInHours, + SimilarItemsWriter writer) throws IOException; +} 
http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java new file mode 100644 index 0000000..5d40051 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItem.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.cf.taste.similarity.precompute; + +import com.google.common.primitives.Doubles; + +import java.util.Comparator; + +/** + * Modeling similarity towards another item + */ +public class SimilarItem { + + public static final Comparator<SimilarItem> COMPARE_BY_SIMILARITY = new Comparator<SimilarItem>() { + @Override + public int compare(SimilarItem s1, SimilarItem s2) { + return Doubles.compare(s1.similarity, s2.similarity); + } + }; + + private long itemID; + private double similarity; + + public SimilarItem(long itemID, double similarity) { + set(itemID, similarity); + } + + public void set(long itemID, double similarity) { + this.itemID = itemID; + this.similarity = similarity; + } + + public long getItemID() { + return itemID; + } + + public double getSimilarity() { + return similarity; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java new file mode 100644 index 0000000..057e996 --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItems.java @@ -0,0 +1,84 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.similarity.precompute; + +import com.google.common.collect.UnmodifiableIterator; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; + +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; + +/** + * Compact representation of all similar items for an item + */ +public class SimilarItems { + + private final long itemID; + private final long[] similarItemIDs; + private final double[] similarities; + + public SimilarItems(long itemID, List<RecommendedItem> similarItems) { + this.itemID = itemID; + + int numSimilarItems = similarItems.size(); + similarItemIDs = new long[numSimilarItems]; + similarities = new double[numSimilarItems]; + + for (int n = 0; n < numSimilarItems; n++) { + similarItemIDs[n] = similarItems.get(n).getItemID(); + similarities[n] = similarItems.get(n).getValue(); + } + } + + public long getItemID() { + return itemID; + } + + public int numSimilarItems() { + return similarItemIDs.length; + } + + public Iterable<SimilarItem> getSimilarItems() { + return new Iterable<SimilarItem>() { + @Override + public Iterator<SimilarItem> iterator() { + return new SimilarItemsIterator(); + } + }; + } + + private class SimilarItemsIterator extends UnmodifiableIterator<SimilarItem> { + + private int index = -1; + + @Override + public boolean hasNext() { + return index < (similarItemIDs.length - 1); + } + + @Override + public SimilarItem next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + index++; + return new 
SimilarItem(similarItemIDs[index], similarities[index]); + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/5eda9e1f/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java new file mode 100644 index 0000000..35d6bfe --- /dev/null +++ b/community/mahout-mr/src/main/java/org/apache/mahout/cf/taste/similarity/precompute/SimilarItemsWriter.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.similarity.precompute; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Used to persist the results of a batch item similarity computation + * conducted with a {@link BatchItemSimilarities} implementation + */ +public interface SimilarItemsWriter extends Closeable { + + void open() throws IOException; + + void add(SimilarItems similarItems) throws IOException; + +}
