http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java new file mode 100644 index 0000000..e267a39 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/OrderBasedRecommenderEvaluator.java @@ -0,0 +1,431 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.eval; + +import java.util.Arrays; +import java.util.List; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.impl.common.RunningAverage; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.cf.taste.recommender.RecommendedItem; +import org.apache.mahout.cf.taste.recommender.Recommender; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** + * Evaluate recommender by comparing order of all raw prefs with order in + * recommender's output for that user. Can also compare data models. + */ +public final class OrderBasedRecommenderEvaluator { + + private static final Logger log = LoggerFactory.getLogger(OrderBasedRecommenderEvaluator.class); + + private OrderBasedRecommenderEvaluator() { + } + + public static void evaluate(Recommender recommender1, + Recommender recommender2, + int samples, + RunningAverage tracker, + String tag) throws TasteException { + printHeader(); + LongPrimitiveIterator users = recommender1.getDataModel().getUserIDs(); + + while (users.hasNext()) { + long userID = users.nextLong(); + List<RecommendedItem> recs1 = recommender1.recommend(userID, samples); + List<RecommendedItem> recs2 = recommender2.recommend(userID, samples); + FastIDSet commonSet = new FastIDSet(); + long maxItemID = setBits(commonSet, recs1, samples); + FastIDSet otherSet = new FastIDSet(); + maxItemID = Math.max(maxItemID, setBits(otherSet, recs2, samples)); + int max = mask(commonSet, otherSet, maxItemID); + max = Math.min(max, samples); + if (max < 2) { + continue; + } + Long[] items1 = getCommonItems(commonSet, recs1, max); + Long[] items2 = getCommonItems(commonSet, recs2, max); + double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2); + tracker.addDatum(variance); + } + } + + public static void evaluate(Recommender recommender, + DataModel model, + int samples, + RunningAverage tracker, + String tag) throws TasteException { + printHeader(); + LongPrimitiveIterator users = recommender.getDataModel().getUserIDs(); + while (users.hasNext()) { + long userID = users.nextLong(); + List<RecommendedItem> recs1 = recommender.recommend(userID, model.getNumItems()); + PreferenceArray prefs2 = model.getPreferencesFromUser(userID); + prefs2.sortByValueReversed(); + FastIDSet commonSet = new FastIDSet(); + long maxItemID = setBits(commonSet, recs1, samples); + FastIDSet otherSet = new FastIDSet(); + maxItemID = Math.max(maxItemID, setBits(otherSet, prefs2, samples)); + int max = mask(commonSet, otherSet, maxItemID); + max = Math.min(max, samples); + if (max < 2) { + continue; + } + Long[] items1 = getCommonItems(commonSet, recs1, max); + Long[] items2 = getCommonItems(commonSet, prefs2, max); + double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2); + tracker.addDatum(variance); + } + } + + public static void evaluate(DataModel model1, + DataModel model2, + int samples, + RunningAverage tracker, + String tag) throws TasteException { + printHeader(); + LongPrimitiveIterator users = model1.getUserIDs(); + while (users.hasNext()) { + long userID = users.nextLong(); + PreferenceArray prefs1 = model1.getPreferencesFromUser(userID); + PreferenceArray prefs2 = model2.getPreferencesFromUser(userID); + prefs1.sortByValueReversed(); + prefs2.sortByValueReversed(); + FastIDSet commonSet = new FastIDSet(); + long maxItemID = setBits(commonSet, prefs1, samples); + FastIDSet otherSet = new FastIDSet(); + maxItemID = Math.max(maxItemID, setBits(otherSet, prefs2, samples)); + int max = mask(commonSet, otherSet, maxItemID); + max = Math.min(max, samples); + if (max < 2) { + continue; + } + Long[] items1 = getCommonItems(commonSet, prefs1, max); + Long[] items2 = getCommonItems(commonSet, prefs2, max); + double variance = scoreCommonSubset(tag, userID, samples, max, items1, items2); + tracker.addDatum(variance); + } + } + + /** + * This exists because FastIDSet has 'retainAll' as MASK, but there is + * no count of the number of items in the set. size() is supposed to do + * this but does not work. + */ + private static int mask(FastIDSet commonSet, FastIDSet otherSet, long maxItemID) { + int count = 0; + for (int i = 0; i <= maxItemID; i++) { + if (commonSet.contains(i)) { + if (otherSet.contains(i)) { + count++; + } else { + commonSet.remove(i); + } + } + } + return count; + } + + private static Long[] getCommonItems(FastIDSet commonSet, Iterable<RecommendedItem> recs, int max) { + Long[] commonItems = new Long[max]; + int index = 0; + for (RecommendedItem rec : recs) { + Long item = rec.getItemID(); + if (commonSet.contains(item)) { + commonItems[index++] = item; + } + if (index == max) { + break; + } + } + return commonItems; + } + + private static Long[] getCommonItems(FastIDSet commonSet, PreferenceArray prefs1, int max) { + Long[] commonItems = new Long[max]; + int index = 0; + for (int i = 0; i < prefs1.length(); i++) { + Long item = prefs1.getItemID(i); + if (commonSet.contains(item)) { + commonItems[index++] = item; + } + if (index == max) { + break; + } + } + return commonItems; + } + + private static long setBits(FastIDSet modelSet, List<RecommendedItem> items, int max) { + long maxItem = -1; + for (int i = 0; i < items.size() && i < max; i++) { + long itemID = items.get(i).getItemID(); + modelSet.add(itemID); + if (itemID > maxItem) { + maxItem = itemID; + } + } + return maxItem; + } + + private static long setBits(FastIDSet modelSet, PreferenceArray prefs, int max) { + long maxItem = -1; + for (int i = 0; i < prefs.length() && i < max; i++) { + long itemID = prefs.getItemID(i); + modelSet.add(itemID); + if (itemID > maxItem) { + maxItem = itemID; + } + } + return maxItem; + } + + private static void printHeader() { + log.info("tag,user,samples,common,hamming,bubble,rank,normal,score"); + } + + /** + * Common Subset Scoring + * + * These measurements are given the set of results that are common to both + * recommendation lists. They only get ordered lists. + * + * These measures all return raw numbers do not correlate among the tests. + * The numbers are not corrected against the total number of samples or the + * number of common items. + * The one contract is that all measures are 0 for an exact match and an + * increasing positive number as differences increase. + */ + private static double scoreCommonSubset(String tag, + long userID, + int samples, + int subset, + Long[] itemsL, + Long[] itemsR) { + int[] vectorZ = new int[subset]; + int[] vectorZabs = new int[subset]; + + long bubble = sort(itemsL, itemsR); + int hamming = slidingWindowHamming(itemsR, itemsL); + if (hamming > samples) { + throw new IllegalStateException(); + } + getVectorZ(itemsR, itemsL, vectorZ, vectorZabs); + double normalW = normalWilcoxon(vectorZ, vectorZabs); + double meanRank = getMeanRank(vectorZabs); + // case statement for requested value + double variance = Math.sqrt(meanRank); + log.info("{},{},{},{},{},{},{},{},{}", + tag, userID, samples, subset, hamming, bubble, meanRank, normalW, variance); + return variance; + } + + // simple sliding-window hamming distance: a[i or plus/minus 1] == b[i] + private static int slidingWindowHamming(Long[] itemsR, Long[] itemsL) { + int count = 0; + int samples = itemsR.length; + + if (itemsR[0].equals(itemsL[0]) || itemsR[0].equals(itemsL[1])) { + count++; + } + for (int i = 1; i < samples - 1; i++) { + long itemID = itemsL[i]; + if (itemsR[i] == itemID || itemsR[i - 1] == itemID || itemsR[i + 1] == itemID) { + count++; + } + } + if (itemsR[samples - 1].equals(itemsL[samples - 1]) || itemsR[samples - 1].equals(itemsL[samples - 2])) { + count++; + } + return count; + } + + /** + * Normal-distribution probability value for matched sets of values. + * Based upon: + * http://comp9.psych.cornell.edu/Darlington/normscor.htm + * + * The Standard Wilcoxon is not used because it requires a lookup table. + */ + static double normalWilcoxon(int[] vectorZ, int[] vectorZabs) { + int nitems = vectorZ.length; + + double[] ranks = new double[nitems]; + double[] ranksAbs = new double[nitems]; + wilcoxonRanks(vectorZ, vectorZabs, ranks, ranksAbs); + return Math.min(getMeanWplus(ranks), getMeanWminus(ranks)); + } + + /** + * vector Z is a list of distances between the correct value and the recommended value + * Z[i] = position i of correct itemID - position of correct itemID in recommendation list + * can be positive or negative + * the smaller the better - means recommendations are closer + * both are the same length, and both sample from the same set + * + * destructive to items arrays - allows N log N instead of N^2 order + */ + private static void getVectorZ(Long[] itemsR, Long[] itemsL, int[] vectorZ, int[] vectorZabs) { + int nitems = itemsR.length; + int bottom = 0; + int top = nitems - 1; + for (int i = 0; i < nitems; i++) { + long itemID = itemsR[i]; + for (int j = bottom; j <= top; j++) { + if (itemsL[j] == null) { + continue; + } + long test = itemsL[j]; + if (itemID == test) { + vectorZ[i] = i - j; + vectorZabs[i] = Math.abs(i - j); + if (j == bottom) { + bottom++; + } else if (j == top) { + top--; + } else { + itemsL[j] = null; + } + break; + } + } + } + } + + /** + * Ranks are the position of the value from low to high, divided by the # of values. + * I had to walk through it a few times. + */ + private static void wilcoxonRanks(int[] vectorZ, int[] vectorZabs, double[] ranks, double[] ranksAbs) { + int nitems = vectorZ.length; + int[] sorted = vectorZabs.clone(); + Arrays.sort(sorted); + int zeros = 0; + for (; zeros < nitems; zeros++) { + if (sorted[zeros] > 0) { + break; + } + } + for (int i = 0; i < nitems; i++) { + double rank = 0.0; + int count = 0; + int score = vectorZabs[i]; + for (int j = 0; j < nitems; j++) { + if (score == sorted[j]) { + rank += j + 1 - zeros; + count++; + } else if (score < sorted[j]) { + break; + } + } + if (vectorZ[i] != 0) { + ranks[i] = (rank / count) * (vectorZ[i] < 0 ? -1 : 1); // better be at least 1 + ranksAbs[i] = Math.abs(ranks[i]); + } + } + } + + private static double getMeanRank(int[] ranks) { + int nitems = ranks.length; + double sum = 0.0; + for (int rank : ranks) { + sum += rank; + } + return sum / nitems; + } + + private static double getMeanWplus(double[] ranks) { + int nitems = ranks.length; + double sum = 0.0; + for (double rank : ranks) { + if (rank > 0) { + sum += rank; + } + } + return sum / nitems; + } + + private static double getMeanWminus(double[] ranks) { + int nitems = ranks.length; + double sum = 0.0; + for (double rank : ranks) { + if (rank < 0) { + sum -= rank; + } + } + return sum / nitems; + } + + /** + * Do bubble sort and return number of swaps needed to match preference lists. + * Sort itemsR using itemsL as the reference order. + */ + static long sort(Long[] itemsL, Long[] itemsR) { + int length = itemsL.length; + if (length < 2) { + return 0; + } + if (length == 2) { + return itemsL[0].longValue() == itemsR[0].longValue() ? 0 : 1; + } + // 1) avoid changing originals; 2) primitive type is more efficient + long[] reference = new long[length]; + long[] sortable = new long[length]; + for (int i = 0; i < length; i++) { + reference[i] = itemsL[i]; + sortable[i] = itemsR[i]; + } + int sorted = 0; + long swaps = 0; + while (sorted < length - 1) { + // opportunistically trim back the top + while (length > 0 && reference[length - 1] == sortable[length - 1]) { + length--; + } + if (length == 0) { + break; + } + if (reference[sorted] == sortable[sorted]) { + sorted++; + } else { + for (int j = sorted; j < length - 1; j++) { + // do not swap anything already in place + int jump = 1; + if (reference[j] == sortable[j]) { + while (j + jump < length && reference[j + jump] == sortable[j + jump]) { + jump++; + } + } + if (j + jump < length && !(reference[j] == sortable[j] && reference[j + jump] == sortable[j + jump])) { + long tmp = sortable[j]; + sortable[j] = sortable[j + 1]; + sortable[j + 1] = tmp; + swaps++; + } + } + } + } + return swaps; + } + +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java new file mode 100644 index 0000000..97eda10 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/RMSRecommenderEvaluator.java @@ -0,0 +1,56 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.eval; + +import org.apache.mahout.cf.taste.impl.common.FullRunningAverage; +import org.apache.mahout.cf.taste.impl.common.RunningAverage; +import org.apache.mahout.cf.taste.model.Preference; + +/** + * <p> + * A {@link org.apache.mahout.cf.taste.eval.RecommenderEvaluator} which computes the "root mean squared" + * difference between predicted and actual ratings for users. This is the square root of the average of this + * difference, squared. + * </p> + */ +public final class RMSRecommenderEvaluator extends AbstractDifferenceRecommenderEvaluator { + + private RunningAverage average; + + @Override + protected void reset() { + average = new FullRunningAverage(); + } + + @Override + protected void processOneEstimate(float estimatedPreference, Preference realPref) { + double diff = realPref.getValue() - estimatedPreference; + average.addDatum(diff * diff); + } + + @Override + protected double computeFinalEvaluation() { + return Math.sqrt(average.getAverage()); + } + + @Override + public String toString() { + return "RMSRecommenderEvaluator"; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java new file mode 100644 index 0000000..036d0b4 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/eval/StatsCallable.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.eval; + +import org.apache.mahout.cf.taste.impl.common.RunningAverageAndStdDev; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.concurrent.Callable; +import java.util.concurrent.atomic.AtomicInteger; + +final class StatsCallable implements Callable<Void> { + + private static final Logger log = LoggerFactory.getLogger(StatsCallable.class); + + private final Callable<Void> delegate; + private final boolean logStats; + private final RunningAverageAndStdDev timing; + private final AtomicInteger noEstimateCounter; + + StatsCallable(Callable<Void> delegate, + boolean logStats, + RunningAverageAndStdDev timing, + AtomicInteger noEstimateCounter) { + this.delegate = delegate; + this.logStats = logStats; + this.timing = timing; + this.noEstimateCounter = noEstimateCounter; + } + + @Override + public Void call() throws Exception { + long start = System.currentTimeMillis(); + delegate.call(); + long end = System.currentTimeMillis(); + timing.addDatum(end - start); + if (logStats) { + Runtime runtime = Runtime.getRuntime(); + int average = (int) timing.getAverage(); + log.info("Average time per recommendation: {}ms", average); + long totalMemory = runtime.totalMemory(); + long memory = totalMemory - runtime.freeMemory(); + log.info("Approximate memory used: {}MB / {}MB", memory / 1000000L, totalMemory / 1000000L); + log.info("Unable to recommend in {} cases", noEstimateCounter.get()); + } + return null; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java new file mode 100644 index 0000000..a1a2a1f --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractDataModel.java @@ -0,0 +1,53 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import org.apache.mahout.cf.taste.model.DataModel; + +/** + * Contains some features common to all implementations. + */ +public abstract class AbstractDataModel implements DataModel { + + private float maxPreference; + private float minPreference; + + protected AbstractDataModel() { + maxPreference = Float.NaN; + minPreference = Float.NaN; + } + + @Override + public float getMaxPreference() { + return maxPreference; + } + + protected void setMaxPreference(float maxPreference) { + this.maxPreference = maxPreference; + } + + @Override + public float getMinPreference() { + return minPreference; + } + + protected void setMinPreference(float minPreference) { + this.minPreference = minPreference; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java new file mode 100644 index 0000000..6efa6fa --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractIDMigrator.java @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Collection; + +import org.apache.commons.io.Charsets; +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.model.IDMigrator; + +public abstract class AbstractIDMigrator implements IDMigrator { + + private final MessageDigest md5Digest; + + protected AbstractIDMigrator() { + try { + md5Digest = MessageDigest.getInstance("MD5"); + } catch (NoSuchAlgorithmException nsae) { + // Can't happen + throw new IllegalStateException(nsae); + } + } + + /** + * @return most significant 8 bytes of the MD5 hash of the string, as a long + */ + protected final long hash(String value) { + byte[] md5hash; + synchronized (md5Digest) { + md5hash = md5Digest.digest(value.getBytes(Charsets.UTF_8)); + md5Digest.reset(); + } + long hash = 0L; + for (int i = 0; i < 8; i++) { + hash = hash << 8 | md5hash[i] & 0x00000000000000FFL; + } + return hash; + } + + @Override + public long toLongID(String stringID) { + return hash(stringID); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java new file mode 100644 index 0000000..cd3a434 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/AbstractJDBCIDMigrator.java @@ -0,0 +1,108 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.sql.Connection; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; + +import javax.sql.DataSource; + +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.model.UpdatableIDMigrator; +import org.apache.mahout.common.IOUtils; + +/** + * Implementation which stores the reverse long-to-String mapping in a database. Subclasses can override and + * configure the class to operate with particular databases by supplying appropriate SQL statements to the + * constructor. + */ +public abstract class AbstractJDBCIDMigrator extends AbstractIDMigrator implements UpdatableIDMigrator { + + public static final String DEFAULT_MAPPING_TABLE = "taste_id_mapping"; + public static final String DEFAULT_LONG_ID_COLUMN = "long_id"; + public static final String DEFAULT_STRING_ID_COLUMN = "string_id"; + + private final DataSource dataSource; + private final String getStringIDSQL; + private final String storeMappingSQL; + + /** + * @param getStringIDSQL + * SQL statement which selects one column, the String ID, from a mapping table. The statement + * should take one long parameter. + * @param storeMappingSQL + * SQL statement which saves a mapping from long to String. It should take two parameters, a long + * and a String. + */ + protected AbstractJDBCIDMigrator(DataSource dataSource, String getStringIDSQL, String storeMappingSQL) { + this.dataSource = dataSource; + this.getStringIDSQL = getStringIDSQL; + this.storeMappingSQL = storeMappingSQL; + } + + @Override + public final void storeMapping(long longID, String stringID) throws TasteException { + Connection conn = null; + PreparedStatement stmt = null; + try { + conn = dataSource.getConnection(); + stmt = conn.prepareStatement(storeMappingSQL); + stmt.setLong(1, longID); + stmt.setString(2, stringID); + stmt.executeUpdate(); + } catch (SQLException sqle) { + throw new TasteException(sqle); + } finally { + IOUtils.quietClose(null, stmt, conn); + } + } + + @Override + public final String toStringID(long longID) throws TasteException { + Connection conn = null; + PreparedStatement stmt = null; + ResultSet rs = null; + try { + conn = dataSource.getConnection(); + stmt = conn.prepareStatement(getStringIDSQL, ResultSet.TYPE_FORWARD_ONLY, ResultSet.CONCUR_READ_ONLY); + stmt.setFetchDirection(ResultSet.FETCH_FORWARD); + stmt.setFetchSize(1); + stmt.setLong(1, longID); + rs = stmt.executeQuery(); + if (rs.next()) { + return rs.getString(1); + } else { + return null; + } + } catch (SQLException sqle) { + throw new TasteException(sqle); + } finally { + IOUtils.quietClose(rs, stmt, conn); + } + } + + @Override + public void initialize(Iterable<String> stringIDs) throws TasteException { + for (String stringID : stringIDs) { + storeMapping(toLongID(stringID), stringID); + } + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java new file mode 100644 index 0000000..6db5807 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanItemPreferenceArray.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import com.google.common.base.Function; +import com.google.common.collect.Iterators; +import org.apache.mahout.cf.taste.model.Preference; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.common.iterator.CountingIterator; + +/** + * <p> + * Like {@link BooleanUserPreferenceArray} but stores preferences for one item (all item IDs the same) rather + * than one user. + * </p> + * + * @see BooleanPreference + * @see BooleanUserPreferenceArray + * @see GenericItemPreferenceArray + */ +public final class BooleanItemPreferenceArray implements PreferenceArray { + + private final long[] ids; + private long id; + + public BooleanItemPreferenceArray(int size) { + this.ids = new long[size]; + this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value + } + + public BooleanItemPreferenceArray(List<? extends Preference> prefs, boolean forOneUser) { + this(prefs.size()); + int size = prefs.size(); + for (int i = 0; i < size; i++) { + Preference pref = prefs.get(i); + ids[i] = forOneUser ? pref.getItemID() : pref.getUserID(); + } + if (size > 0) { + id = forOneUser ? prefs.get(0).getUserID() : prefs.get(0).getItemID(); + } + } + + /** + * This is a private copy constructor for clone(). + */ + private BooleanItemPreferenceArray(long[] ids, long id) { + this.ids = ids; + this.id = id; + } + + @Override + public int length() { + return ids.length; + } + + @Override + public Preference get(int i) { + return new PreferenceView(i); + } + + @Override + public void set(int i, Preference pref) { + id = pref.getItemID(); + ids[i] = pref.getUserID(); + } + + @Override + public long getUserID(int i) { + return ids[i]; + } + + @Override + public void setUserID(int i, long userID) { + ids[i] = userID; + } + + @Override + public long getItemID(int i) { + return id; + } + + /** + * {@inheritDoc} + * + * Note that this method will actually set the item ID for <em>all</em> preferences. + */ + @Override + public void setItemID(int i, long itemID) { + id = itemID; + } + + /** + * @return all user IDs + */ + @Override + public long[] getIDs() { + return ids; + } + + @Override + public float getValue(int i) { + return 1.0f; + } + + @Override + public void setValue(int i, float value) { + throw new UnsupportedOperationException(); + } + + @Override + public void sortByUser() { + Arrays.sort(ids); + } + + @Override + public void sortByItem() { } + + @Override + public void sortByValue() { } + + @Override + public void sortByValueReversed() { } + + @Override + public boolean hasPrefWithUserID(long userID) { + for (long id : ids) { + if (userID == id) { + return true; + } + } + return false; + } + + @Override + public boolean hasPrefWithItemID(long itemID) { + return id == itemID; + } + + @Override + public BooleanItemPreferenceArray clone() { + return new BooleanItemPreferenceArray(ids.clone(), id); + } + + @Override + public int hashCode() { + return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof BooleanItemPreferenceArray)) { + return false; + } + BooleanItemPreferenceArray otherArray = (BooleanItemPreferenceArray) other; + return id == otherArray.id && Arrays.equals(ids, otherArray.ids); + } + + @Override + public Iterator<Preference> iterator() { + return Iterators.transform(new CountingIterator(length()), + new Function<Integer, Preference>() { + @Override + public Preference apply(Integer from) { + return new PreferenceView(from); + } + }); + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(10 * ids.length); + result.append("BooleanItemPreferenceArray[itemID:"); + result.append(id); + result.append(",{"); + for (int i = 0; i < ids.length; i++) { + if (i > 0) { + result.append(','); + } + result.append(ids[i]); + } + result.append("}]"); + return result.toString(); + } + + private final class PreferenceView implements Preference { + + private final int i; + + private PreferenceView(int i) { + this.i = i; + } + + @Override + public long getUserID() { + return BooleanItemPreferenceArray.this.getUserID(i); + } + + @Override + public long getItemID() { + return BooleanItemPreferenceArray.this.getItemID(i); + } + + @Override + public float getValue() { + return 1.0f; + } + + @Override + public void setValue(float value) { + throw new UnsupportedOperationException(); + } + + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java new file mode 100644 index 0000000..2093af8 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanPreference.java @@ -0,0 +1,64 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.io.Serializable; + +import org.apache.mahout.cf.taste.model.Preference; + +/** + * Encapsulates a simple boolean "preference" for an item whose value does not matter (is fixed at 1.0). This + * is appropriate in situations where users conceptually have only a general "yes" preference for items, + * rather than a spectrum of preference values. + */ +public final class BooleanPreference implements Preference, Serializable { + + private final long userID; + private final long itemID; + + public BooleanPreference(long userID, long itemID) { + this.userID = userID; + this.itemID = itemID; + } + + @Override + public long getUserID() { + return userID; + } + + @Override + public long getItemID() { + return itemID; + } + + @Override + public float getValue() { + return 1.0f; + } + + @Override + public void setValue(float value) { + throw new UnsupportedOperationException(); + } + + @Override + public String toString() { + return "BooleanPreference[userID: " + userID + ", itemID:" + itemID + ']'; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java new file mode 100644 index 0000000..629e0cf --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/BooleanUserPreferenceArray.java @@ -0,0 +1,234 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import com.google.common.base.Function; +import com.google.common.collect.Iterators; +import org.apache.mahout.cf.taste.model.Preference; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.common.iterator.CountingIterator; + +/** + * <p> + * Like {@link GenericUserPreferenceArray} but stores, conceptually, {@link BooleanPreference} objects which + * have no associated preference value. + * </p> + * + * @see BooleanPreference + * @see BooleanItemPreferenceArray + * @see GenericUserPreferenceArray + */ +public final class BooleanUserPreferenceArray implements PreferenceArray { + + private final long[] ids; + private long id; + + public BooleanUserPreferenceArray(int size) { + this.ids = new long[size]; + this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value + } + + public BooleanUserPreferenceArray(List<? extends Preference> prefs) { + this(prefs.size()); + int size = prefs.size(); + for (int i = 0; i < size; i++) { + Preference pref = prefs.get(i); + ids[i] = pref.getItemID(); + } + if (size > 0) { + id = prefs.get(0).getUserID(); + } + } + + /** + * This is a private copy constructor for clone(). + */ + private BooleanUserPreferenceArray(long[] ids, long id) { + this.ids = ids; + this.id = id; + } + + @Override + public int length() { + return ids.length; + } + + @Override + public Preference get(int i) { + return new PreferenceView(i); + } + + @Override + public void set(int i, Preference pref) { + id = pref.getUserID(); + ids[i] = pref.getItemID(); + } + + @Override + public long getUserID(int i) { + return id; + } + + /** + * {@inheritDoc} + * + * Note that this method will actually set the user ID for <em>all</em> preferences. + */ + @Override + public void setUserID(int i, long userID) { + id = userID; + } + + @Override + public long getItemID(int i) { + return ids[i]; + } + + @Override + public void setItemID(int i, long itemID) { + ids[i] = itemID; + } + + /** + * @return all item IDs + */ + @Override + public long[] getIDs() { + return ids; + } + + @Override + public float getValue(int i) { + return 1.0f; + } + + @Override + public void setValue(int i, float value) { + throw new UnsupportedOperationException(); + } + + @Override + public void sortByUser() { } + + @Override + public void sortByItem() { + Arrays.sort(ids); + } + + @Override + public void sortByValue() { } + + @Override + public void sortByValueReversed() { } + + @Override + public boolean hasPrefWithUserID(long userID) { + return id == userID; + } + + @Override + public boolean hasPrefWithItemID(long itemID) { + for (long id : ids) { + if (itemID == id) { + return true; + } + } + return false; + } + + @Override + public BooleanUserPreferenceArray clone() { + return new BooleanUserPreferenceArray(ids.clone(), id); + } + + @Override + public int hashCode() { + return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof BooleanUserPreferenceArray)) { + return false; + } + BooleanUserPreferenceArray otherArray = (BooleanUserPreferenceArray) other; + return id == otherArray.id && Arrays.equals(ids, otherArray.ids); + } + + @Override + public Iterator<Preference> iterator() { + return Iterators.transform(new CountingIterator(length()), + new Function<Integer, Preference>() { + @Override + public Preference apply(Integer from) { + return new PreferenceView(from); + } + }); + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(10 * ids.length); + result.append("BooleanUserPreferenceArray[userID:"); + result.append(id); + result.append(",{"); + for (int i = 0; i < ids.length; i++) { + if (i > 0) { + result.append(','); + } + result.append(ids[i]); + } + result.append("}]"); + return result.toString(); + } + + private final class PreferenceView implements Preference { + + private final int i; + + private PreferenceView(int i) { + this.i = i; + } + + @Override + public long getUserID() { + return BooleanUserPreferenceArray.this.getUserID(i); + } + + @Override + public long getItemID() { + return BooleanUserPreferenceArray.this.getItemID(i); + } + + @Override + public float getValue() { + return 1.0f; + } + + @Override + public void setValue(float value) { + throw new UnsupportedOperationException(); + } + + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java new file mode 100644 index 0000000..2c1ff4d --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericBooleanPrefDataModel.java @@ -0,0 +1,320 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.util.Arrays; +import java.util.Collection; +import java.util.Map; + +import org.apache.mahout.cf.taste.common.NoSuchItemException; +import org.apache.mahout.cf.taste.common.NoSuchUserException; +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastByIDMap; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.PreferenceArray; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A simple {@link DataModel} which uses given user data as its data source. This implementation + * is mostly useful for small experiments and is not recommended for contexts where performance is important. + * </p> + */ +public final class GenericBooleanPrefDataModel extends AbstractDataModel { + + private final long[] userIDs; + private final FastByIDMap<FastIDSet> preferenceFromUsers; + private final long[] itemIDs; + private final FastByIDMap<FastIDSet> preferenceForItems; + private final FastByIDMap<FastByIDMap<Long>> timestamps; + + /** + * <p> + * Creates a new {@link GenericDataModel} from the given users (and their preferences). This + * {@link DataModel} retains all this information in memory and is effectively immutable. + * </p> + * + * @param userData users to include + */ + public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData) { + this(userData, null); + } + + /** + * <p> + * Creates a new {@link GenericDataModel} from the given users (and their preferences). This + * {@link DataModel} retains all this information in memory and is effectively immutable. + * </p> + * + * @param userData users to include + * @param timestamps optionally, provided timestamps of preferences as milliseconds since the epoch. + * User IDs are mapped to maps of item IDs to Long timestamps. + */ + public GenericBooleanPrefDataModel(FastByIDMap<FastIDSet> userData, FastByIDMap<FastByIDMap<Long>> timestamps) { + Preconditions.checkArgument(userData != null, "userData is null"); + + this.preferenceFromUsers = userData; + this.preferenceForItems = new FastByIDMap<>(); + FastIDSet itemIDSet = new FastIDSet(); + for (Map.Entry<Long, FastIDSet> entry : preferenceFromUsers.entrySet()) { + long userID = entry.getKey(); + FastIDSet itemIDs = entry.getValue(); + itemIDSet.addAll(itemIDs); + LongPrimitiveIterator it = itemIDs.iterator(); + while (it.hasNext()) { + long itemID = it.nextLong(); + FastIDSet userIDs = preferenceForItems.get(itemID); + if (userIDs == null) { + userIDs = new FastIDSet(2); + preferenceForItems.put(itemID, userIDs); + } + userIDs.add(userID); + } + } + + this.itemIDs = itemIDSet.toArray(); + itemIDSet = null; // Might help GC -- this is big + Arrays.sort(itemIDs); + + this.userIDs = new long[userData.size()]; + int i = 0; + LongPrimitiveIterator it = userData.keySetIterator(); + while (it.hasNext()) { + userIDs[i++] = it.next(); + } + Arrays.sort(userIDs); + + this.timestamps = timestamps; + } + + /** + * <p> + * Creates a new {@link GenericDataModel} containing an immutable copy of the data from another given + * {@link DataModel}. + * </p> + * + * @param dataModel + * {@link DataModel} to copy + * @throws TasteException + * if an error occurs while retrieving the other {@link DataModel}'s users + * @deprecated without direct replacement. + * Consider {@link #toDataMap(DataModel)} with {@link #GenericBooleanPrefDataModel(FastByIDMap)} + */ + @Deprecated + public GenericBooleanPrefDataModel(DataModel dataModel) throws TasteException { + this(toDataMap(dataModel)); + } + + /** + * Exports the simple user IDs and associated item IDs in the data model. + * + * @return a {@link FastByIDMap} mapping user IDs to {@link FastIDSet}s representing + * that user's associated items + */ + public static FastByIDMap<FastIDSet> toDataMap(DataModel dataModel) throws TasteException { + FastByIDMap<FastIDSet> data = new FastByIDMap<>(dataModel.getNumUsers()); + LongPrimitiveIterator it = dataModel.getUserIDs(); + while (it.hasNext()) { + long userID = it.nextLong(); + data.put(userID, dataModel.getItemIDsFromUser(userID)); + } + return data; + } + + public static FastByIDMap<FastIDSet> toDataMap(FastByIDMap<PreferenceArray> data) { + for (Map.Entry<Long,Object> entry : ((FastByIDMap<Object>) (FastByIDMap<?>) data).entrySet()) { + PreferenceArray prefArray = (PreferenceArray) entry.getValue(); + int size = prefArray.length(); + FastIDSet itemIDs = new FastIDSet(size); + for (int i = 0; i < size; i++) { + itemIDs.add(prefArray.getItemID(i)); + } + entry.setValue(itemIDs); + } + return (FastByIDMap<FastIDSet>) (FastByIDMap<?>) data; + } + + /** + * This is used mostly internally to the framework, and shouldn't be relied upon otherwise. + */ + public FastByIDMap<FastIDSet> getRawUserData() { + return this.preferenceFromUsers; + } + + /** + * This is used mostly internally to the framework, and shouldn't be relied upon otherwise. + */ + public FastByIDMap<FastIDSet> getRawItemData() { + return this.preferenceForItems; + } + + @Override + public LongPrimitiveArrayIterator getUserIDs() { + return new LongPrimitiveArrayIterator(userIDs); + } + + /** + * @throws NoSuchUserException + * if there is no such user + */ + @Override + public PreferenceArray getPreferencesFromUser(long userID) throws NoSuchUserException { + FastIDSet itemIDs = preferenceFromUsers.get(userID); + if (itemIDs == null) { + throw new NoSuchUserException(userID); + } + PreferenceArray prefArray = new BooleanUserPreferenceArray(itemIDs.size()); + int i = 0; + LongPrimitiveIterator it = itemIDs.iterator(); + while (it.hasNext()) { + prefArray.setUserID(i, userID); + prefArray.setItemID(i, it.nextLong()); + i++; + } + return prefArray; + } + + @Override + public FastIDSet getItemIDsFromUser(long userID) throws TasteException { + FastIDSet itemIDs = preferenceFromUsers.get(userID); + if (itemIDs == null) { + throw new NoSuchUserException(userID); + } + return itemIDs; + } + + @Override + public LongPrimitiveArrayIterator getItemIDs() { + return new LongPrimitiveArrayIterator(itemIDs); + } + + @Override + public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException { + FastIDSet userIDs = preferenceForItems.get(itemID); + if (userIDs == null) { + throw new NoSuchItemException(itemID); + } + PreferenceArray prefArray = new BooleanItemPreferenceArray(userIDs.size()); + int i = 0; + LongPrimitiveIterator it = userIDs.iterator(); + while (it.hasNext()) { + prefArray.setUserID(i, it.nextLong()); + prefArray.setItemID(i, itemID); + i++; + } + return prefArray; + } + + @Override + public Float getPreferenceValue(long userID, long itemID) throws NoSuchUserException { + FastIDSet itemIDs = preferenceFromUsers.get(userID); + if (itemIDs == null) { + throw new NoSuchUserException(userID); + } + if (itemIDs.contains(itemID)) { + return 1.0f; + } + return null; + } + + @Override + public Long getPreferenceTime(long userID, long itemID) throws TasteException { + if (timestamps == null) { + return null; + } + FastByIDMap<Long> itemTimestamps = timestamps.get(userID); + if (itemTimestamps == null) { + throw new NoSuchUserException(userID); + } + return itemTimestamps.get(itemID); + } + + @Override + public int getNumItems() { + return itemIDs.length; + } + + @Override + public int getNumUsers() { + return userIDs.length; + } + + @Override + public int getNumUsersWithPreferenceFor(long itemID) { + FastIDSet userIDs1 = preferenceForItems.get(itemID); + return userIDs1 == null ? 0 : userIDs1.size(); + } + + @Override + public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) { + FastIDSet userIDs1 = preferenceForItems.get(itemID1); + if (userIDs1 == null) { + return 0; + } + FastIDSet userIDs2 = preferenceForItems.get(itemID2); + if (userIDs2 == null) { + return 0; + } + return userIDs1.size() < userIDs2.size() + ? userIDs2.intersectionSize(userIDs1) + : userIDs1.intersectionSize(userIDs2); + } + + @Override + public void removePreference(long userID, long itemID) { + throw new UnsupportedOperationException(); + } + + @Override + public void setPreference(long userID, long itemID, float value) { + throw new UnsupportedOperationException(); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + // Does nothing + } + + @Override + public boolean hasPreferenceValues() { + return false; + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(200); + result.append("GenericBooleanPrefDataModel[users:"); + for (int i = 0; i < Math.min(3, userIDs.length); i++) { + if (i > 0) { + result.append(','); + } + result.append(userIDs[i]); + } + if (userIDs.length > 3) { + result.append("..."); + } + result.append(']'); + return result.toString(); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java new file mode 100644 index 0000000..f58d349 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericDataModel.java @@ -0,0 +1,361 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.util.Arrays; +import java.util.Collection; +import java.util.List; +import java.util.Map; + +import com.google.common.collect.Lists; +import org.apache.mahout.cf.taste.common.NoSuchItemException; +import org.apache.mahout.cf.taste.common.NoSuchUserException; +import org.apache.mahout.cf.taste.common.Refreshable; +import org.apache.mahout.cf.taste.common.TasteException; +import org.apache.mahout.cf.taste.impl.common.FastByIDMap; +import org.apache.mahout.cf.taste.impl.common.FastIDSet; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveArrayIterator; +import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator; +import org.apache.mahout.cf.taste.model.DataModel; +import org.apache.mahout.cf.taste.model.Preference; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A simple {@link DataModel} which uses a given {@link List} of users as its data source. This implementation + * is mostly useful for small experiments and is not recommended for contexts where performance is important. + * </p> + */ +public final class GenericDataModel extends AbstractDataModel { + + private static final Logger log = LoggerFactory.getLogger(GenericDataModel.class); + + private final long[] userIDs; + private final FastByIDMap<PreferenceArray> preferenceFromUsers; + private final long[] itemIDs; + private final FastByIDMap<PreferenceArray> preferenceForItems; + private final FastByIDMap<FastByIDMap<Long>> timestamps; + + /** + * <p> + * Creates a new {@link GenericDataModel} from the given users (and their preferences). This + * {@link DataModel} retains all this information in memory and is effectively immutable. + * </p> + * + * @param userData users to include; (see also {@link #toDataMap(FastByIDMap, boolean)}) + */ + public GenericDataModel(FastByIDMap<PreferenceArray> userData) { + this(userData, null); + } + + /** + * <p> + * Creates a new {@link GenericDataModel} from the given users (and their preferences). This + * {@link DataModel} retains all this information in memory and is effectively immutable. + * </p> + * + * @param userData users to include; (see also {@link #toDataMap(FastByIDMap, boolean)}) + * @param timestamps optionally, provided timestamps of preferences as milliseconds since the epoch. + * User IDs are mapped to maps of item IDs to Long timestamps. + */ + public GenericDataModel(FastByIDMap<PreferenceArray> userData, FastByIDMap<FastByIDMap<Long>> timestamps) { + Preconditions.checkArgument(userData != null, "userData is null"); + + this.preferenceFromUsers = userData; + FastByIDMap<Collection<Preference>> prefsForItems = new FastByIDMap<>(); + FastIDSet itemIDSet = new FastIDSet(); + int currentCount = 0; + float maxPrefValue = Float.NEGATIVE_INFINITY; + float minPrefValue = Float.POSITIVE_INFINITY; + for (Map.Entry<Long, PreferenceArray> entry : preferenceFromUsers.entrySet()) { + PreferenceArray prefs = entry.getValue(); + prefs.sortByItem(); + for (Preference preference : prefs) { + long itemID = preference.getItemID(); + itemIDSet.add(itemID); + Collection<Preference> prefsForItem = prefsForItems.get(itemID); + if (prefsForItem == null) { + prefsForItem = Lists.newArrayListWithCapacity(2); + prefsForItems.put(itemID, prefsForItem); + } + prefsForItem.add(preference); + float value = preference.getValue(); + if (value > maxPrefValue) { + maxPrefValue = value; + } + if (value < minPrefValue) { + minPrefValue = value; + } + } + if (++currentCount % 10000 == 0) { + log.info("Processed {} users", currentCount); + } + } + log.info("Processed {} users", currentCount); + + setMinPreference(minPrefValue); + setMaxPreference(maxPrefValue); + + this.itemIDs = itemIDSet.toArray(); + itemIDSet = null; // Might help GC -- this is big + Arrays.sort(itemIDs); + + this.preferenceForItems = toDataMap(prefsForItems, false); + + for (Map.Entry<Long, PreferenceArray> entry : preferenceForItems.entrySet()) { + entry.getValue().sortByUser(); + } + + this.userIDs = new long[userData.size()]; + int i = 0; + LongPrimitiveIterator it = userData.keySetIterator(); + while (it.hasNext()) { + userIDs[i++] = it.next(); + } + Arrays.sort(userIDs); + + this.timestamps = timestamps; + } + + /** + * <p> + * Creates a new {@link GenericDataModel} containing an immutable copy of the data from another given + * {@link DataModel}. + * </p> + * + * @param dataModel {@link DataModel} to copy + * @throws TasteException if an error occurs while retrieving the other {@link DataModel}'s users + * @deprecated without direct replacement. + * Consider {@link #toDataMap(DataModel)} with {@link #GenericDataModel(FastByIDMap)} + */ + @Deprecated + public GenericDataModel(DataModel dataModel) throws TasteException { + this(toDataMap(dataModel)); + } + + /** + * Swaps, in-place, {@link List}s for arrays in {@link Map} values . + * + * @return input value + */ + public static FastByIDMap<PreferenceArray> toDataMap(FastByIDMap<Collection<Preference>> data, + boolean byUser) { + for (Map.Entry<Long,Object> entry : ((FastByIDMap<Object>) (FastByIDMap<?>) data).entrySet()) { + List<Preference> prefList = (List<Preference>) entry.getValue(); + entry.setValue(byUser ? new GenericUserPreferenceArray(prefList) : new GenericItemPreferenceArray( + prefList)); + } + return (FastByIDMap<PreferenceArray>) (FastByIDMap<?>) data; + } + + /** + * Exports the simple user IDs and preferences in the data model. + * + * @return a {@link FastByIDMap} mapping user IDs to {@link PreferenceArray}s representing + * that user's preferences + */ + public static FastByIDMap<PreferenceArray> toDataMap(DataModel dataModel) throws TasteException { + FastByIDMap<PreferenceArray> data = new FastByIDMap<>(dataModel.getNumUsers()); + LongPrimitiveIterator it = dataModel.getUserIDs(); + while (it.hasNext()) { + long userID = it.nextLong(); + data.put(userID, dataModel.getPreferencesFromUser(userID)); + } + return data; + } + + /** + * This is used mostly internally to the framework, and shouldn't be relied upon otherwise. + */ + public FastByIDMap<PreferenceArray> getRawUserData() { + return this.preferenceFromUsers; + } + + /** + * This is used mostly internally to the framework, and shouldn't be relied upon otherwise. + */ + public FastByIDMap<PreferenceArray> getRawItemData() { + return this.preferenceForItems; + } + + @Override + public LongPrimitiveArrayIterator getUserIDs() { + return new LongPrimitiveArrayIterator(userIDs); + } + + /** + * @throws NoSuchUserException + * if there is no such user + */ + @Override + public PreferenceArray getPreferencesFromUser(long userID) throws NoSuchUserException { + PreferenceArray prefs = preferenceFromUsers.get(userID); + if (prefs == null) { + throw new NoSuchUserException(userID); + } + return prefs; + } + + @Override + public FastIDSet getItemIDsFromUser(long userID) throws TasteException { + PreferenceArray prefs = getPreferencesFromUser(userID); + int size = prefs.length(); + FastIDSet result = new FastIDSet(size); + for (int i = 0; i < size; i++) { + result.add(prefs.getItemID(i)); + } + return result; + } + + @Override + public LongPrimitiveArrayIterator getItemIDs() { + return new LongPrimitiveArrayIterator(itemIDs); + } + + @Override + public PreferenceArray getPreferencesForItem(long itemID) throws NoSuchItemException { + PreferenceArray prefs = preferenceForItems.get(itemID); + if (prefs == null) { + throw new NoSuchItemException(itemID); + } + return prefs; + } + + @Override + public Float getPreferenceValue(long userID, long itemID) throws TasteException { + PreferenceArray prefs = getPreferencesFromUser(userID); + int size = prefs.length(); + for (int i = 0; i < size; i++) { + if (prefs.getItemID(i) == itemID) { + return prefs.getValue(i); + } + } + return null; + } + + @Override + public Long getPreferenceTime(long userID, long itemID) throws TasteException { + if (timestamps == null) { + return null; + } + FastByIDMap<Long> itemTimestamps = timestamps.get(userID); + if (itemTimestamps == null) { + throw new NoSuchUserException(userID); + } + return itemTimestamps.get(itemID); + } + + @Override + public int getNumItems() { + return itemIDs.length; + } + + @Override + public int getNumUsers() { + return userIDs.length; + } + + @Override + public int getNumUsersWithPreferenceFor(long itemID) { + PreferenceArray prefs1 = preferenceForItems.get(itemID); + return prefs1 == null ? 0 : prefs1.length(); + } + + @Override + public int getNumUsersWithPreferenceFor(long itemID1, long itemID2) { + PreferenceArray prefs1 = preferenceForItems.get(itemID1); + if (prefs1 == null) { + return 0; + } + PreferenceArray prefs2 = preferenceForItems.get(itemID2); + if (prefs2 == null) { + return 0; + } + + int size1 = prefs1.length(); + int size2 = prefs2.length(); + int count = 0; + int i = 0; + int j = 0; + long userID1 = prefs1.getUserID(0); + long userID2 = prefs2.getUserID(0); + while (true) { + if (userID1 < userID2) { + if (++i == size1) { + break; + } + userID1 = prefs1.getUserID(i); + } else if (userID1 > userID2) { + if (++j == size2) { + break; + } + userID2 = prefs2.getUserID(j); + } else { + count++; + if (++i == size1 || ++j == size2) { + break; + } + userID1 = prefs1.getUserID(i); + userID2 = prefs2.getUserID(j); + } + } + return count; + } + + @Override + public void removePreference(long userID, long itemID) { + throw new UnsupportedOperationException(); + } + + @Override + public void setPreference(long userID, long itemID, float value) { + throw new UnsupportedOperationException(); + } + + @Override + public void refresh(Collection<Refreshable> alreadyRefreshed) { + // Does nothing + } + + @Override + public boolean hasPreferenceValues() { + return true; + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(200); + result.append("GenericDataModel[users:"); + for (int i = 0; i < Math.min(3, userIDs.length); i++) { + if (i > 0) { + result.append(','); + } + result.append(userIDs[i]); + } + if (userIDs.length > 3) { + result.append("..."); + } + result.append(']'); + return result.toString(); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java new file mode 100644 index 0000000..fde9314 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericItemPreferenceArray.java @@ -0,0 +1,301 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import com.google.common.base.Function; +import com.google.common.collect.Iterators; +import org.apache.mahout.cf.taste.model.Preference; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.common.iterator.CountingIterator; + +/** + * <p> + * Like {@link GenericUserPreferenceArray} but stores preferences for one item (all item IDs the same) rather + * than one user. + * </p> + * + * @see BooleanItemPreferenceArray + * @see GenericUserPreferenceArray + * @see GenericPreference + */ +public final class GenericItemPreferenceArray implements PreferenceArray { + + private static final int USER = 0; + private static final int VALUE = 2; + private static final int VALUE_REVERSED = 3; + + private final long[] ids; + private long id; + private final float[] values; + + public GenericItemPreferenceArray(int size) { + this.ids = new long[size]; + values = new float[size]; + this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value + } + + public GenericItemPreferenceArray(List<? extends Preference> prefs) { + this(prefs.size()); + int size = prefs.size(); + long itemID = Long.MIN_VALUE; + for (int i = 0; i < size; i++) { + Preference pref = prefs.get(i); + ids[i] = pref.getUserID(); + if (i == 0) { + itemID = pref.getItemID(); + } else { + if (itemID != pref.getItemID()) { + throw new IllegalArgumentException("Not all item IDs are the same"); + } + } + values[i] = pref.getValue(); + } + id = itemID; + } + + /** + * This is a private copy constructor for clone(). + */ + private GenericItemPreferenceArray(long[] ids, long id, float[] values) { + this.ids = ids; + this.id = id; + this.values = values; + } + + @Override + public int length() { + return ids.length; + } + + @Override + public Preference get(int i) { + return new PreferenceView(i); + } + + @Override + public void set(int i, Preference pref) { + id = pref.getItemID(); + ids[i] = pref.getUserID(); + values[i] = pref.getValue(); + } + + @Override + public long getUserID(int i) { + return ids[i]; + } + + @Override + public void setUserID(int i, long userID) { + ids[i] = userID; + } + + @Override + public long getItemID(int i) { + return id; + } + + /** + * {@inheritDoc} + * + * Note that this method will actually set the item ID for <em>all</em> preferences. + */ + @Override + public void setItemID(int i, long itemID) { + id = itemID; + } + + /** + * @return all user IDs + */ + @Override + public long[] getIDs() { + return ids; + } + + @Override + public float getValue(int i) { + return values[i]; + } + + @Override + public void setValue(int i, float value) { + values[i] = value; + } + + @Override + public void sortByUser() { + lateralSort(USER); + } + + @Override + public void sortByItem() { } + + @Override + public void sortByValue() { + lateralSort(VALUE); + } + + @Override + public void sortByValueReversed() { + lateralSort(VALUE_REVERSED); + } + + @Override + public boolean hasPrefWithUserID(long userID) { + for (long id : ids) { + if (userID == id) { + return true; + } + } + return false; + } + + @Override + public boolean hasPrefWithItemID(long itemID) { + return id == itemID; + } + + private void lateralSort(int type) { + //Comb sort: http://en.wikipedia.org/wiki/Comb_sort + int length = length(); + int gap = length; + boolean swapped = false; + while (gap > 1 || swapped) { + if (gap > 1) { + gap /= 1.247330950103979; // = 1 / (1 - 1/e^phi) + } + swapped = false; + int max = length - gap; + for (int i = 0; i < max; i++) { + int other = i + gap; + if (isLess(other, i, type)) { + swap(i, other); + swapped = true; + } + } + } + } + + private boolean isLess(int i, int j, int type) { + switch (type) { + case USER: + return ids[i] < ids[j]; + case VALUE: + return values[i] < values[j]; + case VALUE_REVERSED: + return values[i] > values[j]; + default: + throw new IllegalStateException(); + } + } + + private void swap(int i, int j) { + long temp1 = ids[i]; + float temp2 = values[i]; + ids[i] = ids[j]; + values[i] = values[j]; + ids[j] = temp1; + values[j] = temp2; + } + + @Override + public GenericItemPreferenceArray clone() { + return new GenericItemPreferenceArray(ids.clone(), id, values.clone()); + } + + @Override + public int hashCode() { + return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids) ^ Arrays.hashCode(values); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof GenericItemPreferenceArray)) { + return false; + } + GenericItemPreferenceArray otherArray = (GenericItemPreferenceArray) other; + return id == otherArray.id && Arrays.equals(ids, otherArray.ids) && Arrays.equals(values, otherArray.values); + } + + @Override + public Iterator<Preference> iterator() { + return Iterators.transform(new CountingIterator(length()), + new Function<Integer, Preference>() { + @Override + public Preference apply(Integer from) { + return new PreferenceView(from); + } + }); + } + + @Override + public String toString() { + if (ids == null || ids.length == 0) { + return "GenericItemPreferenceArray[{}]"; + } + StringBuilder result = new StringBuilder(20 * ids.length); + result.append("GenericItemPreferenceArray[itemID:"); + result.append(id); + result.append(",{"); + for (int i = 0; i < ids.length; i++) { + if (i > 0) { + result.append(','); + } + result.append(ids[i]); + result.append('='); + result.append(values[i]); + } + result.append("}]"); + return result.toString(); + } + + private final class PreferenceView implements Preference { + + private final int i; + + private PreferenceView(int i) { + this.i = i; + } + + @Override + public long getUserID() { + return GenericItemPreferenceArray.this.getUserID(i); + } + + @Override + public long getItemID() { + return GenericItemPreferenceArray.this.getItemID(i); + } + + @Override + public float getValue() { + return values[i]; + } + + @Override + public void setValue(float value) { + values[i] = value; + } + + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java new file mode 100644 index 0000000..e6c7f43 --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericPreference.java @@ -0,0 +1,70 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.io.Serializable; + +import org.apache.mahout.cf.taste.model.Preference; + +import com.google.common.base.Preconditions; + +/** + * <p> + * A simple {@link Preference} encapsulating an item and preference value. + * </p> + */ +public class GenericPreference implements Preference, Serializable { + + private final long userID; + private final long itemID; + private float value; + + public GenericPreference(long userID, long itemID, float value) { + Preconditions.checkArgument(!Float.isNaN(value), "NaN value"); + this.userID = userID; + this.itemID = itemID; + this.value = value; + } + + @Override + public long getUserID() { + return userID; + } + + @Override + public long getItemID() { + return itemID; + } + + @Override + public float getValue() { + return value; + } + + @Override + public void setValue(float value) { + Preconditions.checkArgument(!Float.isNaN(value), "NaN value"); + this.value = value; + } + + @Override + public String toString() { + return "GenericPreference[userID: " + userID + ", itemID:" + itemID + ", value:" + value + ']'; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/410ed16a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java ---------------------------------------------------------------------- diff --git a/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java new file mode 100644 index 0000000..647feeb --- /dev/null +++ b/community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/GenericUserPreferenceArray.java @@ -0,0 +1,307 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.cf.taste.impl.model; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; + +import com.google.common.base.Function; +import com.google.common.collect.Iterators; +import org.apache.mahout.cf.taste.model.Preference; +import org.apache.mahout.cf.taste.model.PreferenceArray; +import org.apache.mahout.common.iterator.CountingIterator; + +/** + * <p> + * Like {@link GenericItemPreferenceArray} but stores preferences for one user (all user IDs the same) rather + * than one item. + * </p> + * + * <p> + * This implementation maintains two parallel arrays, of item IDs and values. The idea is to save allocating + * {@link Preference} objects themselves. This saves the overhead of {@link Preference} objects but also + * duplicating the user ID value. + * </p> + * + * @see BooleanUserPreferenceArray + * @see GenericItemPreferenceArray + * @see GenericPreference + */ +public final class GenericUserPreferenceArray implements PreferenceArray { + + private static final int ITEM = 1; + private static final int VALUE = 2; + private static final int VALUE_REVERSED = 3; + + private final long[] ids; + private long id; + private final float[] values; + + public GenericUserPreferenceArray(int size) { + this.ids = new long[size]; + values = new float[size]; + this.id = Long.MIN_VALUE; // as a sort of 'unspecified' value + } + + public GenericUserPreferenceArray(List<? extends Preference> prefs) { + this(prefs.size()); + int size = prefs.size(); + long userID = Long.MIN_VALUE; + for (int i = 0; i < size; i++) { + Preference pref = prefs.get(i); + if (i == 0) { + userID = pref.getUserID(); + } else { + if (userID != pref.getUserID()) { + throw new IllegalArgumentException("Not all user IDs are the same"); + } + } + ids[i] = pref.getItemID(); + values[i] = pref.getValue(); + } + id = userID; + } + + /** + * This is a private copy constructor for clone(). + */ + private GenericUserPreferenceArray(long[] ids, long id, float[] values) { + this.ids = ids; + this.id = id; + this.values = values; + } + + @Override + public int length() { + return ids.length; + } + + @Override + public Preference get(int i) { + return new PreferenceView(i); + } + + @Override + public void set(int i, Preference pref) { + id = pref.getUserID(); + ids[i] = pref.getItemID(); + values[i] = pref.getValue(); + } + + @Override + public long getUserID(int i) { + return id; + } + + /** + * {@inheritDoc} + * + * Note that this method will actually set the user ID for <em>all</em> preferences. + */ + @Override + public void setUserID(int i, long userID) { + id = userID; + } + + @Override + public long getItemID(int i) { + return ids[i]; + } + + @Override + public void setItemID(int i, long itemID) { + ids[i] = itemID; + } + + /** + * @return all item IDs + */ + @Override + public long[] getIDs() { + return ids; + } + + @Override + public float getValue(int i) { + return values[i]; + } + + @Override + public void setValue(int i, float value) { + values[i] = value; + } + + @Override + public void sortByUser() { } + + @Override + public void sortByItem() { + lateralSort(ITEM); + } + + @Override + public void sortByValue() { + lateralSort(VALUE); + } + + @Override + public void sortByValueReversed() { + lateralSort(VALUE_REVERSED); + } + + @Override + public boolean hasPrefWithUserID(long userID) { + return id == userID; + } + + @Override + public boolean hasPrefWithItemID(long itemID) { + for (long id : ids) { + if (itemID == id) { + return true; + } + } + return false; + } + + private void lateralSort(int type) { + //Comb sort: http://en.wikipedia.org/wiki/Comb_sort + int length = length(); + int gap = length; + boolean swapped = false; + while (gap > 1 || swapped) { + if (gap > 1) { + gap /= 1.247330950103979; // = 1 / (1 - 1/e^phi) + } + swapped = false; + int max = length - gap; + for (int i = 0; i < max; i++) { + int other = i + gap; + if (isLess(other, i, type)) { + swap(i, other); + swapped = true; + } + } + } + } + + private boolean isLess(int i, int j, int type) { + switch (type) { + case ITEM: + return ids[i] < ids[j]; + case VALUE: + return values[i] < values[j]; + case VALUE_REVERSED: + return values[i] > values[j]; + default: + throw new IllegalStateException(); + } + } + + private void swap(int i, int j) { + long temp1 = ids[i]; + float temp2 = values[i]; + ids[i] = ids[j]; + values[i] = values[j]; + ids[j] = temp1; + values[j] = temp2; + } + + @Override + public GenericUserPreferenceArray clone() { + return new GenericUserPreferenceArray(ids.clone(), id, values.clone()); + } + + @Override + public int hashCode() { + return (int) (id >> 32) ^ (int) id ^ Arrays.hashCode(ids) ^ Arrays.hashCode(values); + } + + @Override + public boolean equals(Object other) { + if (!(other instanceof GenericUserPreferenceArray)) { + return false; + } + GenericUserPreferenceArray otherArray = (GenericUserPreferenceArray) other; + return id == otherArray.id && Arrays.equals(ids, otherArray.ids) && Arrays.equals(values, otherArray.values); + } + + @Override + public Iterator<Preference> iterator() { + return Iterators.transform(new CountingIterator(length()), + new Function<Integer, Preference>() { + @Override + public Preference apply(Integer from) { + return new PreferenceView(from); + } + }); + } + + @Override + public String toString() { + if (ids == null || ids.length == 0) { + return "GenericUserPreferenceArray[{}]"; + } + StringBuilder result = new StringBuilder(20 * ids.length); + result.append("GenericUserPreferenceArray[userID:"); + result.append(id); + result.append(",{"); + for (int i = 0; i < ids.length; i++) { + if (i > 0) { + result.append(','); + } + result.append(ids[i]); + result.append('='); + result.append(values[i]); + } + result.append("}]"); + return result.toString(); + } + + private final class PreferenceView implements Preference { + + private final int i; + + private PreferenceView(int i) { + this.i = i; + } + + @Override + public long getUserID() { + return GenericUserPreferenceArray.this.getUserID(i); + } + + @Override + public long getItemID() { + return GenericUserPreferenceArray.this.getItemID(i); + } + + @Override + public float getValue() { + return values[i]; + } + + @Override + public void setValue(float value) { + values[i] = value; + } + + } + +}
