Author: srowen Date: Mon Apr 5 16:05:41 2010 New Revision: 930890 URL: http://svn.apache.org/viewvc?rev=930890&view=rev Log: MAHOUT-362 last refactorings for now
Added: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java - copied, changed from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java - copied, changed from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java - copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java - copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java - copied, changed from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java Removed: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityWritable.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/UserPrefsPerItemMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/EntityPrefWritable.java Mon Apr 5 16:05:41 2010 @@ -21,11 +21,12 @@ import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; +import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Writable; import org.apache.mahout.common.RandomUtils; /** A {...@link Writable} encapsulating an item ID and a preference value. 
*/ -public final class EntityPrefWritable extends EntityWritable { +public final class EntityPrefWritable extends LongWritable { private float prefValue; @@ -39,7 +40,11 @@ public final class EntityPrefWritable ex } public EntityPrefWritable(EntityPrefWritable other) { - this(other.getID(), other.getPrefValue()); + this(other.get(), other.getPrefValue()); + } + + public long getID() { + return get(); } public float getPrefValue() { @@ -75,12 +80,12 @@ public final class EntityPrefWritable ex return false; } EntityPrefWritable other = (EntityPrefWritable) o; - return getID() == other.getID() && prefValue == other.getPrefValue(); + return get() == other.get() && prefValue == other.getPrefValue(); } @Override public EntityPrefWritable clone() { - return new EntityPrefWritable(getID(), prefValue); + return new EntityPrefWritable(get(), prefValue); } } \ No newline at end of file Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java (from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java) URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java&r1=930805&r2=930890&rev=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToEntityPrefsMapper.java Mon Apr 5 16:05:41 2010 @@ -17,9 +17,6 @@ package org.apache.mahout.cf.taste.hadoop; -import java.io.IOException; -import java.util.regex.Pattern; - import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapred.JobConf; @@ -29,52 +26,45 @@ import org.apache.hadoop.mapred.OutputCo import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob; -/** - * <h1>Input</h1> - * - * <p> - * Intended for use with {...@link org.apache.hadoop.mapred.TextInputFormat}; accepts line number / line pairs as - * {...@link LongWritable}/{...@link Text} pairs. - * </p> - * - * <p> - * Each line is assumed to be of the form <code>userID,itemID,preference</code>, or - * <code>userID,itemID</code>.</p> - * </p> - * - * <h1>Output</h1> - * - * <p> - * Outputs the user ID as a {...@link LongWritable} mapped to the item ID and preference as a - * {...@link EntityPrefWritable}. - * </p> - */ -public final class ToItemPrefsMapper extends MapReduceBase implements - Mapper<LongWritable,Text,LongWritable, EntityWritable> { - +import java.io.IOException; +import java.util.regex.Pattern; + +abstract class ToEntityPrefsMapper extends MapReduceBase implements + Mapper<LongWritable,Text,LongWritable,LongWritable> { + private static final Pattern COMMA = Pattern.compile(","); private boolean booleanData; + private final boolean itemKey; + + ToEntityPrefsMapper(boolean itemKey) { + this.itemKey = itemKey; + } @Override public void configure(JobConf jobConf) { booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false); } - + @Override public void map(LongWritable key, Text value, - OutputCollector<LongWritable, EntityWritable> output, + OutputCollector<LongWritable,LongWritable> output, Reporter reporter) throws IOException { - String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString()); + String[] tokens = ToEntityPrefsMapper.COMMA.split(value.toString()); long userID = Long.parseLong(tokens[0]); long itemID = Long.parseLong(tokens[1]); + if (itemKey) { + long temp = userID; + userID = itemID; + itemID = temp; + } if (booleanData) { - output.collect(new LongWritable(userID), new EntityWritable(itemID)); + 
output.collect(new LongWritable(userID), new LongWritable(itemID)); } else { float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f; output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue)); } } - + } \ No newline at end of file Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java Mon Apr 5 16:05:41 2010 @@ -17,17 +17,8 @@ package org.apache.mahout.cf.taste.hadoop; -import java.io.IOException; -import java.util.regex.Pattern; - import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.MapReduceBase; -import org.apache.hadoop.mapred.Mapper; -import org.apache.hadoop.mapred.OutputCollector; -import org.apache.hadoop.mapred.Reporter; -import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob; /** * <h1>Input</h1> @@ -49,32 +40,10 @@ import org.apache.mahout.cf.taste.hadoop * {...@link EntityPrefWritable}. 
* </p> */ -public final class ToItemPrefsMapper extends MapReduceBase implements - Mapper<LongWritable,Text,LongWritable, EntityWritable> { - - private static final Pattern COMMA = Pattern.compile(","); - - private boolean booleanData; +public final class ToItemPrefsMapper extends ToEntityPrefsMapper { - @Override - public void configure(JobConf jobConf) { - booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false); - } - - @Override - public void map(LongWritable key, - Text value, - OutputCollector<LongWritable, EntityWritable> output, - Reporter reporter) throws IOException { - String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString()); - long userID = Long.parseLong(tokens[0]); - long itemID = Long.parseLong(tokens[1]); - if (booleanData) { - output.collect(new LongWritable(userID), new EntityWritable(itemID)); - } else { - float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f; - output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue)); - } + public ToItemPrefsMapper() { + super(false); } } \ No newline at end of file Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java (from r930805, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java) URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java&r1=930805&r2=930890&rev=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToItemPrefsMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/ToUserPrefsMapper.java Mon Apr 5 16:05:41 
2010 @@ -17,64 +17,13 @@ package org.apache.mahout.cf.taste.hadoop; -import java.io.IOException; -import java.util.regex.Pattern; - -import org.apache.hadoop.io.LongWritable; -import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapred.JobConf; -import org.apache.hadoop.mapred.MapReduceBase; -import org.apache.hadoop.mapred.Mapper; -import org.apache.hadoop.mapred.OutputCollector; -import org.apache.hadoop.mapred.Reporter; -import org.apache.mahout.cf.taste.hadoop.item.RecommenderJob; - /** - * <h1>Input</h1> - * - * <p> - * Intended for use with {@link org.apache.hadoop.mapred.TextInputFormat}; accepts line number / line pairs as - * {@link LongWritable}/{@link Text} pairs. - * </p> - * - * <p> - * Each line is assumed to be of the form <code>userID,itemID,preference</code>, or - * <code>userID,itemID</code>.</p> - * </p> - * - * <h1>Output</h1> - * - * <p> - * Outputs the user ID as a {@link LongWritable} mapped to the item ID and preference as a - * {@link EntityPrefWritable}. - * </p> + * The 'reverse' of {@link ToItemPrefsMapper}; outputs item IDs mapped to user-pref data. 
*/ -public final class ToItemPrefsMapper extends MapReduceBase implements - Mapper<LongWritable,Text,LongWritable, EntityWritable> { - - private static final Pattern COMMA = Pattern.compile(","); - - private boolean booleanData; +public final class ToUserPrefsMapper extends ToEntityPrefsMapper { - @Override - public void configure(JobConf jobConf) { - booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false); + public ToUserPrefsMapper() { + super(true); } - - @Override - public void map(LongWritable key, - Text value, - OutputCollector<LongWritable, EntityWritable> output, - Reporter reporter) throws IOException { - String[] tokens = ToItemPrefsMapper.COMMA.split(value.toString()); - long userID = Long.parseLong(tokens[0]); - long itemID = Long.parseLong(tokens[1]); - if (booleanData) { - output.collect(new LongWritable(userID), new EntityWritable(itemID)); - } else { - float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f; - output.collect(new LongWritable(userID), new EntityPrefWritable(itemID, prefValue)); - } - } - + } \ No newline at end of file Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Mon Apr 5 16:05:41 2010 @@ -36,7 +36,6 @@ import org.apache.hadoop.mapred.TextOutp import org.apache.hadoop.mapred.lib.IdentityReducer; import org.apache.hadoop.util.ToolRunner; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; -import org.apache.mahout.cf.taste.hadoop.EntityWritable; import 
org.apache.mahout.common.AbstractJob; import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable; import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper; @@ -100,7 +99,7 @@ public final class RecommenderJob extend JobConf toUserVectorConf = prepareJobConf(inputPath, userVectorPath, TextInputFormat.class, ToItemPrefsMapper.class, LongWritable.class, - booleanData ? EntityWritable.class : EntityPrefWritable.class, + booleanData ? LongWritable.class : EntityPrefWritable.class, ToUserVectorReducer.class, LongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); toUserVectorConf.setBoolean(BOOLEAN_DATA, booleanData); JobClient.runJob(toUserVectorConf); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Mon Apr 5 16:05:41 2010 @@ -29,7 +29,6 @@ import org.apache.hadoop.mapred.OutputCo import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; -import org.apache.mahout.cf.taste.hadoop.EntityWritable; import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; @@ -58,7 +57,7 @@ import org.apache.mahout.math.VectorWrit * */ public final class ToUserVectorReducer extends MapReduceBase implements - Reducer<LongWritable, EntityWritable,LongWritable,VectorWritable> { + Reducer<LongWritable,LongWritable,LongWritable,VectorWritable> { public static final 
int MAX_PREFS_CONSIDERED = 20; @@ -72,14 +71,14 @@ public final class ToUserVectorReducer e @Override public void reduce(LongWritable userID, - Iterator<EntityWritable> itemPrefs, + Iterator<LongWritable> itemPrefs, OutputCollector<LongWritable,VectorWritable> output, Reporter reporter) throws IOException { if (itemPrefs.hasNext()) { RandomAccessSparseVector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100); while (itemPrefs.hasNext()) { - EntityWritable itemPref = itemPrefs.next(); - int index = ItemIDIndexMapper.idToIndex(itemPref.getID()); + LongWritable itemPref = itemPrefs.next(); + int index = ItemIDIndexMapper.idToIndex(itemPref.get()); float value; if (itemPref instanceof EntityPrefWritable) { value = ((EntityPrefWritable) itemPref).getPrefValue(); Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CopreferredItemsMapper.java Mon Apr 5 16:05:41 2010 @@ -20,22 +20,25 @@ package org.apache.mahout.cf.taste.hadoo import java.io.IOException; import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.mahout.cf.taste.hadoop.EntityWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable; +import 
org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; /** * map out each pair of items that appears in the same user-vector together with the multiplied vector lengths * of the associated item vectors */ -public final class CopreferredItemsMapper - extends Mapper<EntityWritable,ItemPrefWithLengthArrayWritable,ItemPairWritable,FloatWritable> { +public final class CopreferredItemsMapper extends MapReduceBase + implements Mapper<LongWritable,ItemPrefWithLengthArrayWritable,ItemPairWritable,FloatWritable> { @Override - protected void map(EntityWritable user, ItemPrefWithLengthArrayWritable itemPrefsArray, Context context) - throws IOException, InterruptedException { + public void map(LongWritable user, + ItemPrefWithLengthArrayWritable itemPrefsArray, + OutputCollector<ItemPairWritable,FloatWritable> output, + Reporter reporter) + throws IOException { ItemPrefWithLengthWritable[] itemPrefs = itemPrefsArray.getItemPrefs(); @@ -49,7 +52,7 @@ public final class CopreferredItemsMapp long itemAID = Math.min(itemNID, itemM.getItemID()); long itemBID = Math.max(itemNID, itemM.getItemID()); ItemPairWritable pair = new ItemPairWritable(itemAID, itemBID, itemNLength * itemM.getLength()); - context.write(pair, new FloatWritable(itemNValue * itemM.getPrefValue())); + output.collect(pair, new FloatWritable(itemNValue * itemM.getPrefValue())); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- 
lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CosineSimilarityReducer.java Mon Apr 5 16:05:41 2010 @@ -18,34 +18,36 @@ package org.apache.mahout.cf.taste.hadoop.similarity.item; import java.io.IOException; +import java.util.Iterator; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable; /** * Finally compute the cosine for each item-pair */ -public final class CosineSimilarityReducer - extends Reducer<ItemPairWritable,FloatWritable, EntityEntityWritable,DoubleWritable> { +public final class CosineSimilarityReducer extends MapReduceBase + implements Reducer<ItemPairWritable,FloatWritable,EntityEntityWritable,DoubleWritable> { @Override - protected void reduce(ItemPairWritable pair, Iterable<FloatWritable> numeratorSummands, Context context) - throws IOException, InterruptedException { + public void reduce(ItemPairWritable pair, + Iterator<FloatWritable> numeratorSummands, + OutputCollector<EntityEntityWritable,DoubleWritable> output, + Reporter reporter) + throws IOException { double numerator = 0.0; - - for (FloatWritable nummeratorSummand : numeratorSummands) { - numerator += nummeratorSummand.get(); + while (numeratorSummands.hasNext()) { + numerator += numeratorSummands.next().get(); } - double denominator = pair.getMultipliedLength(); - double cosine = numerator / denominator; - - context.write(pair.getItemItemWritable(), new DoubleWritable(cosine)); + 
output.collect(pair.getItemItemWritable(), new DoubleWritable(cosine)); } } Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java) URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java&r1=930806&r2=930890&rev=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPairWritable.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPairWritable.java Mon Apr 5 16:05:41 2010 @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.mahout.cf.taste.hadoop.similarity.item.writables; +package org.apache.mahout.cf.taste.hadoop.similarity.item; import java.io.DataInput; import java.io.DataOutput; Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java) URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java&r1=930806&r2=930890&rev=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthArrayWritable.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthArrayWritable.java Mon Apr 5 16:05:41 2010 @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.mahout.cf.taste.hadoop.similarity.item.writables; +package org.apache.mahout.cf.taste.hadoop.similarity.item; import org.apache.hadoop.io.ArrayWritable; Copied: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java (from r930806, lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java) URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java?p2=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java&p1=lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java&r1=930806&r2=930890&rev=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/writables/ItemPrefWithLengthWritable.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemPrefWithLengthWritable.java Mon Apr 5 16:05:41 2010 @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.apache.mahout.cf.taste.hadoop.similarity.item.writables; +package org.apache.mahout.cf.taste.hadoop.similarity.item; import java.io.DataInput; import java.io.DataOutput; Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Mon Apr 5 16:05:41 2010 @@ -17,32 +17,24 @@ package org.apache.mahout.cf.taste.hadoop.similarity.item; -import java.io.IOException; import java.util.Map; +import org.apache.commons.cli2.Option; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.DoubleWritable; import org.apache.hadoop.io.FloatWritable; -import org.apache.hadoop.io.Writable; -import org.apache.hadoop.mapreduce.Job; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat; -import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; -import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat; -import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapred.JobClient; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import 
org.apache.hadoop.mapred.SequenceFileOutputFormat; +import org.apache.hadoop.mapred.TextInputFormat; +import org.apache.hadoop.mapred.TextOutputFormat; import org.apache.hadoop.util.ToolRunner; +import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable; -import org.apache.mahout.cf.taste.hadoop.EntityWritable; -import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable; +import org.apache.mahout.cf.taste.hadoop.ToUserPrefsMapper; import org.apache.mahout.common.AbstractJob; /** @@ -108,7 +100,6 @@ public final class ItemSimilarityJob ext public int run(String[] args) throws Exception { Map<String,String> parsedArgs = AbstractJob.parseArguments(args); - if (parsedArgs == null) { return -1; } @@ -121,84 +112,47 @@ public final class ItemSimilarityJob ext String itemVectorsPath = tempDirPath + "/itemVectors"; String userVectorsPath = tempDirPath + "/userVectors"; - Job itemVectors = createJob(originalConf, "itemVectors", inputPath, itemVectorsPath, UserPrefsPerItemMapper.class, - EntityWritable.class, EntityPrefWritable.class, ToItemVectorReducer.class, EntityWritable.class, - EntityPrefWritableArrayWritable.class, TextInputFormat.class, SequenceFileOutputFormat.class, true); - - itemVectors.waitForCompletion(true); - - Job userVectors = createJob(originalConf, "userVectors", itemVectorsPath, userVectorsPath, - PreferredItemsPerUserMapper.class, EntityWritable.class, ItemPrefWithLengthWritable.class, - PreferredItemsPerUserReducer.class, EntityWritable.class, ItemPrefWithLengthArrayWritable.class); - - userVectors.waitForCompletion(true); - - Job 
similarity = createJob(originalConf, "similarity", userVectorsPath, outputPath, - CopreferredItemsMapper.class, ItemPairWritable.class, FloatWritable.class, CosineSimilarityReducer.class, - EntityEntityWritable.class, DoubleWritable.class, SequenceFileInputFormat.class, TextOutputFormat.class, false); - - similarity.waitForCompletion(true); + JobConf itemVectors = prepareJobConf(inputPath, + itemVectorsPath, + TextInputFormat.class, + ToUserPrefsMapper.class, + LongWritable.class, + EntityPrefWritable.class, + ToItemVectorReducer.class, + LongWritable.class, + EntityPrefWritableArrayWritable.class, + SequenceFileOutputFormat.class); + JobClient.runJob(itemVectors); + + JobConf userVectors = prepareJobConf(itemVectorsPath, + userVectorsPath, + SequenceFileInputFormat.class, + PreferredItemsPerUserMapper.class, + LongWritable.class, + ItemPrefWithLengthWritable.class, + PreferredItemsPerUserReducer.class, + LongWritable.class, + ItemPrefWithLengthArrayWritable.class, + SequenceFileOutputFormat.class); + JobClient.runJob(userVectors); + + JobConf similarity = prepareJobConf(userVectorsPath, + outputPath, + SequenceFileInputFormat.class, + CopreferredItemsMapper.class, + ItemPairWritable.class, + FloatWritable.class, + CosineSimilarityReducer.class, + EntityEntityWritable.class, + DoubleWritable.class, + TextOutputFormat.class); + JobClient.runJob(similarity); return 0; } public static void main(String[] args) throws Exception { - ToolRunner.run(new Configuration(), new ItemSimilarityJob(), args); - } - - protected static Job createJob(Configuration conf, - String jobName, - String inputPath, - String outputPath, - Class<? extends Mapper> mapperClass, - Class<? extends Writable> mapKeyOutClass, - Class<? extends Writable> mapValueOutClass, - Class<? extends Reducer> reducerClass, - Class<? extends Writable> keyOutClass, - Class<? 
extends Writable> valueOutClass) throws IOException { - return createJob(conf, jobName, inputPath, outputPath, mapperClass, mapKeyOutClass, - mapValueOutClass, reducerClass, keyOutClass, valueOutClass, SequenceFileInputFormat.class, - SequenceFileOutputFormat.class, true); - } - - protected static Job createJob(Configuration conf, - String jobName, - String inputPath, - String outputPath, - Class<? extends Mapper> mapperClass, - Class<? extends Writable> mapKeyOutClass, - Class<? extends Writable> mapValueOutClass, - Class<? extends Reducer> reducerClass, - Class<? extends Writable> keyOutClass, - Class<? extends Writable> valueOutClass, - Class<? extends FileInputFormat> fileInputFormatClass, - Class<? extends FileOutputFormat> fileOutputFormatClass, - boolean compress) throws IOException { - - Job job = new Job(conf, jobName); - - FileSystem fs = FileSystem.get(conf); - - Path inputPathPath = new Path(inputPath).makeQualified(fs); - Path outputPathPath = new Path(outputPath).makeQualified(fs); - - FileInputFormat.setInputPaths(job, inputPathPath); - job.setInputFormatClass(fileInputFormatClass); - - job.setMapperClass(mapperClass); - job.setMapOutputKeyClass(mapKeyOutClass); - job.setMapOutputValueClass(mapValueOutClass); - - job.setReducerClass(reducerClass); - job.setOutputKeyClass(keyOutClass); - job.setOutputValueClass(valueOutClass); - - - FileOutputFormat.setOutputPath(job, outputPathPath); - FileOutputFormat.setCompressOutput(job, compress); - job.setOutputFormatClass(fileOutputFormatClass); - - return job; + ToolRunner.run(new ItemSimilarityJob(), args); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java?rev=930890&r1=930889&r2=930890&view=diff 
============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserMapper.java Mon Apr 5 16:05:41 2010 @@ -19,22 +19,26 @@ package org.apache.mahout.cf.taste.hadoo import java.io.IOException; -import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.Mapper; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable; -import org.apache.mahout.cf.taste.hadoop.EntityWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable; /** * for each item-vector, we compute its length here and map out all entries with the user as key, * so we can create the user-vectors in the reducer */ -public final class PreferredItemsPerUserMapper - extends Mapper<EntityWritable, EntityPrefWritableArrayWritable,EntityWritable,ItemPrefWithLengthWritable> { +public final class PreferredItemsPerUserMapper extends MapReduceBase + implements Mapper<LongWritable,EntityPrefWritableArrayWritable,LongWritable,ItemPrefWithLengthWritable> { @Override - protected void map(EntityWritable item, EntityPrefWritableArrayWritable userPrefsArray, Context context) - throws IOException, InterruptedException { + public void map(LongWritable item, + EntityPrefWritableArrayWritable userPrefsArray, + OutputCollector<LongWritable,ItemPrefWithLengthWritable> output, + Reporter reporter) throws IOException { EntityPrefWritable[] userPrefs = userPrefsArray.getPrefs(); @@ -47,8 +51,8 @@ public final class PreferredItemsPerUser length = 
Math.sqrt(length); for (EntityPrefWritable userPref : userPrefs) { - context.write(new EntityWritable(userPref.getID()), - new ItemPrefWithLengthWritable(item.getID(), length, userPref.getPrefValue())); + output.collect(new LongWritable(userPref.getID()), + new ItemPrefWithLengthWritable(item.get(), length, userPref.getPrefValue())); } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PreferredItemsPerUserReducer.java Mon Apr 5 16:05:41 2010 @@ -19,27 +19,32 @@ package org.apache.mahout.cf.taste.hadoo import java.io.IOException; import java.util.HashSet; +import java.util.Iterator; import java.util.Set; -import org.apache.hadoop.mapreduce.Reducer; -import org.apache.mahout.cf.taste.hadoop.EntityWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapred.MapReduceBase; +import org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; -public final class PreferredItemsPerUserReducer - extends Reducer<EntityWritable,ItemPrefWithLengthWritable, EntityWritable,ItemPrefWithLengthArrayWritable> { +public final class PreferredItemsPerUserReducer extends MapReduceBase + implements 
Reducer<LongWritable,ItemPrefWithLengthWritable, LongWritable,ItemPrefWithLengthArrayWritable> { @Override - protected void reduce(EntityWritable user, Iterable<ItemPrefWithLengthWritable> itemPrefs, Context context) - throws IOException, InterruptedException { + public void reduce(LongWritable user, + Iterator<ItemPrefWithLengthWritable> itemPrefs, + OutputCollector<LongWritable,ItemPrefWithLengthArrayWritable> output, + Reporter reporter) + throws IOException { Set<ItemPrefWithLengthWritable> itemPrefsWithLength = new HashSet<ItemPrefWithLengthWritable>(); - for (ItemPrefWithLengthWritable itemPrefWithLength : itemPrefs) { - itemPrefsWithLength.add(itemPrefWithLength.deepCopy()); + while (itemPrefs.hasNext()) { + itemPrefsWithLength.add(itemPrefs.next().clone()); } - context.write(user, new ItemPrefWithLengthArrayWritable( + output.collect(user, new ItemPrefWithLengthArrayWritable( itemPrefsWithLength.toArray(new ItemPrefWithLengthWritable[itemPrefsWithLength.size()]))); } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ToItemVectorReducer.java Mon Apr 5 16:05:41 2010 @@ -19,31 +19,39 @@ package org.apache.mahout.cf.taste.hadoo import java.io.IOException; import java.util.HashSet; +import java.util.Iterator; import java.util.Set; -import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.mapred.MapReduceBase; +import 
org.apache.hadoop.mapred.OutputCollector; +import org.apache.hadoop.mapred.Reducer; +import org.apache.hadoop.mapred.Reporter; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable; -import org.apache.mahout.cf.taste.hadoop.EntityWritable; /** * For each single item, collect all users with their preferences * (thereby building the item vectors of the user-item-matrix) */ public final class ToItemVectorReducer - extends Reducer<EntityWritable, EntityPrefWritable, EntityWritable, EntityPrefWritableArrayWritable> { + extends MapReduceBase implements + Reducer<LongWritable,EntityPrefWritable,LongWritable,EntityPrefWritableArrayWritable> { @Override - protected void reduce(EntityWritable item, Iterable<EntityPrefWritable> userPrefs, Context context) - throws IOException, InterruptedException { + public void reduce(LongWritable item, + Iterator<EntityPrefWritable> userPrefs, + OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output, + Reporter reporter) + throws IOException { Set<EntityPrefWritable> collectedUserPrefs = new HashSet<EntityPrefWritable>(); - for (EntityPrefWritable userPref : userPrefs) { - collectedUserPrefs.add(userPref.clone()); + while (userPrefs.hasNext()) { + collectedUserPrefs.add(userPrefs.next().clone()); } - context.write(item, new EntityPrefWritableArrayWritable( + output.collect(item, new EntityPrefWritableArrayWritable( collectedUserPrefs.toArray(new EntityPrefWritable[collectedUserPrefs.size()]))); } Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=930890&r1=930889&r2=930890&view=diff ============================================================================== --- 
lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java (original) +++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java Mon Apr 5 16:05:41 2010 @@ -17,12 +17,6 @@ package org.apache.mahout.cf.taste.hadoop.similarity.item; -import static org.easymock.EasyMock.eq; -import static org.easymock.EasyMock.expect; -import static org.easymock.classextension.EasyMock.createMock; -import static org.easymock.classextension.EasyMock.replay; -import static org.easymock.classextension.EasyMock.verify; - import java.io.BufferedReader; import java.io.BufferedWriter; import java.io.File; @@ -39,18 +33,15 @@ import org.apache.hadoop.io.DoubleWritab import org.apache.hadoop.io.FloatWritable; import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.io.Text; -import org.apache.hadoop.mapreduce.Mapper; -import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapred.OutputCollector; +import org.easymock.classextension.EasyMock; +import org.easymock.IArgumentMatcher; + import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable; import org.apache.mahout.cf.taste.hadoop.EntityPrefWritableArrayWritable; -import org.apache.mahout.cf.taste.hadoop.EntityWritable; import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPairWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthArrayWritable; -import org.apache.mahout.cf.taste.hadoop.similarity.item.writables.ItemPrefWithLengthWritable; +import org.apache.mahout.cf.taste.hadoop.ToUserPrefsMapper; import org.apache.mahout.common.MahoutTestCase; -import org.easymock.IArgumentMatcher; -import org.easymock.classextension.EasyMock; /** * Unit tests for the mappers and reducers in org.apache.mahout.cf.taste.hadoop.similarity @@ -61,36 +52,41 @@ public class ItemSimilarityTest extends public 
void testUserPrefsPerItemMapper() throws Exception { - Mapper.Context ctx = createMock(Mapper.Context.class); - ctx.write(new EntityWritable(34L), new EntityPrefWritable(12L, 2.3f)); - replay(ctx); + OutputCollector<LongWritable,LongWritable> output = + EasyMock.createMock(OutputCollector.class); + output.collect(new LongWritable(34L), new EntityPrefWritable(12L, 2.3f)); + EasyMock.replay(output); - new UserPrefsPerItemMapper().map(new LongWritable(), new Text("12,34,2.3"), ctx); + new ToUserPrefsMapper().map(new LongWritable(), new Text("12,34,2.3"), output, null); - verify(ctx); + EasyMock.verify(output); } public void testToItemVectorReducer() throws Exception { - List<EntityPrefWritable> userPrefs = Arrays.asList(new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f)); + List<EntityPrefWritable> userPrefs = Arrays.asList( + new EntityPrefWritable(34L, 1.0f), new EntityPrefWritable(56L, 2.0f)); - Reducer.Context ctx = createMock(Reducer.Context.class); + OutputCollector<LongWritable,EntityPrefWritableArrayWritable> output = + EasyMock.createMock(OutputCollector.class); - ctx.write(eq(new EntityWritable(12L)), equalToUserPrefs(userPrefs)); + output.collect(EasyMock.eq(new LongWritable(12L)), equalToUserPrefs(userPrefs)); - replay(ctx); + EasyMock.replay(output); - new ToItemVectorReducer().reduce(new EntityWritable(12L), userPrefs, ctx); + new ToItemVectorReducer().reduce(new LongWritable(12L), userPrefs.iterator(), output, null); - verify(ctx); + EasyMock.verify(output); } - static EntityPrefWritableArrayWritable equalToUserPrefs(final Collection<EntityPrefWritable> prefsToCheck) { + static EntityPrefWritableArrayWritable equalToUserPrefs( + final Collection<EntityPrefWritable> prefsToCheck) { EasyMock.reportMatcher(new IArgumentMatcher() { @Override public boolean matches(Object argument) { if (argument instanceof EntityPrefWritableArrayWritable) { - EntityPrefWritableArrayWritable userPrefArray = (EntityPrefWritableArrayWritable) argument; + 
EntityPrefWritableArrayWritable userPrefArray = + (EntityPrefWritableArrayWritable) argument; Set<EntityPrefWritable> set = new HashSet<EntityPrefWritable>(); set.addAll(Arrays.asList(userPrefArray.getPrefs())); @@ -116,50 +112,56 @@ public class ItemSimilarityTest extends } public void testPreferredItemsPerUserMapper() throws Exception { - Mapper.Context ctx = createMock(Mapper.Context.class); - EntityPrefWritableArrayWritable userPrefs = createMock(EntityPrefWritableArrayWritable.class); - - expect(userPrefs.getPrefs()) - .andReturn(new EntityPrefWritable[] { new EntityPrefWritable(12L, 2.0f), new EntityPrefWritable(56L, 3.0f) }); + OutputCollector<LongWritable,ItemPrefWithLengthWritable> output = + EasyMock.createMock(OutputCollector.class); + EntityPrefWritableArrayWritable userPrefs = + EasyMock.createMock(EntityPrefWritableArrayWritable.class); + + EasyMock.expect(userPrefs.getPrefs()).andReturn( + new EntityPrefWritable[] { + new EntityPrefWritable(12L, 2.0f), + new EntityPrefWritable(56L, 3.0f) }); double length = Math.sqrt(Math.pow(2.0f, 2) + Math.pow(3.0f, 2)); - ctx.write(new EntityWritable(12L), new ItemPrefWithLengthWritable(34L, length, 2.0f)); - ctx.write(new EntityWritable(56L), new ItemPrefWithLengthWritable(34L, length, 3.0f)); + output.collect(new LongWritable(12L), new ItemPrefWithLengthWritable(34L, length, 2.0f)); + output.collect(new LongWritable(56L), new ItemPrefWithLengthWritable(34L, length, 3.0f)); - replay(ctx, userPrefs); + EasyMock.replay(output, userPrefs); - new PreferredItemsPerUserMapper().map(new EntityWritable(34L), userPrefs, ctx); + new PreferredItemsPerUserMapper().map(new LongWritable(34L), userPrefs, output, null); - verify(ctx, userPrefs); + EasyMock.verify(output, userPrefs); } public void testPreferredItemsPerUserReducer() throws Exception { List<ItemPrefWithLengthWritable> itemPrefs = - Arrays.asList(new ItemPrefWithLengthWritable(34L, 5.0, 1.0f), new ItemPrefWithLengthWritable(56L, 7.0, 2.0f)); + Arrays.asList(new 
ItemPrefWithLengthWritable(34L, 5.0, 1.0f), + new ItemPrefWithLengthWritable(56L, 7.0, 2.0f)); - Reducer.Context ctx = createMock(Reducer.Context.class); + OutputCollector<LongWritable,ItemPrefWithLengthArrayWritable> output = + EasyMock.createMock(OutputCollector.class); - ctx.write(eq(new EntityWritable(12L)), equalToItemPrefs(itemPrefs)); + output.collect(EasyMock.eq(new LongWritable(12L)), equalToItemPrefs(itemPrefs)); - replay(ctx); + EasyMock.replay(output); - new PreferredItemsPerUserReducer().reduce(new EntityWritable(12L), itemPrefs, ctx); + new PreferredItemsPerUserReducer().reduce( + new LongWritable(12L), itemPrefs.iterator(), output, null); - verify(ctx); + EasyMock.verify(output); } - static ItemPrefWithLengthArrayWritable equalToItemPrefs(final Collection<ItemPrefWithLengthWritable> prefsToCheck) { + static ItemPrefWithLengthArrayWritable equalToItemPrefs( + final Collection<ItemPrefWithLengthWritable> prefsToCheck) { EasyMock.reportMatcher(new IArgumentMatcher() { @Override public boolean matches(Object argument) { if (argument instanceof ItemPrefWithLengthArrayWritable) { ItemPrefWithLengthArrayWritable itemPrefArray = (ItemPrefWithLengthArrayWritable) argument; - Set<ItemPrefWithLengthWritable> set = new HashSet<ItemPrefWithLengthWritable>(); - for (ItemPrefWithLengthWritable itemPref : itemPrefArray.getItemPrefs()) { - set.add(itemPref); - } + Collection<ItemPrefWithLengthWritable> set = new HashSet<ItemPrefWithLengthWritable>(); + set.addAll(Arrays.asList(itemPrefArray.getItemPrefs())); if (set.size() != prefsToCheck.size()) { return false; @@ -183,40 +185,45 @@ public class ItemSimilarityTest extends } public void testCopreferredItemsMapper() throws Exception { - Mapper.Context ctx = createMock(Mapper.Context.class); - ItemPrefWithLengthArrayWritable itemPrefs = createMock(ItemPrefWithLengthArrayWritable.class); + OutputCollector<ItemPairWritable,FloatWritable> output = + EasyMock.createMock(OutputCollector.class); + 
ItemPrefWithLengthArrayWritable itemPrefs = + EasyMock.createMock(ItemPrefWithLengthArrayWritable.class); - expect(itemPrefs.getItemPrefs()).andReturn(new ItemPrefWithLengthWritable[] { + EasyMock.expect(itemPrefs.getItemPrefs()).andReturn(new ItemPrefWithLengthWritable[] { new ItemPrefWithLengthWritable(34L, 2.0, 1.0f), new ItemPrefWithLengthWritable(56L, 3.0, 2.0f), new ItemPrefWithLengthWritable(78L, 4.0, 3.0f) }); - ctx.write(new ItemPairWritable(34L, 56L, 6.0), new FloatWritable(2.0f)); - ctx.write(new ItemPairWritable(34L, 78L, 8.0), new FloatWritable(3.0f)); - ctx.write(new ItemPairWritable(56L, 78L, 12.0), new FloatWritable(6.0f)); + output.collect(new ItemPairWritable(34L, 56L, 6.0), new FloatWritable(2.0f)); + output.collect(new ItemPairWritable(34L, 78L, 8.0), new FloatWritable(3.0f)); + output.collect(new ItemPairWritable(56L, 78L, 12.0), new FloatWritable(6.0f)); - replay(ctx, itemPrefs); + EasyMock.replay(output, itemPrefs); - new CopreferredItemsMapper().map(new EntityWritable(), itemPrefs, ctx); + new CopreferredItemsMapper().map(new LongWritable(), itemPrefs, output, null); - verify(ctx, itemPrefs); + EasyMock.verify(output, itemPrefs); } public void testCosineSimilarityReducer() throws Exception { - Reducer.Context ctx = createMock(Reducer.Context.class); + OutputCollector<EntityEntityWritable,DoubleWritable> output = + EasyMock.createMock(OutputCollector.class); - ctx.write(new EntityEntityWritable(12L, 34L), new DoubleWritable(0.5d)); + output.collect(new EntityEntityWritable(12L, 34L), new DoubleWritable(0.5d)); - replay(ctx); + EasyMock.replay(output); new CosineSimilarityReducer().reduce(new ItemPairWritable(12L, 34L, 20.0), - Arrays.asList(new FloatWritable(5.0f), new FloatWritable(5.0f)), ctx); + Arrays.asList(new FloatWritable(5.0f), + new FloatWritable(5.0f)).iterator(), output, null); - verify(ctx); + EasyMock.verify(output); } public void testCompleteJob() throws Exception { - String tmpDirPath = System.getProperty("java.io.tmpdir")+ 
'/' +ItemSimilarityTest.class.getCanonicalName(); + String tmpDirPath = System.getProperty("java.io.tmpdir") + + ItemSimilarityTest.class.getCanonicalName(); File tmpDir = new File(tmpDirPath); try { @@ -250,14 +257,16 @@ public class ItemSimilarityTest extends Configuration conf = new Configuration(); conf.set("mapred.input.dir", tmpDirPath+"/prefs.txt"); conf.set("mapred.output.dir", tmpDirPath+"/output"); + conf.set("mapred.output.compress", Boolean.FALSE.toString()); similarityJob.setConf(conf); similarityJob.run(new String[] { "--tempDir", tmpDirPath+"/tmp"}); - BufferedReader reader = new BufferedReader(new FileReader(tmpDirPath+"/output/part-r-00000")); + String filePath = tmpDirPath+"/output/part-00000"; + BufferedReader reader = new BufferedReader(new FileReader(filePath)); - String line = null; + String line; int currentLine = 1; while ( (line = reader.readLine()) != null) {