Author: ssc
Date: Sun Jun 19 21:54:08 2011
New Revision: 1137456

URL: http://svn.apache.org/viewvc?rev=1137456&view=rev
Log:
MAHOUT-736 Save one pass through the data in ItemSimilarityJob and 
RecommenderJob by intelligently using counters

Modified:
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
    mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java
    mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java Sun Jun 19 21:54:08 2011
@@ -82,21 +82,4 @@ public final class TasteHadoopUtils {
     return indexItemIDMap;
   }
 
-  /**
-   * Reads a text-based outputfile that only contains an int
-   */
-  public static int readIntFromFile(Configuration conf, Path outputDir) throws IOException {
-    FileSystem fs = outputDir.getFileSystem(conf);
-    Path outputFile = fs.listStatus(outputDir, PathFilters.partFilter())[0].getPath();
-    InputStream in = null;
-    ByteArrayOutputStream out = new ByteArrayOutputStream();
-    try  {
-      in = fs.open(outputFile);
-      IOUtils.copyBytes(in, out, conf);
-      return Integer.parseInt(new String(out.toByteArray(), Charsets.UTF_8).trim());
-    } finally {
-      Closeables.closeQuietly(in);
-      Closeables.closeQuietly(out);
-    }
-  }
 }

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Sun Jun 19 21:54:08 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.cf.taste.hadoo
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -33,11 +32,7 @@ import org.apache.hadoop.util.ToolRunner
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
 import org.apache.mahout.cf.taste.hadoop.MaybePruneRowsMapper;
 import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
 import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.CountUsersKeyWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.CountUsersMapper;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.CountUsersReducer;
 import org.apache.mahout.cf.taste.hadoop.similarity.item.ToItemVectorsReducer;
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.math.VarIntWritable;
@@ -166,6 +161,7 @@ public final class RecommenderJob extend
       itemIDIndex.waitForCompletion(true);
     }
 
+    int numberOfUsers = 0;
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
       Job toUserVector = prepareJob(
         inputPath, userVectorPath, TextInputFormat.class,
@@ -175,22 +171,8 @@ public final class RecommenderJob extend
       toUserVector.getConfiguration().setBoolean(BOOLEAN_DATA, booleanData);
       toUserVector.getConfiguration().setInt(ToUserVectorReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
       toUserVector.waitForCompletion(true);
-    }
 
-    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
-      Job countUsers = prepareJob(userVectorPath,
-                                  countUsersPath,
-                                  SequenceFileInputFormat.class,
-                                  CountUsersMapper.class,
-                                  CountUsersKeyWritable.class,
-                                  VarLongWritable.class,
-                                  CountUsersReducer.class,
-                                  VarIntWritable.class,
-                                  NullWritable.class,
-                                  TextOutputFormat.class);
-      countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
-      countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
-      countUsers.waitForCompletion(true);
+      numberOfUsers = (int) toUserVector.getCounters().findCounter(ToUserVectorReducer.Counters.USERS).getValue();
     }
 
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -209,8 +191,6 @@ public final class RecommenderJob extend
       maybePruneAndTransponse.waitForCompletion(true);
     }
 
-    int numberOfUsers = TasteHadoopUtils.readIntFromFile(getConf(), countUsersPath);
-
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
       /* Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor this call to something like
        * new DistributedRowMatrix(...).rowSimilarity(...) */

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java Sun Jun 19 21:54:08 2011
@@ -47,10 +47,13 @@ import org.apache.mahout.math.VectorWrit
 public final class ToUserVectorReducer extends
     Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable> {
 
-  public static final String MIN_PREFERENCES_PER_USER = ToUserVectorReducer.class.getName() + ".minPreferencesPerUser";
+  public static final String MIN_PREFERENCES_PER_USER = ToUserVectorReducer.class.getName() +
+      ".minPreferencesPerUser";
 
   private int minPreferences;
 
+  public enum Counters { USERS };
+
   @Override
   protected void setup(Context ctx) throws IOException, InterruptedException {
     super.setup(ctx);
@@ -71,6 +74,7 @@ public final class ToUserVectorReducer e
     if (userVector.getNumNondefaultElements() >= minPreferences) {
       VectorWritable vw = new VectorWritable(userVector);
       vw.setWritesLaxPrecision(true);
+      context.getCounter(Counters.USERS).increment(1);
       context.write(userID, vw);
     }
   }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
 Sun Jun 19 21:54:08 2011
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import com.google.common.collect.Iterables;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.math.VarLongWritable;
-
-import java.io.IOException;
-
-public class CountUsersCombiner
-    extends 
Reducer<CountUsersKeyWritable,VarLongWritable,CountUsersKeyWritable,VarLongWritable>
 {
-
-  @Override
-  protected void reduce(CountUsersKeyWritable key, Iterable<VarLongWritable> 
values, Context ctx)
-      throws IOException, InterruptedException {
-    /* we only need to see one tuple per user */
-    ctx.write(key, Iterables.get(values, 0));
-  }
-}

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java
 Sun Jun 19 21:54:08 2011
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import com.google.common.primitives.Longs;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.mapreduce.Partitioner;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Varint;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-
-/**
- * a writable key that is used by {@link CountUsersMapper} and {@link 
CountUsersReducer} to
- * count unique users by sending all userIDs to the same reducer and have them 
sorted in
- * ascending order so that there's no buffering necessary when counting them
- */
-public class CountUsersKeyWritable implements 
WritableComparable<CountUsersKeyWritable> {
-
-  private long userID;
-
-  public CountUsersKeyWritable() {
-  }
-
-  public CountUsersKeyWritable(long userID) {
-    this.userID = userID;
-  }
-
-  public long getUserID() {
-    return userID;
-  }
-
-  @Override
-  public void readFields(DataInput in) throws IOException {
-    userID = Varint.readSignedVarLong(in);
-  }
-
-  @Override
-  public void write(DataOutput out) throws IOException {
-    Varint.writeSignedVarLong(userID, out);
-  }
-
-  @Override
-  public int compareTo(CountUsersKeyWritable other) {
-    return userID == other.userID ? 0 : userID < other.userID ? -1 : 1;
-  }
-
-  @Override
-  public boolean equals(Object other) {
-    return other instanceof CountUsersKeyWritable && userID == 
((CountUsersKeyWritable) other).userID;
-  }
-
-  @Override
-  public int hashCode() {
-    return Longs.hashCode(userID);
-  }
-
-  /**
-   * all userIDs go to the same partition
-   */
-  public static class CountUsersPartitioner extends 
Partitioner<CountUsersKeyWritable,VarLongWritable> {
-
-    @Override
-    public int getPartition(CountUsersKeyWritable key, VarLongWritable value, 
int numPartitions) {
-      return 0;
-    }
-
-  }
-
-  /**
-   * all userIDs go to the same reducer
-   */
-  public static class CountUsersGroupComparator extends WritableComparator 
implements Serializable {
-
-    public CountUsersGroupComparator() {
-      super(CountUsersKeyWritable.class, true);
-    }
-
-    @Override
-    public int compare(WritableComparable a, WritableComparable b) {
-      return 0;
-    }
-  }
-}

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java
 Sun Jun 19 21:54:08 2011
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import java.io.IOException;
-
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.VectorWritable;
-
-/**
- * Maps out the userIDs in a way that we can use a secondary sort on them
- */
-public class CountUsersMapper extends
-    
Mapper<VarLongWritable,VectorWritable,CountUsersKeyWritable,VarLongWritable> {
-
-  @Override
-  protected void map(VarLongWritable key,
-                     VectorWritable value,
-                     Context context) throws IOException, InterruptedException 
{
-    long userID = key.get();
-    context.write(new CountUsersKeyWritable(userID), new 
VarLongWritable(userID));
-  }
-
-}

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
 Sun Jun 19 21:54:08 2011
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-
-/**
- * counts all unique users, we ensure that we see userIDs sorted in ascending 
order via
- * secondary sort, so we don't have to buffer all of them
- */
-public class CountUsersReducer extends
-    Reducer<CountUsersKeyWritable,VarLongWritable, 
VarIntWritable,NullWritable> {
-
-  @Override
-  protected void reduce(CountUsersKeyWritable key,
-                        Iterable<VarLongWritable> userIDs,
-                        Context context) throws IOException, 
InterruptedException {
-
-    long lastSeenUserID = Long.MIN_VALUE;
-    int numberOfUsers = 0;
-
-    for (VarLongWritable writable : userIDs) {
-      long currentUserID = writable.get();
-      if (currentUserID > lastSeenUserID) {
-        lastSeenUserID = currentUserID;
-        numberOfUsers++;
-      }
-    }
-    context.write(new VarIntWritable(numberOfUsers), NullWritable.get());
-  }
-
-}

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java Sun Jun 19 21:54:08 2011
@@ -125,7 +125,6 @@ public final class ItemSimilarityJob ext
     Path outputPath = getOutputPath();
 
     Path itemIDIndexPath = getTempPath("itemIDIndex");
-    Path countUsersPath = getTempPath("countUsers");
     Path userVectorPath = getTempPath("userVectors");
     Path itemUserMatrixPath = getTempPath("itemUserMatrix");
     Path similarityMatrixPath = getTempPath("similarityMatrix");
@@ -142,6 +141,7 @@ public final class ItemSimilarityJob ext
       itemIDIndex.waitForCompletion(true);
     }
 
+    int numberOfUsers = 0;
 
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
       Job toUserVector = prepareJob(inputPath,
@@ -157,23 +157,8 @@ public final class ItemSimilarityJob ext
       toUserVector.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, 
booleanData);
       
toUserVector.getConfiguration().setInt(ToUserVectorReducer.MIN_PREFERENCES_PER_USER,
 minPrefsPerUser);
       toUserVector.waitForCompletion(true);
-    }
 
-    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
-      Job countUsers = prepareJob(userVectorPath,
-                                  countUsersPath,
-                                  SequenceFileInputFormat.class,
-                                  CountUsersMapper.class,
-                                  CountUsersKeyWritable.class,
-                                  VarLongWritable.class,
-                                  CountUsersReducer.class,
-                                  VarIntWritable.class,
-                                  NullWritable.class,
-                                  TextOutputFormat.class);
-      countUsers.setCombinerClass(CountUsersCombiner.class);
-      
countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
-      
countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
-      countUsers.waitForCompletion(true);
+      numberOfUsers = (int) toUserVector.getCounters().findCounter(ToUserVectorReducer.Counters.USERS).getValue();
     }
 
     if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -192,8 +177,6 @@ public final class ItemSimilarityJob ext
       maybePruneAndTransponse.waitForCompletion(true);
     }
 
-    int numberOfUsers = TasteHadoopUtils.readIntFromFile(getConf(), countUsersPath);
-
     /* Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor 
this call to something like
      * new DistributedRowMatrix(...).rowSimilarity(...) */
     ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] {

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
 Sun Jun 19 21:54:08 2011
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configurat
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
@@ -136,11 +137,14 @@ public class RecommenderJobTest extends 
   public void testToUserVectorReducer() throws Exception {
     
Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context 
context =
       EasyMock.createMock(Reducer.Context.class);
+    Counter userCounters = EasyMock.createMock(Counter.class);
 
+    
EasyMock.expect(context.getCounter(ToUserVectorReducer.Counters.USERS)).andReturn(userCounters);
+    userCounters.increment(1);
     context.write(EasyMock.eq(new VarLongWritable(12L)), 
MathHelper.vectorMatches(
         MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0), 
MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 2.0)));
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, userCounters);
 
     Collection<VarLongWritable> varLongWritables = new 
LinkedList<VarLongWritable>();
     varLongWritables.add(new EntityPrefWritable(34L, 1.0f));
@@ -148,7 +152,7 @@ public class RecommenderJobTest extends 
 
     new ToUserVectorReducer().reduce(new VarLongWritable(12L), 
varLongWritables, context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, userCounters);
   }
 
   /**
@@ -158,16 +162,19 @@ public class RecommenderJobTest extends 
   public void testToUserVectorReducerWithBooleanData() throws Exception {
     
Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context 
context =
       EasyMock.createMock(Reducer.Context.class);
+    Counter userCounters = EasyMock.createMock(Counter.class);
 
+    
EasyMock.expect(context.getCounter(ToUserVectorReducer.Counters.USERS)).andReturn(userCounters);
+    userCounters.increment(1);
     context.write(EasyMock.eq(new VarLongWritable(12L)), 
MathHelper.vectorMatches(
         MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0), 
MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 1.0)));
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, userCounters);
 
     new ToUserVectorReducer().reduce(new VarLongWritable(12L), 
Arrays.asList(new VarLongWritable(34L),
         new VarLongWritable(56L)), context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, userCounters);
   }
 
   /**

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java
 Sun Jun 19 21:54:08 2011
@@ -17,6 +17,7 @@
 
 package org.apache.mahout.cf.taste.hadoop.item;
 
+import org.apache.hadoop.mapreduce.Counter;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
 import org.apache.mahout.cf.taste.impl.TasteTestCase;
@@ -52,18 +53,21 @@ public class ToUserVectorReducerTest ext
   public void testToUsersReducerMinPreferencesUserPasses() throws Exception {
     
Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context 
context =
         EasyMock.createMock(Reducer.Context.class);
+    Counter userCounters = EasyMock.createMock(Counter.class);
 
     ToUserVectorReducer reducer = new ToUserVectorReducer();
     setField(reducer, "minPreferences", 2);
 
+    
EasyMock.expect(context.getCounter(ToUserVectorReducer.Counters.USERS)).andReturn(userCounters);
+    userCounters.increment(1);
     context.write(EasyMock.eq(new VarLongWritable(123)), 
MathHelper.vectorMatches(
         MathHelper.elem(TasteHadoopUtils.idToIndex(456L), 1.0), 
MathHelper.elem(TasteHadoopUtils.idToIndex(789L), 1.0)));
 
-    EasyMock.replay(context);
+    EasyMock.replay(context, userCounters);
 
     reducer.reduce(new VarLongWritable(123), Arrays.asList(new 
VarLongWritable(456), new VarLongWritable(789)), context);
 
-    EasyMock.verify(context);
+    EasyMock.verify(context, userCounters);
   }
 
 }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
 Sun Jun 19 21:54:08 2011
@@ -26,14 +26,12 @@ import java.util.List;
 import com.google.common.base.Charsets;
 import com.google.common.io.Files;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
 import org.apache.mahout.cf.taste.impl.TasteTestCase;
 import org.apache.mahout.math.RandomAccessSparseVector;
 import org.apache.mahout.math.VarIntWritable;
@@ -54,63 +52,6 @@ import org.junit.Test;
 public final class ItemSimilarityJobTest extends TasteTestCase {
 
   /**
-   * Tests {@link CountUsersMapper}
-   */
-  @Test
-  public void testCountUsersMapper() throws Exception {
-    
Mapper<VarLongWritable,VectorWritable,CountUsersKeyWritable,VarLongWritable>.Context
 context =
-        EasyMock.createMock(Mapper.Context.class);
-    context.write(keyForUserID(12L), EasyMock.eq(new VarLongWritable(12L)));
-    context.write(keyForUserID(35L), EasyMock.eq(new VarLongWritable(35L)));
-    EasyMock.replay(context);
-
-    CountUsersMapper mapper = new CountUsersMapper();
-    mapper.map(new VarLongWritable(12), new VectorWritable(), context);
-    mapper.map(new VarLongWritable(35), new VectorWritable(), context);
-
-    EasyMock.verify(context);
-  }
-
-  /**
-   * Applies an {@link IArgumentMatcher} to a {@link CountUsersKeyWritable} 
checking whether it matches the userID
-   */
-  static CountUsersKeyWritable keyForUserID(final long userID) {
-    EasyMock.reportMatcher(new IArgumentMatcher() {
-      @Override
-      public boolean matches(Object argument) {
-        if (argument instanceof CountUsersKeyWritable) {
-          CountUsersKeyWritable key = (CountUsersKeyWritable) argument;
-          return userID == key.getUserID();
-        }
-        return false;
-      }
-
-      @Override
-      public void appendTo(StringBuffer buffer) {}
-    });
-
-    return null;
-  }
-
-  /**
-   * Tests {@link CountUsersReducer}
-   */
-  @Test
-  public void testCountUsersReducer() throws Exception {
-    
Reducer<CountUsersKeyWritable,VarLongWritable,VarIntWritable,NullWritable>.Context
 context =
-        EasyMock.createMock(Reducer.Context.class);
-    context.write(new VarIntWritable(3), NullWritable.get());
-    EasyMock.replay(context);
-
-    List<VarLongWritable> userIDs = Arrays.asList(new VarLongWritable(1L), new 
VarLongWritable(1L),
-                                                  new VarLongWritable(3L), new 
VarLongWritable(5L),
-                                                  new VarLongWritable(5L), new 
VarLongWritable(5L));
-
-    new CountUsersReducer().reduce(null, userIDs, context);
-    EasyMock.verify(context);
-  }
-
-  /**
    * Tests {@link MostSimilarItemPairsMapper}
    */
   @Test
@@ -199,12 +140,6 @@ public final class ItemSimilarityJobTest
     similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), 
"--similarityClassname",
        DistributedUncenteredZeroAssumingCosineVectorSimilarity.class.getName() 
});
 
-    File countUsersPart = new File(tmpDir, "countUsers");
-    int numberOfUsers = TasteHadoopUtils.readIntFromFile(new Configuration(),
-        new Path(countUsersPart.getAbsolutePath()));
-
-    assertEquals(3, numberOfUsers);
-
     File outPart = outputDir.listFiles(new FilenameFilter() {
       @Override
       public boolean accept(File dir, String name) {


Reply via email to