Author: ssc
Date: Sun Jun 19 21:54:08 2011
New Revision: 1137456
URL: http://svn.apache.org/viewvc?rev=1137456&view=rev
Log:
MAHOUT-736 Save one pass through the data in ItemSimilarityJob and
RecommenderJob by counting users with a Hadoop counter in ToUserVectorReducer
instead of running a separate CountUsers job
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/TasteHadoopUtils.java
Sun Jun 19 21:54:08 2011
@@ -82,21 +82,4 @@ public final class TasteHadoopUtils {
return indexItemIDMap;
}
- /**
- * Reads a text-based outputfile that only contains an int
- */
- public static int readIntFromFile(Configuration conf, Path outputDir) throws
IOException {
- FileSystem fs = outputDir.getFileSystem(conf);
- Path outputFile = fs.listStatus(outputDir,
PathFilters.partFilter())[0].getPath();
- InputStream in = null;
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- try {
- in = fs.open(outputFile);
- IOUtils.copyBytes(in, out, conf);
- return Integer.parseInt(new String(out.toByteArray(),
Charsets.UTF_8).trim());
- } finally {
- Closeables.closeQuietly(in);
- Closeables.closeQuietly(out);
- }
- }
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
Sun Jun 19 21:54:08 2011
@@ -20,7 +20,6 @@ package org.apache.mahout.cf.taste.hadoo
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
@@ -33,11 +32,7 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
import org.apache.mahout.cf.taste.hadoop.MaybePruneRowsMapper;
import org.apache.mahout.cf.taste.hadoop.RecommendedItemsWritable;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
import org.apache.mahout.cf.taste.hadoop.ToItemPrefsMapper;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.CountUsersKeyWritable;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.CountUsersMapper;
-import org.apache.mahout.cf.taste.hadoop.similarity.item.CountUsersReducer;
import org.apache.mahout.cf.taste.hadoop.similarity.item.ToItemVectorsReducer;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.math.VarIntWritable;
@@ -166,6 +161,7 @@ public final class RecommenderJob extend
itemIDIndex.waitForCompletion(true);
}
+ int numberOfUsers = 0;
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
Job toUserVector = prepareJob(
inputPath, userVectorPath, TextInputFormat.class,
@@ -175,22 +171,8 @@ public final class RecommenderJob extend
toUserVector.getConfiguration().setBoolean(BOOLEAN_DATA, booleanData);
toUserVector.getConfiguration().setInt(ToUserVectorReducer.MIN_PREFERENCES_PER_USER,
minPrefsPerUser);
toUserVector.waitForCompletion(true);
- }
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- Job countUsers = prepareJob(userVectorPath,
- countUsersPath,
- SequenceFileInputFormat.class,
- CountUsersMapper.class,
- CountUsersKeyWritable.class,
- VarLongWritable.class,
- CountUsersReducer.class,
- VarIntWritable.class,
- NullWritable.class,
- TextOutputFormat.class);
-
countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
-
countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
- countUsers.waitForCompletion(true);
+ numberOfUsers = (int)
toUserVector.getCounters().findCounter(ToUserVectorReducer.Counters.USERS).getValue();
}
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -209,8 +191,6 @@ public final class RecommenderJob extend
maybePruneAndTransponse.waitForCompletion(true);
}
- int numberOfUsers = TasteHadoopUtils.readIntFromFile(getConf(),
countUsersPath);
-
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
/* Once DistributedRowMatrix uses the hadoop 0.20 API, we should
refactor this call to something like
* new DistributedRowMatrix(...).rowSimilarity(...) */
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducer.java
Sun Jun 19 21:54:08 2011
@@ -47,10 +47,13 @@ import org.apache.mahout.math.VectorWrit
public final class ToUserVectorReducer extends
Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable> {
- public static final String MIN_PREFERENCES_PER_USER =
ToUserVectorReducer.class.getName() + ".minPreferencesPerUser";
+ public static final String MIN_PREFERENCES_PER_USER =
ToUserVectorReducer.class.getName() +
+ ".minPreferencesPerUser";
private int minPreferences;
+ public enum Counters { USERS };
+
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
super.setup(ctx);
@@ -71,6 +74,7 @@ public final class ToUserVectorReducer e
if (userVector.getNumNondefaultElements() >= minPreferences) {
VectorWritable vw = new VectorWritable(userVector);
vw.setWritesLaxPrecision(true);
+ context.getCounter(Counters.USERS).increment(1);
context.write(userID, vw);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersCombiner.java
Sun Jun 19 21:54:08 2011
@@ -1,35 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import com.google.common.collect.Iterables;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.math.VarLongWritable;
-
-import java.io.IOException;
-
-public class CountUsersCombiner
- extends
Reducer<CountUsersKeyWritable,VarLongWritable,CountUsersKeyWritable,VarLongWritable>
{
-
- @Override
- protected void reduce(CountUsersKeyWritable key, Iterable<VarLongWritable>
values, Context ctx)
- throws IOException, InterruptedException {
- /* we only need to see one tuple per user */
- ctx.write(key, Iterables.get(values, 0));
- }
-}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersKeyWritable.java
Sun Jun 19 21:54:08 2011
@@ -1,103 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import com.google.common.primitives.Longs;
-import org.apache.hadoop.io.WritableComparable;
-import org.apache.hadoop.io.WritableComparator;
-import org.apache.hadoop.mapreduce.Partitioner;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.Varint;
-
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.Serializable;
-
-/**
- * a writable key that is used by {@link CountUsersMapper} and {@link
CountUsersReducer} to
- * count unique users by sending all userIDs to the same reducer and have them
sorted in
- * ascending order so that there's no buffering necessary when counting them
- */
-public class CountUsersKeyWritable implements
WritableComparable<CountUsersKeyWritable> {
-
- private long userID;
-
- public CountUsersKeyWritable() {
- }
-
- public CountUsersKeyWritable(long userID) {
- this.userID = userID;
- }
-
- public long getUserID() {
- return userID;
- }
-
- @Override
- public void readFields(DataInput in) throws IOException {
- userID = Varint.readSignedVarLong(in);
- }
-
- @Override
- public void write(DataOutput out) throws IOException {
- Varint.writeSignedVarLong(userID, out);
- }
-
- @Override
- public int compareTo(CountUsersKeyWritable other) {
- return userID == other.userID ? 0 : userID < other.userID ? -1 : 1;
- }
-
- @Override
- public boolean equals(Object other) {
- return other instanceof CountUsersKeyWritable && userID ==
((CountUsersKeyWritable) other).userID;
- }
-
- @Override
- public int hashCode() {
- return Longs.hashCode(userID);
- }
-
- /**
- * all userIDs go to the same partition
- */
- public static class CountUsersPartitioner extends
Partitioner<CountUsersKeyWritable,VarLongWritable> {
-
- @Override
- public int getPartition(CountUsersKeyWritable key, VarLongWritable value,
int numPartitions) {
- return 0;
- }
-
- }
-
- /**
- * all userIDs go to the same reducer
- */
- public static class CountUsersGroupComparator extends WritableComparator
implements Serializable {
-
- public CountUsersGroupComparator() {
- super(CountUsersKeyWritable.class, true);
- }
-
- @Override
- public int compare(WritableComparable a, WritableComparable b) {
- return 0;
- }
- }
-}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersMapper.java
Sun Jun 19 21:54:08 2011
@@ -1,40 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import java.io.IOException;
-
-import org.apache.hadoop.mapreduce.Mapper;
-import org.apache.mahout.math.VarLongWritable;
-import org.apache.mahout.math.VectorWritable;
-
-/**
- * Maps out the userIDs in a way that we can use a secondary sort on them
- */
-public class CountUsersMapper extends
-
Mapper<VarLongWritable,VectorWritable,CountUsersKeyWritable,VarLongWritable> {
-
- @Override
- protected void map(VarLongWritable key,
- VectorWritable value,
- Context context) throws IOException, InterruptedException
{
- long userID = key.get();
- context.write(new CountUsersKeyWritable(userID), new
VarLongWritable(userID));
- }
-
-}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/CountUsersReducer.java
Sun Jun 19 21:54:08 2011
@@ -1,52 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.cf.taste.hadoop.similarity.item;
-
-import java.io.IOException;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.mahout.math.VarIntWritable;
-import org.apache.mahout.math.VarLongWritable;
-
-/**
- * counts all unique users, we ensure that we see userIDs sorted in ascending
order via
- * secondary sort, so we don't have to buffer all of them
- */
-public class CountUsersReducer extends
- Reducer<CountUsersKeyWritable,VarLongWritable,
VarIntWritable,NullWritable> {
-
- @Override
- protected void reduce(CountUsersKeyWritable key,
- Iterable<VarLongWritable> userIDs,
- Context context) throws IOException,
InterruptedException {
-
- long lastSeenUserID = Long.MIN_VALUE;
- int numberOfUsers = 0;
-
- for (VarLongWritable writable : userIDs) {
- long currentUserID = writable.get();
- if (currentUserID > lastSeenUserID) {
- lastSeenUserID = currentUserID;
- numberOfUsers++;
- }
- }
- context.write(new VarIntWritable(numberOfUsers), NullWritable.get());
- }
-
-}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
Sun Jun 19 21:54:08 2011
@@ -125,7 +125,6 @@ public final class ItemSimilarityJob ext
Path outputPath = getOutputPath();
Path itemIDIndexPath = getTempPath("itemIDIndex");
- Path countUsersPath = getTempPath("countUsers");
Path userVectorPath = getTempPath("userVectors");
Path itemUserMatrixPath = getTempPath("itemUserMatrix");
Path similarityMatrixPath = getTempPath("similarityMatrix");
@@ -142,6 +141,7 @@ public final class ItemSimilarityJob ext
itemIDIndex.waitForCompletion(true);
}
+ int numberOfUsers = 0;
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
Job toUserVector = prepareJob(inputPath,
@@ -157,23 +157,8 @@ public final class ItemSimilarityJob ext
toUserVector.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA,
booleanData);
toUserVector.getConfiguration().setInt(ToUserVectorReducer.MIN_PREFERENCES_PER_USER,
minPrefsPerUser);
toUserVector.waitForCompletion(true);
- }
- if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- Job countUsers = prepareJob(userVectorPath,
- countUsersPath,
- SequenceFileInputFormat.class,
- CountUsersMapper.class,
- CountUsersKeyWritable.class,
- VarLongWritable.class,
- CountUsersReducer.class,
- VarIntWritable.class,
- NullWritable.class,
- TextOutputFormat.class);
- countUsers.setCombinerClass(CountUsersCombiner.class);
-
countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
-
countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
- countUsers.waitForCompletion(true);
+ numberOfUsers = (int)
toUserVector.getCounters().findCounter(ToUserVectorReducer.Counters.USERS).getValue();
}
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
@@ -192,8 +177,6 @@ public final class ItemSimilarityJob ext
maybePruneAndTransponse.waitForCompletion(true);
}
- int numberOfUsers = TasteHadoopUtils.readIntFromFile(getConf(),
countUsersPath);
-
/* Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor
this call to something like
* new DistributedRowMatrix(...).rowSimilarity(...) */
ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] {
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
Sun Jun 19 21:54:08 2011
@@ -31,6 +31,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.cf.taste.hadoop.EntityPrefWritable;
@@ -136,11 +137,14 @@ public class RecommenderJobTest extends
public void testToUserVectorReducer() throws Exception {
Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context
context =
EasyMock.createMock(Reducer.Context.class);
+ Counter userCounters = EasyMock.createMock(Counter.class);
+
EasyMock.expect(context.getCounter(ToUserVectorReducer.Counters.USERS)).andReturn(userCounters);
+ userCounters.increment(1);
context.write(EasyMock.eq(new VarLongWritable(12L)),
MathHelper.vectorMatches(
MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0),
MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 2.0)));
- EasyMock.replay(context);
+ EasyMock.replay(context, userCounters);
Collection<VarLongWritable> varLongWritables = new
LinkedList<VarLongWritable>();
varLongWritables.add(new EntityPrefWritable(34L, 1.0f));
@@ -148,7 +152,7 @@ public class RecommenderJobTest extends
new ToUserVectorReducer().reduce(new VarLongWritable(12L),
varLongWritables, context);
- EasyMock.verify(context);
+ EasyMock.verify(context, userCounters);
}
/**
@@ -158,16 +162,19 @@ public class RecommenderJobTest extends
public void testToUserVectorReducerWithBooleanData() throws Exception {
Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context
context =
EasyMock.createMock(Reducer.Context.class);
+ Counter userCounters = EasyMock.createMock(Counter.class);
+
EasyMock.expect(context.getCounter(ToUserVectorReducer.Counters.USERS)).andReturn(userCounters);
+ userCounters.increment(1);
context.write(EasyMock.eq(new VarLongWritable(12L)),
MathHelper.vectorMatches(
MathHelper.elem(TasteHadoopUtils.idToIndex(34L), 1.0),
MathHelper.elem(TasteHadoopUtils.idToIndex(56L), 1.0)));
- EasyMock.replay(context);
+ EasyMock.replay(context, userCounters);
new ToUserVectorReducer().reduce(new VarLongWritable(12L),
Arrays.asList(new VarLongWritable(34L),
new VarLongWritable(56L)), context);
- EasyMock.verify(context);
+ EasyMock.verify(context, userCounters);
}
/**
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorReducerTest.java
Sun Jun 19 21:54:08 2011
@@ -17,6 +17,7 @@
package org.apache.mahout.cf.taste.hadoop.item;
+import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
import org.apache.mahout.cf.taste.impl.TasteTestCase;
@@ -52,18 +53,21 @@ public class ToUserVectorReducerTest ext
public void testToUsersReducerMinPreferencesUserPasses() throws Exception {
Reducer<VarLongWritable,VarLongWritable,VarLongWritable,VectorWritable>.Context
context =
EasyMock.createMock(Reducer.Context.class);
+ Counter userCounters = EasyMock.createMock(Counter.class);
ToUserVectorReducer reducer = new ToUserVectorReducer();
setField(reducer, "minPreferences", 2);
+
EasyMock.expect(context.getCounter(ToUserVectorReducer.Counters.USERS)).andReturn(userCounters);
+ userCounters.increment(1);
context.write(EasyMock.eq(new VarLongWritable(123)),
MathHelper.vectorMatches(
MathHelper.elem(TasteHadoopUtils.idToIndex(456L), 1.0),
MathHelper.elem(TasteHadoopUtils.idToIndex(789L), 1.0)));
- EasyMock.replay(context);
+ EasyMock.replay(context, userCounters);
reducer.reduce(new VarLongWritable(123), Arrays.asList(new
VarLongWritable(456), new VarLongWritable(789)), context);
- EasyMock.verify(context);
+ EasyMock.verify(context, userCounters);
}
}
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java?rev=1137456&r1=1137455&r2=1137456&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJobTest.java
Sun Jun 19 21:54:08 2011
@@ -26,14 +26,12 @@ import java.util.List;
import com.google.common.base.Charsets;
import com.google.common.io.Files;
import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
-import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
import org.apache.mahout.cf.taste.impl.TasteTestCase;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.VarIntWritable;
@@ -54,63 +52,6 @@ import org.junit.Test;
public final class ItemSimilarityJobTest extends TasteTestCase {
/**
- * Tests {@link CountUsersMapper}
- */
- @Test
- public void testCountUsersMapper() throws Exception {
-
Mapper<VarLongWritable,VectorWritable,CountUsersKeyWritable,VarLongWritable>.Context
context =
- EasyMock.createMock(Mapper.Context.class);
- context.write(keyForUserID(12L), EasyMock.eq(new VarLongWritable(12L)));
- context.write(keyForUserID(35L), EasyMock.eq(new VarLongWritable(35L)));
- EasyMock.replay(context);
-
- CountUsersMapper mapper = new CountUsersMapper();
- mapper.map(new VarLongWritable(12), new VectorWritable(), context);
- mapper.map(new VarLongWritable(35), new VectorWritable(), context);
-
- EasyMock.verify(context);
- }
-
- /**
- * Applies an {@link IArgumentMatcher} to a {@link CountUsersKeyWritable}
checking whether it matches the userID
- */
- static CountUsersKeyWritable keyForUserID(final long userID) {
- EasyMock.reportMatcher(new IArgumentMatcher() {
- @Override
- public boolean matches(Object argument) {
- if (argument instanceof CountUsersKeyWritable) {
- CountUsersKeyWritable key = (CountUsersKeyWritable) argument;
- return userID == key.getUserID();
- }
- return false;
- }
-
- @Override
- public void appendTo(StringBuffer buffer) {}
- });
-
- return null;
- }
-
- /**
- * Tests {@link CountUsersReducer}
- */
- @Test
- public void testCountUsersReducer() throws Exception {
-
Reducer<CountUsersKeyWritable,VarLongWritable,VarIntWritable,NullWritable>.Context
context =
- EasyMock.createMock(Reducer.Context.class);
- context.write(new VarIntWritable(3), NullWritable.get());
- EasyMock.replay(context);
-
- List<VarLongWritable> userIDs = Arrays.asList(new VarLongWritable(1L), new
VarLongWritable(1L),
- new VarLongWritable(3L), new
VarLongWritable(5L),
- new VarLongWritable(5L), new
VarLongWritable(5L));
-
- new CountUsersReducer().reduce(null, userIDs, context);
- EasyMock.verify(context);
- }
-
- /**
* Tests {@link MostSimilarItemPairsMapper}
*/
@Test
@@ -199,12 +140,6 @@ public final class ItemSimilarityJobTest
similarityJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(),
"--similarityClassname",
DistributedUncenteredZeroAssumingCosineVectorSimilarity.class.getName()
});
- File countUsersPart = new File(tmpDir, "countUsers");
- int numberOfUsers = TasteHadoopUtils.readIntFromFile(new Configuration(),
- new Path(countUsersPart.getAbsolutePath()));
-
- assertEquals(3, numberOfUsers);
-
File outPart = outputDir.listFiles(new FilenameFilter() {
@Override
public boolean accept(File dir, String name) {