Author: srowen
Date: Fri Aug 13 18:20:53 2010
New Revision: 985314
URL: http://svn.apache.org/viewvc?rev=985314&view=rev
Log:
MAHOUT-463
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
Fri Aug 13 18:20:53 2010
@@ -65,6 +65,8 @@ public final class AggregateAndRecommend
private FastIDSet itemsToRecommendFor;
private OpenIntLongHashMap indexItemIDMap;
+ private static final float BOOLEAN_PREF_VALUE = 1.0f;
+
@Override
protected void setup(Context context) {
Configuration jobConf = context.getConfiguration();
@@ -128,10 +130,13 @@ public final class AggregateAndRecommend
Iterator<Element> predictions = predictionVector.iterateNonZero();
List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>();
while (predictions.hasNext() && recommendations.size() < recommendationsPerUser) {
- int itemIDIndex = predictions.next().index();
- long itemID = indexItemIDMap.get(itemIDIndex);
- if (itemsToRecommendFor == null || itemsToRecommendFor.contains(itemID)) {
- recommendations.add(new GenericRecommendedItem(itemID, 1.0f));
+ Vector.Element prediction = predictions.next();
+ /* NaN means the user already knows this item */
+ if (!Double.isNaN(prediction.get())) {
+ long itemID = indexItemIDMap.get(prediction.index());
+ if (itemsToRecommendFor == null ||
itemsToRecommendFor.contains(itemID)) {
+ recommendations.add(new GenericRecommendedItem(itemID,
BOOLEAN_PREF_VALUE));
+ }
}
}
@@ -161,8 +166,8 @@ public final class AggregateAndRecommend
}
numerators = numerators == null
- ? prefValue == 1.0f ? simColumn.clone() : simColumn.times(prefValue)
- : numerators.plus(prefValue == 1.0f ? simColumn :
simColumn.times(prefValue));
+ ? prefValue == BOOLEAN_PREF_VALUE ? simColumn.clone() :
simColumn.times(prefValue)
+ : numerators.plus(prefValue == BOOLEAN_PREF_VALUE ? simColumn :
simColumn.times(prefValue));
simColumn.assign(ABSOLUTE_VALUES);
denominators = denominators == null ? simColumn :
denominators.plus(simColumn);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
Fri Aug 13 18:20:53 2010
@@ -181,6 +181,7 @@ public final class RecommenderJob extend
IntWritable.class,
VectorWritable.class,
SequenceFileOutputFormat.class);
+
itemUserMatrix.getConfiguration().setBoolean(PrefsToItemUserMatrixMapper.BOOLEAN_DATA,
booleanData);
itemUserMatrix.waitForCompletion(true);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
Fri Aug 13 18:20:53 2010
@@ -63,6 +63,7 @@ public final class ItemSimilarityJob ext
"one of the predefined similarities (" +
SimilarityType.listEnumNames() + ')');
addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number " +
"(default: " + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')',
String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM));
+ addOption("booleanData", "b", "Treat input as without pref values",
Boolean.FALSE.toString());
Map<String,String> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
@@ -71,6 +72,7 @@ public final class ItemSimilarityJob ext
String similarityClassName = parsedArgs.get("--similarityClassname");
int maxSimilarItemsPerItem =
Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerItem"));
+ boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
Path inputPath = getInputPath();
Path outputPath = getOutputPath();
@@ -120,6 +122,7 @@ public final class ItemSimilarityJob ext
IntWritable.class,
VectorWritable.class,
SequenceFileOutputFormat.class);
+
itemUserMatrix.getConfiguration().setBoolean(PrefsToItemUserMatrixMapper.BOOLEAN_DATA,
booleanData);
itemUserMatrix.waitForCompletion(true);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java
Fri Aug 13 18:20:53 2010
@@ -32,6 +32,15 @@ import org.apache.mahout.math.hadoop.Dis
public class PrefsToItemUserMatrixMapper
extends
Mapper<LongWritable,Text,VarIntWritable,DistributedRowMatrix.MatrixEntryWritable>
{
+ public static final String BOOLEAN_DATA =
PrefsToItemUserMatrixMapper.class.getName() + ".booleanData";
+
+ private boolean booleanData;
+
+ @Override
+ protected void setup(Context ctx) throws IOException, InterruptedException {
+ booleanData = ctx.getConfiguration().getBoolean(BOOLEAN_DATA, false);
+ }
+
@Override
protected void map(LongWritable key, Text value, Context ctx)
throws IOException, InterruptedException {
@@ -39,7 +48,9 @@ public class PrefsToItemUserMatrixMapper
String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
long userID = Long.parseLong(tokens[0]);
long itemID = Long.parseLong(tokens[1]);
- float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
+
+ boolean treatAsBoolean = booleanData || tokens.length < 3;
+ float prefValue = treatAsBoolean ? 1.0f : Float.parseFloat(tokens[2]);
int row = TasteHadoopUtils.idToIndex(itemID);
int column = TasteHadoopUtils.idToIndex(userID);
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
Fri Aug 13 18:20:53 2010
@@ -47,6 +47,7 @@ import org.apache.mahout.math.VarLongWri
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.MathHelper;
+import org.apache.mahout.math.hadoop.similarity.vector.DistributedCooccurrenceVectorSimilarity;
import org.apache.mahout.math.hadoop.similarity.vector.DistributedTanimotoCoefficientVectorSimilarity;
import org.apache.mahout.math.map.OpenIntLongHashMap;
import org.easymock.IArgumentMatcher;
@@ -715,6 +716,54 @@ public class RecommenderJobTest extends
}
}
+ /**
+ * small integration test for boolean data
+ */
+ public void testCompleteJobBoolean() throws Exception {
+
+ File inputFile = getTestTempFile("prefs.txt");
+ File outputDir = getTestTempDir("output");
+ outputDir.delete();
+ File tmpDir = getTestTempDir("tmp");
+ File usersFile = getTestTempFile("users.txt");
+ writeLines(usersFile, "3");
+
+ writeLines(inputFile,
+ "1,1",
+ "1,2",
+ "1,3",
+ "2,1",
+ "2,3",
+ "2,4",
+ "3,2",
+ "3,4",
+ "4,1",
+ "4,4");
+
+ RecommenderJob recommenderJob = new RecommenderJob();
+
+ Configuration conf = new Configuration();
+ conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+ conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+ conf.setBoolean("mapred.output.compress", false);
+
+ recommenderJob.setConf(conf);
+
+ recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(),
"--similarityClassname",
+ DistributedCooccurrenceVectorSimilarity.class.getName(),
"--booleanData", "true",
+ "--usersFile", usersFile.getAbsolutePath() });
+
+ Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new
File(outputDir, "part-r-00000"));
+
+ List<RecommendedItem> recommendedToCow = recommendations.get(3L);
+ assertEquals(2, recommendedToCow.size());
+
+ long itemID1 = recommendedToCow.get(0).getItemID();
+ long itemID2 = recommendedToCow.get(1).getItemID();
+
+ assertTrue((itemID1 == 1L && itemID2 == 3L) || (itemID1 == 3L && itemID2 == 1L));
+ }
+
static Map<Long,List<RecommendedItem>> readRecommendations(File file) throws
IOException {
Map<Long,List<RecommendedItem>> recommendations = new
HashMap<Long,List<RecommendedItem>>();
FileLineIterable lineIterable = new FileLineIterable(file);
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
Fri Aug 13 18:20:53 2010
@@ -138,6 +138,24 @@ public final class ItemSimilarityTest ex
EasyMock.verify(context);
}
+ public void testPrefsToItemUserMatrixMapperBoolean() throws Exception {
+ Mapper<LongWritable,Text,VarIntWritable, MatrixEntryWritable>.Context
context =
+ EasyMock.createMock(Mapper.Context.class);
+ context.write(EasyMock.eq(new
VarIntWritable(TasteHadoopUtils.idToIndex(100L))),
+ MathHelper.matrixEntryMatches(TasteHadoopUtils.idToIndex(100L),
+ TasteHadoopUtils.idToIndex(12L), 1d));
+ context.write(EasyMock.eq(new
VarIntWritable(TasteHadoopUtils.idToIndex(20L))),
+ MathHelper.matrixEntryMatches(TasteHadoopUtils.idToIndex(20L),
TasteHadoopUtils.idToIndex(35L), 1d));
+ EasyMock.replay(context);
+
+ PrefsToItemUserMatrixMapper mapper = new PrefsToItemUserMatrixMapper();
+ setField(mapper, "booleanData", Boolean.TRUE);
+ mapper.map(null, new Text("12,100"), context);
+ mapper.map(null, new Text("35,20,3.0"), context);
+
+ EasyMock.verify(context);
+ }
+
/**
* tests {@link PrefsToItemUserMatrixReducer}
*