Author: srowen
Date: Fri Aug 13 18:20:53 2010
New Revision: 985314

URL: http://svn.apache.org/viewvc?rev=985314&view=rev
Log:
MAHOUT-463

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
 Fri Aug 13 18:20:53 2010
@@ -65,6 +65,8 @@ public final class AggregateAndRecommend
   private FastIDSet itemsToRecommendFor;
   private OpenIntLongHashMap indexItemIDMap;
 
+  private static final float BOOLEAN_PREF_VALUE = 1.0f;
+
   @Override
   protected void setup(Context context) {
     Configuration jobConf = context.getConfiguration();
@@ -128,10 +130,13 @@ public final class AggregateAndRecommend
     Iterator<Element> predictions = predictionVector.iterateNonZero();
     List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>();
     while (predictions.hasNext() && recommendations.size() < recommendationsPerUser) {
-      int itemIDIndex = predictions.next().index();
-      long itemID = indexItemIDMap.get(itemIDIndex);
-      if (itemsToRecommendFor == null || itemsToRecommendFor.contains(itemID)) {
-        recommendations.add(new GenericRecommendedItem(itemID, 1.0f));
+      Vector.Element prediction = predictions.next();
+      /* NaN means the user already knows this item */
+      if (!Double.isNaN(prediction.get())) {
+        long itemID = indexItemIDMap.get(prediction.index());
+        if (itemsToRecommendFor == null || itemsToRecommendFor.contains(itemID)) {
+          recommendations.add(new GenericRecommendedItem(itemID, BOOLEAN_PREF_VALUE));
+        }
       }
     }
 
@@ -161,8 +166,8 @@ public final class AggregateAndRecommend
       }
 
       numerators = numerators == null
-          ? prefValue == 1.0f ? simColumn.clone() : simColumn.times(prefValue)
-          : numerators.plus(prefValue == 1.0f ? simColumn : simColumn.times(prefValue));
+          ? prefValue == BOOLEAN_PREF_VALUE ? simColumn.clone() : simColumn.times(prefValue)
+          : numerators.plus(prefValue == BOOLEAN_PREF_VALUE ? simColumn : simColumn.times(prefValue));
 
       simColumn.assign(ABSOLUTE_VALUES);
       denominators = denominators == null ? simColumn : denominators.plus(simColumn);

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
 Fri Aug 13 18:20:53 2010
@@ -181,6 +181,7 @@ public final class RecommenderJob extend
                                   IntWritable.class,
                                   VectorWritable.class,
                                   SequenceFileOutputFormat.class);
+      itemUserMatrix.getConfiguration().setBoolean(PrefsToItemUserMatrixMapper.BOOLEAN_DATA, booleanData);
       itemUserMatrix.waitForCompletion(true);
     }
 

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityJob.java
 Fri Aug 13 18:20:53 2010
@@ -63,6 +63,7 @@ public final class ItemSimilarityJob ext
         "one of the predefined similarities (" + 
SimilarityType.listEnumNames() + ')');
     addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar 
items per item to this number " +
         "(default: " + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')', 
String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM));
+    addOption("booleanData", "b", "Treat input as without pref values", 
Boolean.FALSE.toString());
 
     Map<String,String> parsedArgs = parseArguments(args);
     if (parsedArgs == null) {
@@ -71,6 +72,7 @@ public final class ItemSimilarityJob ext
 
     String similarityClassName = parsedArgs.get("--similarityClassname");
     int maxSimilarItemsPerItem = 
Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerItem"));
+    boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
 
     Path inputPath = getInputPath();
     Path outputPath = getOutputPath();
@@ -120,6 +122,7 @@ public final class ItemSimilarityJob ext
                                   IntWritable.class,
                                   VectorWritable.class,
                                   SequenceFileOutputFormat.class);
+      itemUserMatrix.getConfiguration().setBoolean(PrefsToItemUserMatrixMapper.BOOLEAN_DATA, booleanData);
       itemUserMatrix.waitForCompletion(true);
     }
 

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/similarity/item/PrefsToItemUserMatrixMapper.java
 Fri Aug 13 18:20:53 2010
@@ -32,6 +32,15 @@ import org.apache.mahout.math.hadoop.Dis
 public class PrefsToItemUserMatrixMapper
     extends 
Mapper<LongWritable,Text,VarIntWritable,DistributedRowMatrix.MatrixEntryWritable>
 {
 
+  public static final String BOOLEAN_DATA = 
PrefsToItemUserMatrixMapper.class.getName() + ".booleanData";
+
+  private boolean booleanData;
+  
+  @Override
+  protected void setup(Context ctx) throws IOException, InterruptedException {
+    booleanData = ctx.getConfiguration().getBoolean(BOOLEAN_DATA, false);
+  }
+
   @Override
   protected void map(LongWritable key, Text value, Context ctx)
       throws IOException, InterruptedException {
@@ -39,7 +48,9 @@ public class PrefsToItemUserMatrixMapper
     String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
     long userID = Long.parseLong(tokens[0]);
     long itemID = Long.parseLong(tokens[1]);
-    float prefValue = tokens.length > 2 ? Float.parseFloat(tokens[2]) : 1.0f;
+
+    boolean treatAsBoolean = booleanData || tokens.length < 3;
+    float prefValue = treatAsBoolean ? 1.0f : Float.parseFloat(tokens[2]);
 
     int row = TasteHadoopUtils.idToIndex(itemID);
     int column = TasteHadoopUtils.idToIndex(userID);

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJobTest.java
 Fri Aug 13 18:20:53 2010
@@ -47,6 +47,7 @@ import org.apache.mahout.math.VarLongWri
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.math.hadoop.MathHelper;
+import 
org.apache.mahout.math.hadoop.similarity.vector.DistributedCooccurrenceVectorSimilarity;
 import 
org.apache.mahout.math.hadoop.similarity.vector.DistributedTanimotoCoefficientVectorSimilarity;
 import org.apache.mahout.math.map.OpenIntLongHashMap;
 import org.easymock.IArgumentMatcher;
@@ -715,6 +716,54 @@ public class RecommenderJobTest extends 
     }
   }
 
+  /**
+   * small integration test for boolean data
+   */
+  public void testCompleteJobBoolean() throws Exception {
+
+    File inputFile = getTestTempFile("prefs.txt");
+    File outputDir = getTestTempDir("output");
+    outputDir.delete();
+    File tmpDir = getTestTempDir("tmp");
+    File usersFile = getTestTempFile("users.txt");
+    writeLines(usersFile, "3");
+
+    writeLines(inputFile,
+        "1,1",
+        "1,2",
+        "1,3",
+        "2,1",
+        "2,3",
+        "2,4",
+        "3,2",
+        "3,4",
+        "4,1",
+        "4,4");
+
+    RecommenderJob recommenderJob = new RecommenderJob();
+
+    Configuration conf = new Configuration();
+    conf.set("mapred.input.dir", inputFile.getAbsolutePath());
+    conf.set("mapred.output.dir", outputDir.getAbsolutePath());
+    conf.setBoolean("mapred.output.compress", false);
+
+    recommenderJob.setConf(conf);
+
+    recommenderJob.run(new String[] { "--tempDir", tmpDir.getAbsolutePath(), 
"--similarityClassname",
+        DistributedCooccurrenceVectorSimilarity.class.getName(), 
"--booleanData", "true",
+        "--usersFile", usersFile.getAbsolutePath() });
+
+    Map<Long,List<RecommendedItem>> recommendations = readRecommendations(new 
File(outputDir, "part-r-00000"));
+
+    List<RecommendedItem> recommendedToCow = recommendations.get(3L);
+    assertEquals(2, recommendedToCow.size());
+
+    long itemID1 = recommendedToCow.get(0).getItemID();
+    long itemID2 = recommendedToCow.get(1).getItemID();
+
+    assertTrue((itemID1 == 1L && itemID2 == 3L) || (itemID1 == 3L && itemID2 == 1L));
+  }
+
   static Map<Long,List<RecommendedItem>> readRecommendations(File file) throws 
IOException {
     Map<Long,List<RecommendedItem>> recommendations = new 
HashMap<Long,List<RecommendedItem>>();
     FileLineIterable lineIterable = new FileLineIterable(file);

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java?rev=985314&r1=985313&r2=985314&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/hadoop/similarity/item/ItemSimilarityTest.java
 Fri Aug 13 18:20:53 2010
@@ -138,6 +138,24 @@ public final class ItemSimilarityTest ex
     EasyMock.verify(context);
   }
 
+  public void testPrefsToItemUserMatrixMapperBoolean() throws Exception {
+    Mapper<LongWritable,Text,VarIntWritable, MatrixEntryWritable>.Context 
context =
+      EasyMock.createMock(Mapper.Context.class);
+    context.write(EasyMock.eq(new 
VarIntWritable(TasteHadoopUtils.idToIndex(100L))),
+        MathHelper.matrixEntryMatches(TasteHadoopUtils.idToIndex(100L),
+        TasteHadoopUtils.idToIndex(12L), 1d));
+    context.write(EasyMock.eq(new 
VarIntWritable(TasteHadoopUtils.idToIndex(20L))),
+        MathHelper.matrixEntryMatches(TasteHadoopUtils.idToIndex(20L), 
TasteHadoopUtils.idToIndex(35L), 1d));
+    EasyMock.replay(context);
+
+    PrefsToItemUserMatrixMapper mapper = new PrefsToItemUserMatrixMapper();
+    setField(mapper, "booleanData", Boolean.TRUE);
+    mapper.map(null, new Text("12,100"), context);
+    mapper.map(null, new Text("35,20,3.0"), context);
+
+    EasyMock.verify(context);
+  }
+
   /**
    * tests {@link PrefsToItemUserMatrixReducer}
    *


Reply via email to