Author: srowen
Date: Sun Mar 20 15:27:02 2011
New Revision: 1083467

URL: http://svn.apache.org/viewvc?rev=1083467&view=rev
Log:
Last of first round of changes for KDD Cup example code

Added:
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java
Modified:
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
    
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java

Added: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java?rev=1083467&view=auto
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
 (added)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/ToCSV.java
 Sun Mar 20 15:27:02 2011
@@ -0,0 +1,79 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup;
+
+import java.io.BufferedWriter;
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Writer;
+import java.nio.charset.Charset;
+import java.util.zip.GZIPOutputStream;
+
+
+import org.apache.mahout.cf.taste.model.PreferenceArray;
+import org.apache.mahout.common.Pair;
+
+/**
+ * <p>This class converts a KDD Cup input file into a compressed CSV format. The output format is
+ * <code>userID,itemID,score,timestamp</code>. It can optionally restrict its output to exclude
+ * score and/or timestamp.</p>
+ *
+ * <p>Run as: <code>ToCSV (input file) (output file) [num columns to output]</code></p>
+ */
+public final class ToCSV {
+
+  private ToCSV() {
+  }
+
+  /**
+   * Reads every user's preferences from the input file and writes one CSV line per preference to the
+   * output file, which is gzip-compressed and encoded as UTF-8.
+   *
+   * @param args <code>args[0]</code> is the input file and <code>args[1]</code> is the output file. The
+   *  optional <code>args[2]</code> is the number of columns to write: the score is written only when it
+   *  is greater than 2, and the timestamp only when it is greater than 3. It defaults to 4.
+   * @throws IllegalArgumentException if fewer than two arguments are given
+   * @throws Exception if the column count is not an integer, or if reading or writing fails
+   */
+  public static void main(String[] args) throws Exception {
+
+    // Fail with a usage message rather than a bare ArrayIndexOutOfBoundsException
+    if (args.length < 2) {
+      throw new IllegalArgumentException("Usage: ToCSV (input file) (output file) [num columns to output]");
+    }
+
+    File inputFile = new File(args[0]);
+    File outputFile = new File(args[1]);
+    int columnsToOutput = 4;
+    if (args.length >= 3) {
+      columnsToOutput = Integer.parseInt(args[2]);
+    }
+
+    OutputStream outStream = new GZIPOutputStream(new FileOutputStream(outputFile));
+    Writer outWriter = new BufferedWriter(new OutputStreamWriter(outStream, Charset.forName("UTF-8")));
+
+    try {
+      for (Pair<PreferenceArray,long[]> user : new DataFileIterable(inputFile)) {
+        PreferenceArray prefs = user.getFirst();
+        long[] timestamps = user.getSecond();
+        for (int i = 0; i < prefs.length(); i++) {
+          outWriter.write(String.valueOf(prefs.getUserID(i)));
+          outWriter.write(',');
+          outWriter.write(String.valueOf(prefs.getItemID(i)));
+          if (columnsToOutput > 2) {
+            outWriter.write(',');
+            outWriter.write(String.valueOf(prefs.getValue(i)));
+          }
+          if (columnsToOutput > 3) {
+            outWriter.write(',');
+            outWriter.write(String.valueOf(timestamps[i]));
+          }
+          outWriter.write('\n');
+        }
+      }
+    } finally {
+      // close() flushes the buffer and closes the wrapped gzip and file streams;
+      // doing it in finally releases the file handle even when conversion fails part-way.
+      outWriter.close();
+    }
+  }
+
+}

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Callable.java
 Sun Mar 20 15:27:02 2011
@@ -18,6 +18,7 @@
 package org.apache.mahout.cf.taste.example.kddcup.track1;
 
 import java.util.concurrent.Callable;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.mahout.cf.taste.common.NoSuchItemException;
 import org.apache.mahout.cf.taste.common.TasteException;
@@ -29,6 +30,7 @@ import org.slf4j.LoggerFactory;
 final class Track1Callable implements Callable<byte[]> {
 
   private static final Logger log = 
LoggerFactory.getLogger(Track1Callable.class);
+  private static final AtomicInteger COUNT = new AtomicInteger();
 
   private final Recommender recommender;
   private final PreferenceArray userTest;
@@ -53,15 +55,24 @@ final class Track1Callable implements Ca
         continue;
       }
 
-      int scaledEstimate = (int) (estimate * 2.55);
-      if (scaledEstimate > 255) {
-        scaledEstimate = 255;
-      } else if (scaledEstimate < 0) {
-        scaledEstimate = 0;
+      if (Double.isNaN(estimate)) {
+        log.warn("Unable to compute estimate for user {}, item {}", userID, 
itemID);
+        result[i] = 0x7F;
+      } else {
+        int scaledEstimate = (int) (estimate * 2.55);
+        if (scaledEstimate > 255) {
+          scaledEstimate = 255;
+        } else if (scaledEstimate < 0) {
+          scaledEstimate = 0;
+        }
+        result[i] = (byte) scaledEstimate;
       }
+    }
 
-      result[i] = (byte) scaledEstimate;
+    if (COUNT.incrementAndGet() % 10000 == 0) {
+      log.info("Completed {} users", COUNT.get());
     }
+
     return result;
   }
 

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track1/Track1Recommender.java
 Sun Mar 20 15:27:02 2011
@@ -25,6 +25,7 @@ import org.apache.mahout.cf.taste.common
 import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
 import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
 import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
+import org.apache.mahout.cf.taste.impl.similarity.UncenteredCosineSimilarity;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.recommender.IDRescorer;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
@@ -37,7 +38,7 @@ public final class Track1Recommender imp
 
   public Track1Recommender(DataModel dataModel) throws TasteException {
     // Change this to whatever you like!
-    ItemSimilarity similarity = new CachingItemSimilarity(new 
LogLikelihoodSimilarity(dataModel), dataModel);
+    ItemSimilarity similarity = new UncenteredCosineSimilarity(dataModel);
     recommender = new GenericItemBasedRecommender(dataModel, similarity);
   }
   

Added: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java?rev=1083467&view=auto
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java
 (added)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/HybridSimilarity.java
 Sun Mar 20 15:27:02 2011
@@ -0,0 +1,60 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.example.kddcup.track2;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.TasteException;
+import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
+import org.apache.mahout.cf.taste.model.DataModel;
+import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
+
+/**
+ * An {@link ItemSimilarity} whose value for a pair of items is the product of two other similarities:
+ * a {@link LogLikelihoodSimilarity} computed over the given data model, and a
+ * {@link TrackItemSimilarity} built from the given data file directory.
+ */
+final class HybridSimilarity implements ItemSimilarity {
+
+  private final ItemSimilarity cfSimilarity;
+  private final ItemSimilarity contentSimilarity;
+
+  /**
+   * @param dataModel model handed to the {@link LogLikelihoodSimilarity}
+   * @param dataFileDirectory directory handed to the {@link TrackItemSimilarity}
+   * @throws IOException if constructing the {@link TrackItemSimilarity} fails
+   */
+  HybridSimilarity(DataModel dataModel, File dataFileDirectory) throws IOException {
+    this.cfSimilarity = new LogLikelihoodSimilarity(dataModel);
+    this.contentSimilarity = new TrackItemSimilarity(dataFileDirectory);
+  }
+
+  /**
+   * @return the content-based similarity of the two items multiplied by their log-likelihood similarity
+   */
+  @Override
+  public double itemSimilarity(long itemID1, long itemID2) throws TasteException {
+    double contentScore = contentSimilarity.itemSimilarity(itemID1, itemID2);
+    double cfScore = cfSimilarity.itemSimilarity(itemID1, itemID2);
+    return contentScore * cfScore;
+  }
+
+  /**
+   * @return the array produced by the content-based similarity, with each element scaled in place by
+   *  the log-likelihood similarity at the same index
+   */
+  @Override
+  public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
+    double[] combined = contentSimilarity.itemSimilarities(itemID1, itemID2s);
+    double[] cfScores = cfSimilarity.itemSimilarities(itemID1, itemID2s);
+    int index = 0;
+    while (index < combined.length) {
+      combined[index] = combined[index] * cfScores[index];
+      index++;
+    }
+    return combined;
+  }
+
+  /**
+   * Forwards the refresh to the log-likelihood similarity only.
+   */
+  @Override
+  public void refresh(Collection<Refreshable> alreadyRefreshed) {
+    cfSimilarity.refresh(alreadyRefreshed);
+  }
+
+}

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Callable.java
 Sun Mar 20 15:27:02 2011
@@ -24,6 +24,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.TreeMap;
 import java.util.concurrent.Callable;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.mahout.cf.taste.common.NoSuchItemException;
 import org.apache.mahout.cf.taste.common.TasteException;
@@ -35,6 +36,7 @@ import org.slf4j.LoggerFactory;
 final class Track2Callable implements Callable<UserResult> {
 
   private static final Logger log = 
LoggerFactory.getLogger(Track2Callable.class);
+  private static final AtomicInteger COUNT = new AtomicInteger();
 
   private final Recommender recommender;
   private final PreferenceArray userTest;
@@ -94,6 +96,11 @@ final class Track2Callable implements Ca
     for (int i = 0; i < testSize; i++) {
       result[i] = topThree.contains(userTest.getItemID(i));
     }
+
+    if (COUNT.incrementAndGet() % 1000 == 0) {
+      log.info("Completed {} users", COUNT.get());
+    }
+
     return new UserResult(userID, result);
   }
 }

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/Track2Recommender.java
 Sun Mar 20 15:27:02 2011
@@ -24,11 +24,7 @@ import java.util.List;
 
 import org.apache.mahout.cf.taste.common.Refreshable;
 import org.apache.mahout.cf.taste.common.TasteException;
-import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
 import 
org.apache.mahout.cf.taste.impl.recommender.GenericBooleanPrefItemBasedRecommender;
-import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
-import org.apache.mahout.cf.taste.impl.similarity.CachingItemSimilarity;
-import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
 import org.apache.mahout.cf.taste.model.DataModel;
 import org.apache.mahout.cf.taste.recommender.IDRescorer;
 import org.apache.mahout.cf.taste.recommender.RecommendedItem;
@@ -43,7 +39,7 @@ public final class Track2Recommender imp
     // Change this to whatever you like!
     ItemSimilarity similarity;
     try {
-      similarity = new TrackItemSimilarity(dataFileDirectory);
+      similarity = new HybridSimilarity(dataModel, dataFileDirectory);
     } catch (IOException ioe) {
       throw new TasteException(ioe);
     }

Modified: 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java?rev=1083467&r1=1083466&r2=1083467&view=diff
==============================================================================
--- 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
 (original)
+++ 
mahout/trunk/examples/src/main/java/org/apache/mahout/cf/taste/example/kddcup/track2/TrackItemSimilarity.java
 Sun Mar 20 15:27:02 2011
@@ -22,7 +22,6 @@ import java.io.IOException;
 import java.util.Collection;
 
 import org.apache.mahout.cf.taste.common.Refreshable;
-import org.apache.mahout.cf.taste.common.TasteException;
 import org.apache.mahout.cf.taste.example.kddcup.KDDCupDataModel;
 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
 import org.apache.mahout.cf.taste.impl.common.FastIDSet;


Reply via email to