Author: srowen
Date: Fri Dec  2 00:12:13 2011
New Revision: 1209318

URL: http://svn.apache.org/viewvc?rev=1209318&view=rev
Log:
MAHOUT-903 add diff details, read diff details

Added:
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java

Added: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java?rev=1209318&view=auto
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
 (added)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
 Fri Dec  2 00:12:13 2011
@@ -0,0 +1,66 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.slopeone;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
+import org.apache.mahout.math.Varint;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+public final class FullRunningAverageAndStdDevWritable implements Writable {
+  
+  private FullRunningAverageAndStdDev average;
+  
+  public FullRunningAverageAndStdDevWritable(FullRunningAverageAndStdDev average) {
+    this.average = average;
+  }
+  
+  public FullRunningAverageAndStdDev getAverage() {
+    return average;
+  }
+
+  @Override
+  public String toString() {
+    return new StringBuilder()
+        .append(average.getAverage()).append('\t')
+        .append(average.getCount()).append('\t')
+        .append(average.getMk()).append('\t')
+        .append(average.getSk()).toString();
+  }
+  
+  @Override
+  public void write(DataOutput dataOutput) throws IOException {
+    Varint.writeUnsignedVarInt(average.getCount(), dataOutput);
+    dataOutput.writeDouble(average.getAverage());
+    dataOutput.writeDouble(average.getMk());
+    dataOutput.writeDouble(average.getSk());
+  }
+
+  @Override
+  public void readFields(DataInput dataInput) throws IOException {
+    int count = Varint.readUnsignedVarInt(dataInput);
+    double diff = dataInput.readDouble();
+    double mk = dataInput.readDouble();
+    double sk = dataInput.readDouble();
+    average = new FullRunningAverageAndStdDev(count, diff, mk, sk);
+  }
+
+}

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
 Fri Dec  2 00:12:13 2011
@@ -81,7 +81,7 @@ public final class SlopeOneAverageDiffsJ
                                           FloatWritable.class,
                                           SlopeOneDiffsToAveragesReducer.class,
                                           EntityEntityWritable.class,
-                                          FloatWritable.class,
+                                          FullRunningAverageAndStdDevWritable.class,
                                           TextOutputFormat.class);
       FileOutputFormat.setOutputCompressorClass(diffsToAveragesJob, GzipCodec.class);
       diffsToAveragesJob.waitForCompletion(true);

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
 Fri Dec  2 00:12:13 2011
@@ -22,20 +22,19 @@ import java.io.IOException;
 import org.apache.hadoop.io.FloatWritable;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
 
 public final class SlopeOneDiffsToAveragesReducer extends
-    Reducer<EntityEntityWritable,FloatWritable, EntityEntityWritable,FloatWritable> {
+    Reducer<EntityEntityWritable,FloatWritable,EntityEntityWritable,FullRunningAverageAndStdDevWritable> {
   
   @Override
   protected void reduce(EntityEntityWritable key,
                         Iterable<FloatWritable> values,
                         Context context) throws IOException, InterruptedException {
-    int count = 0;
-    double total = 0.0;
+    FullRunningAverageAndStdDev average = new FullRunningAverageAndStdDev();
     for (FloatWritable value : values) {
-      total += value.get();
-      count++;
+      average.addDatum(value.get());
     }
-    context.write(key, new FloatWritable((float) (total / count)));
+    context.write(key, new FullRunningAverageAndStdDevWritable(average));
   }
 }
\ No newline at end of file

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
 Fri Dec  2 00:12:13 2011
@@ -71,6 +71,11 @@ public class FixedRunningAverage impleme
   }
 
   @Override
+  public RunningAverage inverse() {
+    return new InvertedRunningAverage(this);
+  }
+
+  @Override
   public synchronized String toString() {
     return String.valueOf(average);
   }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
 Fri Dec  2 00:12:13 2011
@@ -34,6 +34,11 @@ public final class FixedRunningAverageAn
   }
 
   @Override
+  public RunningAverageAndStdDev inverse() {
+    return new InvertedRunningAverageAndStdDev(this);
+  }
+
+  @Override
   public synchronized String toString() {
     return super.toString() + ',' + stdDev;
   }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
 Fri Dec  2 00:12:13 2011
@@ -95,6 +95,11 @@ public class FullRunningAverage implemen
   public synchronized double getAverage() {
     return average;
   }
+
+  @Override
+  public RunningAverage inverse() {
+    return new InvertedRunningAverage(this);
+  }
   
   @Override
   public synchronized String toString() {

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
 Fri Dec  2 00:12:13 2011
@@ -42,6 +42,14 @@ public final class FullRunningAverageAnd
     recomputeStdDev();
   }
 
+  public double getMk() {
+    return mk;
+  }
+  
+  public double getSk() {
+    return sk;
+  }
+
   @Override
   public synchronized double getStandardDeviation() {
     return stdDev;
@@ -85,6 +93,11 @@ public final class FullRunningAverageAnd
     int count = getCount();
     stdDev = count > 1 ? Math.sqrt(sk / (count - 1)) : Double.NaN;
   }
+
+  @Override
+  public RunningAverageAndStdDev inverse() {
+    return new InvertedRunningAverageAndStdDev(this);
+  }
   
   @Override
   public synchronized String toString() {

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
 Fri Dec  2 00:12:13 2011
@@ -49,5 +49,10 @@ public final class InvertedRunningAverag
   public double getAverage() {
     return -delegate.getAverage();
   }
+
+  @Override
+  public RunningAverage inverse() {
+    return delegate;
+  }
   
 }
\ No newline at end of file

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
 Fri Dec  2 00:12:13 2011
@@ -54,5 +54,10 @@ public final class InvertedRunningAverag
   public double getStandardDeviation() {
     return delegate.getStandardDeviation();
   }
+
+  @Override
+  public RunningAverageAndStdDev inverse() {
+    return delegate;
+  }
   
 }
\ No newline at end of file

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
 Fri Dec  2 00:12:13 2011
@@ -58,5 +58,10 @@ public interface RunningAverage {
   int getCount();
   
   double getAverage();
+
+  /**
+   * @return a (possibly immutable) object whose average is the negative of this object's
+   */
+  RunningAverage inverse();
   
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
 Fri Dec  2 00:12:13 2011
@@ -26,5 +26,10 @@ public interface RunningAverageAndStdDev
   
   /** @return standard deviation of data */
   double getStandardDeviation();
+
+  /**
+   * @return a (possibly immutable) object whose average is the negative of this object's
+   */
+  RunningAverageAndStdDev inverse();
   
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
 Fri Dec  2 00:12:13 2011
@@ -86,6 +86,11 @@ public class WeightedRunningAverage impl
   public synchronized double getAverage() {
     return average;
   }
+
+  @Override
+  public RunningAverage inverse() {
+    return new InvertedRunningAverage(this);
+  }
   
   @Override
   public synchronized String toString() {

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
 Fri Dec  2 00:12:13 2011
@@ -75,6 +75,11 @@ public final class WeightedRunningAverag
     return Math.sqrt((totalWeightedSquaredData * totalWeight - totalWeightedData * totalWeightedData)
                          / (totalWeight * totalWeight - totalSquaredWeight));
   }
+
+  @Override
+  public RunningAverageAndStdDev inverse() {
+    return new InvertedRunningAverageAndStdDev(this);
+  }
   
   @Override
   public synchronized String toString() {

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
 Fri Dec  2 00:12:13 2011
@@ -32,7 +32,6 @@ import org.apache.mahout.cf.taste.impl.c
 import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
 import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
 import org.apache.mahout.cf.taste.impl.common.InvertedRunningAverage;
-import org.apache.mahout.cf.taste.impl.common.InvertedRunningAverageAndStdDev;
 import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
 import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
 import org.apache.mahout.cf.taste.impl.common.RunningAverage;
@@ -132,11 +131,7 @@ public final class MemoryDiffStorage imp
       average = level2Map.get(itemID2);
     }
     if (inverted) {
-      if (average == null) {
-        return null;
-      }
-      return stdDevWeighted ? new InvertedRunningAverageAndStdDev((RunningAverageAndStdDev) average)
-          : new InvertedRunningAverage(average);
+      return average == null ? null : average.inverse();
     } else {
       return average;
     }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
 Fri Dec  2 00:12:13 2011
@@ -25,15 +25,18 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.concurrent.locks.ReadWriteLock;
 import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.regex.Pattern;
 
 import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.Weighting;
 import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
 import org.apache.mahout.cf.taste.impl.common.FastIDSet;
 import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
 import org.apache.mahout.cf.taste.impl.common.InvertedRunningAverage;
 import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
 import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
 import org.apache.mahout.cf.taste.model.PreferenceArray;
 import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
 import org.apache.mahout.common.iterator.FileLineIterator;
@@ -48,12 +51,21 @@ import com.google.common.base.Preconditi
  * one diff per line:
  * </p>
  * 
- * {@code itemID1,itemID2,diff}
+ * {@code itemID1,itemID2,diff[,count[,mk,sk]]}
  * 
  * <p>
+ * The fourth column is optional, and is a count representing the number of occurrences of the item-item pair
+ * that contribute to the diff. It is assumed to be 1 if not present. The fifth and sixth arguments are
+ * computed values used by {@link FullRunningAverageAndStdDev} implementations to compute a running standard deviation.
+ * They are required if using {@link Weighting#WEIGHTED} with {@link SlopeOneRecommender}.
+ * </p>
+ *
+ * <p>
  * Commas or tabs can be delimiters. This is intended for use in conjuction with the output of
  * {@link org.apache.mahout.cf.taste.hadoop.slopeone.SlopeOneAverageDiffsJob}.
  * </p>
+ *
+ * <p>Note that the same item-item pair should not appear on multiple lines -- one line per item-item pair.</p>
  */
 public final class FileDiffStorage implements DiffStorage {
   
@@ -61,7 +73,8 @@ public final class FileDiffStorage imple
   
   private static final long MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute?
   private static final char COMMENT_CHAR = '#';
-  
+  private static final Pattern SEPARATOR = Pattern.compile("[\t,]");
+
   private final File dataFile;
   private long lastModified;
   private final long maxEntries;
@@ -107,10 +120,9 @@ public final class FileDiffStorage imple
           iterator.next();
           firstLine = iterator.peek();
         }
-        char delimiter = FileDataModel.determineDelimiter(firstLine);
         long averageCount = 0L;
         while (iterator.hasNext()) {
-          averageCount = processLine(iterator.next(), delimiter, averageCount);
+          averageCount = processLine(iterator.next(), averageCount);
         }
         
         pruneInconsequentialDiffs();
@@ -124,20 +136,20 @@ public final class FileDiffStorage imple
     }
   }
   
-  private long processLine(String line, char delimiter, long averageCount) {
+  private long processLine(String line, long averageCount) {
 
     if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) {
       return averageCount;
     }
     
-    int delimiterOne = line.indexOf(delimiter);
-    Preconditions.checkArgument(delimiterOne >= 0, "Bad line: %s", line);
-    int delimiterTwo = line.indexOf(delimiter, delimiterOne + 1);
-    Preconditions.checkArgument(delimiterTwo >= 0, "Bad line: %s", line);
-    
-    long itemID1 = Long.parseLong(line.substring(0, delimiterOne));
-    long itemID2 = Long.parseLong(line.substring(delimiterOne + 1, delimiterTwo));
-    double diff = Double.parseDouble(line.substring(delimiterTwo + 1));
+    String[] tokens = SEPARATOR.split(line);
+    Preconditions.checkArgument(tokens.length >=3 && tokens.length != 5, "Bad line: %s", line);
+
+    long itemID1 = Long.parseLong(tokens[0]);
+    long itemID2 = Long.parseLong(tokens[1]);
+    double diff = Double.parseDouble(tokens[2]);
+    int count = tokens.length >= 4 ? Integer.parseInt(tokens[3]) : 1;
+    boolean hasMkSk = tokens.length >= 5;
     
     if (itemID1 > itemID2) {
       long temp = itemID1;
@@ -151,15 +163,21 @@ public final class FileDiffStorage imple
       averageDiffs.put(itemID1, level1Map);
     }
     RunningAverage average = level1Map.get(itemID2);
-    if (average == null && averageCount < maxEntries) {
-      average = new FullRunningAverage();
+    if (average != null) {
+      throw new IllegalArgumentException("Duplicated line for item-item pair " + itemID1 + " / " + itemID2);
+    }
+    if (averageCount < maxEntries) {
+      if (hasMkSk) {
+        double mk = Double.parseDouble(tokens[4]);
+        double sk = Double.parseDouble(tokens[5]);
+        average = new FullRunningAverageAndStdDev(count, diff, mk, sk);
+      } else {
+        average = new FullRunningAverage(count, diff);
+      }
       level1Map.put(itemID2, average);
       averageCount++;
     }
-    if (average != null) {
-      average.addDatum(diff);
-    }
-    
+
     allRecommendableItemIDs.add(itemID1);
     allRecommendableItemIDs.add(itemID2);
     
@@ -222,10 +240,7 @@ public final class FileDiffStorage imple
       average = level2Map.get(itemID2);
     }
     if (inverted) {
-      if (average == null) {
-        return null;
-      }
-      return new InvertedRunningAverage(average);
+      return average == null ? null : average.inverse();
     } else {
       return average;
     }


Reply via email to