Author: srowen
Date: Fri Dec 2 00:12:13 2011
New Revision: 1209318
URL: http://svn.apache.org/viewvc?rev=1209318&view=rev
Log:
MAHOUT-903 add diff details, read diff details
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java?rev=1209318&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/FullRunningAverageAndStdDevWritable.java
Fri Dec 2 00:12:13 2011
@@ -0,0 +1,93 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.cf.taste.hadoop.slopeone;
+
+import org.apache.hadoop.io.Writable;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
+import org.apache.mahout.math.Varint;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+
+/**
+ * {@link Writable} wrapper that serializes the count, average, mk and sk values of a
+ * {@link FullRunningAverageAndStdDev}, both in binary form and as a tab-separated text line.
+ */
+public final class FullRunningAverageAndStdDevWritable implements Writable {
+
+  private FullRunningAverageAndStdDev average;
+
+  /**
+   * Creates an instance wrapping an empty average. The no-argument constructor lets Hadoop
+   * instantiate this type reflectively before it calls {@link #readFields(DataInput)}.
+   */
+  public FullRunningAverageAndStdDevWritable() {
+    this(new FullRunningAverageAndStdDev());
+  }
+
+  /**
+   * @param average the running average to serialize
+   */
+  public FullRunningAverageAndStdDevWritable(FullRunningAverageAndStdDev average) {
+    this.average = average;
+  }
+
+  /**
+   * @return the wrapped average; each {@link #readFields(DataInput)} call replaces it
+   */
+  public FullRunningAverageAndStdDev getAverage() {
+    return average;
+  }
+
+  /**
+   * @return average, count, mk and sk, in that order, separated by tabs
+   */
+  @Override
+  public String toString() {
+    return new StringBuilder()
+        .append(average.getAverage()).append('\t')
+        .append(average.getCount()).append('\t')
+        .append(average.getMk()).append('\t')
+        .append(average.getSk()).toString();
+  }
+
+  /**
+   * Writes the count as an unsigned varint, followed by average, mk and sk as doubles.
+   */
+  @Override
+  public void write(DataOutput dataOutput) throws IOException {
+    Varint.writeUnsignedVarInt(average.getCount(), dataOutput);
+    dataOutput.writeDouble(average.getAverage());
+    dataOutput.writeDouble(average.getMk());
+    dataOutput.writeDouble(average.getSk());
+  }
+
+  /**
+   * Reads the fields in the order {@link #write(DataOutput)} emits them and rebuilds the average.
+   */
+  @Override
+  public void readFields(DataInput dataInput) throws IOException {
+    int count = Varint.readUnsignedVarInt(dataInput);
+    double mean = dataInput.readDouble();
+    double mk = dataInput.readDouble();
+    double sk = dataInput.readDouble();
+    average = new FullRunningAverageAndStdDev(count, mean, mk, sk);
+  }
+
+}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneAverageDiffsJob.java
Fri Dec 2 00:12:13 2011
@@ -81,7 +81,7 @@ public final class SlopeOneAverageDiffsJ
FloatWritable.class,
SlopeOneDiffsToAveragesReducer.class,
EntityEntityWritable.class,
- FloatWritable.class,
+
FullRunningAverageAndStdDevWritable.class,
TextOutputFormat.class);
FileOutputFormat.setOutputCompressorClass(diffsToAveragesJob,
GzipCodec.class);
diffsToAveragesJob.waitForCompletion(true);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/slopeone/SlopeOneDiffsToAveragesReducer.java
Fri Dec 2 00:12:13 2011
@@ -22,20 +22,19 @@ import java.io.IOException;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
public final class SlopeOneDiffsToAveragesReducer extends
- Reducer<EntityEntityWritable,FloatWritable,
EntityEntityWritable,FloatWritable> {
+
Reducer<EntityEntityWritable,FloatWritable,EntityEntityWritable,FullRunningAverageAndStdDevWritable>
{
@Override
protected void reduce(EntityEntityWritable key,
Iterable<FloatWritable> values,
Context context) throws IOException,
InterruptedException {
- int count = 0;
- double total = 0.0;
+ FullRunningAverageAndStdDev average = new FullRunningAverageAndStdDev();
for (FloatWritable value : values) {
- total += value.get();
- count++;
+ average.addDatum(value.get());
}
- context.write(key, new FloatWritable((float) (total / count)));
+ context.write(key, new FullRunningAverageAndStdDevWritable(average));
}
}
\ No newline at end of file
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverage.java
Fri Dec 2 00:12:13 2011
@@ -71,6 +71,11 @@ public class FixedRunningAverage impleme
}
@Override
+ public RunningAverage inverse() {
+ return new InvertedRunningAverage(this);
+ }
+
+ @Override
public synchronized String toString() {
return String.valueOf(average);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FixedRunningAverageAndStdDev.java
Fri Dec 2 00:12:13 2011
@@ -34,6 +34,11 @@ public final class FixedRunningAverageAn
}
@Override
+ public RunningAverageAndStdDev inverse() {
+ return new InvertedRunningAverageAndStdDev(this);
+ }
+
+ @Override
public synchronized String toString() {
return super.toString() + ',' + stdDev;
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverage.java
Fri Dec 2 00:12:13 2011
@@ -95,6 +95,11 @@ public class FullRunningAverage implemen
public synchronized double getAverage() {
return average;
}
+
+ @Override
+ public RunningAverage inverse() {
+ return new InvertedRunningAverage(this);
+ }
@Override
public synchronized String toString() {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/FullRunningAverageAndStdDev.java
Fri Dec 2 00:12:13 2011
@@ -42,6 +42,14 @@ public final class FullRunningAverageAnd
recomputeStdDev();
}
+ public double getMk() {
+ return mk;
+ }
+
+ public double getSk() {
+ return sk;
+ }
+
@Override
public synchronized double getStandardDeviation() {
return stdDev;
@@ -85,6 +93,11 @@ public final class FullRunningAverageAnd
int count = getCount();
stdDev = count > 1 ? Math.sqrt(sk / (count - 1)) : Double.NaN;
}
+
+ @Override
+ public RunningAverageAndStdDev inverse() {
+ return new InvertedRunningAverageAndStdDev(this);
+ }
@Override
public synchronized String toString() {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverage.java
Fri Dec 2 00:12:13 2011
@@ -49,5 +49,10 @@ public final class InvertedRunningAverag
public double getAverage() {
return -delegate.getAverage();
}
+
+ @Override
+ public RunningAverage inverse() {
+ return delegate;
+ }
}
\ No newline at end of file
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/InvertedRunningAverageAndStdDev.java
Fri Dec 2 00:12:13 2011
@@ -54,5 +54,10 @@ public final class InvertedRunningAverag
public double getStandardDeviation() {
return delegate.getStandardDeviation();
}
+
+ @Override
+ public RunningAverageAndStdDev inverse() {
+ return delegate;
+ }
}
\ No newline at end of file
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverage.java
Fri Dec 2 00:12:13 2011
@@ -58,5 +58,10 @@ public interface RunningAverage {
int getCount();
double getAverage();
+
+ /**
+ * @return a (possibly immutable) object whose average is the negative of
this object's
+ */
+ RunningAverage inverse();
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/RunningAverageAndStdDev.java
Fri Dec 2 00:12:13 2011
@@ -26,5 +26,10 @@ public interface RunningAverageAndStdDev
/** @return standard deviation of data */
double getStandardDeviation();
+
+ /**
+ * @return a (possibly immutable) object whose average is the negative of
this object's
+ */
+ RunningAverageAndStdDev inverse();
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverage.java
Fri Dec 2 00:12:13 2011
@@ -86,6 +86,11 @@ public class WeightedRunningAverage impl
public synchronized double getAverage() {
return average;
}
+
+ @Override
+ public RunningAverage inverse() {
+ return new InvertedRunningAverage(this);
+ }
@Override
public synchronized String toString() {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/common/WeightedRunningAverageAndStdDev.java
Fri Dec 2 00:12:13 2011
@@ -75,6 +75,11 @@ public final class WeightedRunningAverag
return Math.sqrt((totalWeightedSquaredData * totalWeight -
totalWeightedData * totalWeightedData)
/ (totalWeight * totalWeight - totalSquaredWeight));
}
+
+ @Override
+ public RunningAverageAndStdDev inverse() {
+ return new InvertedRunningAverageAndStdDev(this);
+ }
@Override
public synchronized String toString() {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/MemoryDiffStorage.java
Fri Dec 2 00:12:13 2011
@@ -32,7 +32,6 @@ import org.apache.mahout.cf.taste.impl.c
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.InvertedRunningAverage;
-import org.apache.mahout.cf.taste.impl.common.InvertedRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.common.RefreshHelper;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
@@ -132,11 +131,7 @@ public final class MemoryDiffStorage imp
average = level2Map.get(itemID2);
}
if (inverted) {
- if (average == null) {
- return null;
- }
- return stdDevWeighted ? new
InvertedRunningAverageAndStdDev((RunningAverageAndStdDev) average)
- : new InvertedRunningAverage(average);
+ return average == null ? null : average.inverse();
} else {
return average;
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java?rev=1209318&r1=1209317&r2=1209318&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/impl/recommender/slopeone/file/FileDiffStorage.java
Fri Dec 2 00:12:13 2011
@@ -25,15 +25,18 @@ import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
+import java.util.regex.Pattern;
import org.apache.mahout.cf.taste.common.Refreshable;
+import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.common.FullRunningAverage;
+import org.apache.mahout.cf.taste.impl.common.FullRunningAverageAndStdDev;
import org.apache.mahout.cf.taste.impl.common.InvertedRunningAverage;
import org.apache.mahout.cf.taste.impl.common.LongPrimitiveIterator;
import org.apache.mahout.cf.taste.impl.common.RunningAverage;
-import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
+import
org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
import org.apache.mahout.common.iterator.FileLineIterator;
@@ -48,12 +51,21 @@ import com.google.common.base.Preconditi
* one diff per line:
* </p>
*
- * {@code itemID1,itemID2,diff}
+ * {@code itemID1,itemID2,diff[,count[,mk,sk]]}
*
* <p>
+ * The fourth column is optional, and is a count representing the number of occurrences of the item-item pair
+ * that contribute to the diff. It is assumed to be 1 if not present. The fifth and sixth columns are
+ * computed values used by {@link FullRunningAverageAndStdDev} to compute a running standard deviation.
+ * They are required if using {@link Weighting#WEIGHTED} with {@link SlopeOneRecommender}.
+ * </p>
+ *
+ * <p>
* Commas or tabs can be delimiters. This is intended for use in conjunction with the output of
* {@link org.apache.mahout.cf.taste.hadoop.slopeone.SlopeOneAverageDiffsJob}.
* </p>
+ *
+ * <p>Note that the same item-item pair should not appear on multiple lines --
one line per item-item pair.</p>
*/
public final class FileDiffStorage implements DiffStorage {
@@ -61,7 +73,8 @@ public final class FileDiffStorage imple
private static final long MIN_RELOAD_INTERVAL_MS = 60 * 1000L; // 1 minute?
private static final char COMMENT_CHAR = '#';
-
+ private static final Pattern SEPARATOR = Pattern.compile("[\t,]");
+
private final File dataFile;
private long lastModified;
private final long maxEntries;
@@ -107,10 +120,9 @@ public final class FileDiffStorage imple
iterator.next();
firstLine = iterator.peek();
}
- char delimiter = FileDataModel.determineDelimiter(firstLine);
long averageCount = 0L;
while (iterator.hasNext()) {
- averageCount = processLine(iterator.next(), delimiter, averageCount);
+ averageCount = processLine(iterator.next(), averageCount);
}
pruneInconsequentialDiffs();
@@ -124,20 +136,20 @@ public final class FileDiffStorage imple
}
}
- private long processLine(String line, char delimiter, long averageCount) {
+ private long processLine(String line, long averageCount) {
if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) {
return averageCount;
}
- int delimiterOne = line.indexOf(delimiter);
- Preconditions.checkArgument(delimiterOne >= 0, "Bad line: %s", line);
- int delimiterTwo = line.indexOf(delimiter, delimiterOne + 1);
- Preconditions.checkArgument(delimiterTwo >= 0, "Bad line: %s", line);
-
- long itemID1 = Long.parseLong(line.substring(0, delimiterOne));
- long itemID2 = Long.parseLong(line.substring(delimiterOne + 1,
delimiterTwo));
- double diff = Double.parseDouble(line.substring(delimiterTwo + 1));
+ String[] tokens = SEPARATOR.split(line);
+ Preconditions.checkArgument(tokens.length >=3 && tokens.length != 5, "Bad
line: %s", line);
+
+ long itemID1 = Long.parseLong(tokens[0]);
+ long itemID2 = Long.parseLong(tokens[1]);
+ double diff = Double.parseDouble(tokens[2]);
+ int count = tokens.length >= 4 ? Integer.parseInt(tokens[3]) : 1;
+ boolean hasMkSk = tokens.length >= 5;
if (itemID1 > itemID2) {
long temp = itemID1;
@@ -151,15 +163,21 @@ public final class FileDiffStorage imple
averageDiffs.put(itemID1, level1Map);
}
RunningAverage average = level1Map.get(itemID2);
- if (average == null && averageCount < maxEntries) {
- average = new FullRunningAverage();
+ if (average != null) {
+ throw new IllegalArgumentException("Duplicated line for item-item pair "
+ itemID1 + " / " + itemID2);
+ }
+ if (averageCount < maxEntries) {
+ if (hasMkSk) {
+ double mk = Double.parseDouble(tokens[4]);
+ double sk = Double.parseDouble(tokens[5]);
+ average = new FullRunningAverageAndStdDev(count, diff, mk, sk);
+ } else {
+ average = new FullRunningAverage(count, diff);
+ }
level1Map.put(itemID2, average);
averageCount++;
}
- if (average != null) {
- average.addDatum(diff);
- }
-
+
allRecommendableItemIDs.add(itemID1);
allRecommendableItemIDs.add(itemID2);
@@ -222,10 +240,7 @@ public final class FileDiffStorage imple
average = level2Map.get(itemID2);
}
if (inverted) {
- if (average == null) {
- return null;
- }
- return new InvertedRunningAverage(average);
+ return average == null ? null : average.inverse();
} else {
return average;
}