Author: ssc
Date: Mon Jun 17 07:08:02 2013
New Revision: 1493660
URL: http://svn.apache.org/r1493660
Log:
MAHOUT-1264 Performance optimizations in RecommenderJob
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/SimilarityMatrixRowWrapperMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java
Modified: mahout/trunk/CHANGELOG
URL:
http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Mon Jun 17 07:08:02 2013
@@ -2,6 +2,8 @@ Mahout Change Log
Release 0.8 - unreleased
+ MAHOUT-1264: Performance optimizations in RecommenderJob (ssc)
+
MAHOUT-1262: Cleanup LDA code (ssc)
MAHOUT-1255: Fix for weights in Multinomial sometimes overflowing in
BallKMeans (dfilimon)
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java Mon Jun 17 07:08:02 2013
@@ -69,6 +69,8 @@ public final class AggregateAndRecommend
private FastIDSet itemsToRecommendFor;
private OpenIntLongHashMap indexItemIDMap;
+ private final RecommendedItemsWritable recommendedItems = new RecommendedItemsWritable();
+
private static final float BOOLEAN_PREF_VALUE = 1.0f;
@Override
@@ -203,7 +205,8 @@ public final class AggregateAndRecommend
List<RecommendedItem> topItems = topKItems.getTopItems();
if (!topItems.isEmpty()) {
- context.write(userID, new RecommendedItemsWritable(topItems));
+ recommendedItems.set(topItems);
+ context.write(userID, recommendedItems);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterAsVectorAndPrefsReducer.java Mon Jun 17 07:08:02 2013
@@ -35,6 +35,10 @@ import java.util.List;
*/
public class ItemFilterAsVectorAndPrefsReducer
extends Reducer<VarLongWritable,VarLongWritable,VarIntWritable,VectorAndPrefsWritable> {
+
+ private final VarIntWritable itemIDIndexWritable = new VarIntWritable();
+ private final VectorAndPrefsWritable vectorAndPrefs = new VectorAndPrefsWritable();
+
@Override
protected void reduce(VarLongWritable itemID, Iterable<VarLongWritable> values, Context ctx)
throws IOException, InterruptedException {
@@ -51,6 +55,8 @@ public class ItemFilterAsVectorAndPrefsR
prefValues.add(1.0f);
}
- ctx.write(new VarIntWritable(itemIDIndex), new VectorAndPrefsWritable(vector, userIDs, prefValues));
+ itemIDIndexWritable.set(itemIDIndex);
+ vectorAndPrefs.set(vector, userIDs, prefValues);
+ ctx.write(itemIDIndexWritable, vectorAndPrefs);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterMapper.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemFilterMapper.java Mon Jun 17 07:08:02 2013
@@ -32,11 +32,16 @@ public class ItemFilterMapper extends Ma
private static final Pattern SEPARATOR = Pattern.compile("[\t,]");
+ private final VarLongWritable itemIDWritable = new VarLongWritable();
+ private final VarLongWritable userIDWritable = new VarLongWritable();
+
@Override
protected void map(LongWritable key, Text line, Context ctx) throws IOException, InterruptedException {
String[] tokens = SEPARATOR.split(line.toString());
long userID = Long.parseLong(tokens[0]);
long itemID = Long.parseLong(tokens[1]);
- ctx.write(new VarLongWritable(itemID), new VarLongWritable(userID));
+ itemIDWritable.set(itemID);
+ userIDWritable.set(userID);
+ ctx.write(itemIDWritable, userIDWritable);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexMapper.java Mon Jun 17 07:08:02 2013
@@ -33,6 +33,9 @@ public final class ItemIDIndexMapper ext
private boolean transpose;
+ private final VarIntWritable indexWritable = new VarIntWritable();
+ private final VarLongWritable itemIDWritable = new VarLongWritable();
+
@Override
protected void setup(Context context) {
Configuration jobConf = context.getConfiguration();
@@ -46,6 +49,8 @@ public final class ItemIDIndexMapper ext
String[] tokens = TasteHadoopUtils.splitPrefTokens(value.toString());
long itemID = Long.parseLong(tokens[transpose ? 0 : 1]);
int index = TasteHadoopUtils.idToIndex(itemID);
- context.write(new VarIntWritable(index), new VarLongWritable(itemID));
+ indexWritable.set(index);
+ itemIDWritable.set(itemID);
+ context.write(indexWritable, itemIDWritable);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ItemIDIndexReducer.java Mon Jun 17 07:08:02 2013
@@ -25,7 +25,9 @@ import org.apache.mahout.math.VarLongWri
public final class ItemIDIndexReducer extends
Reducer<VarIntWritable, VarLongWritable, VarIntWritable,VarLongWritable> {
-
+
+ private final VarLongWritable minimumItemIDWritable = new VarLongWritable();
+
@Override
protected void reduce(VarIntWritable index,
Iterable<VarLongWritable> possibleItemIDs,
@@ -38,7 +40,8 @@ public final class ItemIDIndexReducer ex
}
}
if (minimumItemID != Long.MAX_VALUE) {
- context.write(index, new VarLongWritable(minimumItemID));
+ minimumItemIDWritable.set(minimumItemID);
+ context.write(index, minimumItemIDWritable);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyMapper.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/PartialMultiplyMapper.java Mon Jun 17 07:08:02 2013
@@ -31,6 +31,9 @@ import org.apache.mahout.math.Vector;
public final class PartialMultiplyMapper extends
Mapper<VarIntWritable,VectorAndPrefsWritable,VarLongWritable,PrefAndSimilarityColumnWritable> {
+ private final VarLongWritable userIDWritable = new VarLongWritable();
+ private final PrefAndSimilarityColumnWritable prefAndSimilarityColumn = new PrefAndSimilarityColumnWritable();
+
@Override
protected void map(VarIntWritable key,
VectorAndPrefsWritable vectorAndPrefsWritable,
@@ -40,9 +43,6 @@ public final class PartialMultiplyMapper
List<Long> userIDs = vectorAndPrefsWritable.getUserIDs();
List<Float> prefValues = vectorAndPrefsWritable.getValues();
- VarLongWritable userIDWritable = new VarLongWritable();
- PrefAndSimilarityColumnWritable prefAndSimilarityColumn = new PrefAndSimilarityColumnWritable();
-
for (int i = 0; i < userIDs.size(); i++) {
long userID = userIDs.get(i);
float prefValue = prefValues.get(i);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java Mon Jun 17 07:08:02 2013
@@ -22,7 +22,7 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.JobContext;
-import org.apache.hadoop.mapreduce.Mapper;
+import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat;
@@ -147,8 +147,6 @@ public final class RecommenderJob extend
Path prepPath = getTempPath("preparePreferenceMatrix");
Path similarityMatrixPath = getTempPath("similarityMatrix");
- Path prePartialMultiplyPath1 = getTempPath("prePartialMultiply1");
- Path prePartialMultiplyPath2 = getTempPath("prePartialMultiply2");
Path explicitFilterPath = getTempPath("explicitFilterPath");
Path partialMultiplyPath = getTempPath("partialMultiply");
@@ -211,39 +209,29 @@ public final class RecommenderJob extend
//start the multiplication of the co-occurrence matrix by the user vectors
if (shouldRunNextPhase(parsedArgs, currentPhase)) {
- Job prePartialMultiply1 = prepareJob(
- similarityMatrixPath, prePartialMultiplyPath1, SequenceFileInputFormat.class,
- SimilarityMatrixRowWrapperMapper.class, VarIntWritable.class, VectorOrPrefWritable.class,
- SequenceFileOutputFormat.class);
- boolean succeeded = prePartialMultiply1.waitForCompletion(true);
- if (!succeeded) {
- return -1;
- }
- //continue the multiplication
- Job prePartialMultiply2 = prepareJob(new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS),
- prePartialMultiplyPath2,
- SequenceFileInputFormat.class,
- UserVectorSplitterMapper.class,
- VarIntWritable.class,
- VectorOrPrefWritable.class,
- SequenceFileOutputFormat.class);
+ Job partialMultiply = new Job(getConf(), "partialMultiply");
+ Configuration partialMultiplyConf = partialMultiply.getConfiguration();
+
+ MultipleInputs.addInputPath(partialMultiply, similarityMatrixPath, SequenceFileInputFormat.class,
+ SimilarityMatrixRowWrapperMapper.class);
+ MultipleInputs.addInputPath(partialMultiply, new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS),
+ SequenceFileInputFormat.class, UserVectorSplitterMapper.class);
+ partialMultiply.setJarByClass(ToVectorAndPrefReducer.class);
+ partialMultiply.setMapOutputKeyClass(VarIntWritable.class);
+ partialMultiply.setMapOutputValueClass(VectorOrPrefWritable.class);
+ partialMultiply.setReducerClass(ToVectorAndPrefReducer.class);
+ partialMultiply.setOutputFormatClass(SequenceFileOutputFormat.class);
+ partialMultiply.setOutputKeyClass(VarIntWritable.class);
+ partialMultiply.setOutputValueClass(VectorAndPrefsWritable.class);
+ partialMultiplyConf.setBoolean("mapred.compress.map.output", true);
+ partialMultiplyConf.set("mapred.output.dir", partialMultiplyPath.toString());
+
if (usersFile != null) {
- prePartialMultiply2.getConfiguration().set(UserVectorSplitterMapper.USERS_FILE, usersFile);
+ partialMultiplyConf.set(UserVectorSplitterMapper.USERS_FILE, usersFile);
}
- prePartialMultiply2.getConfiguration().setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED,
- maxPrefsPerUser);
- succeeded = prePartialMultiply2.waitForCompletion(true);
- if (!succeeded) {
- return -1;
- }
- //finish the job
- Job partialMultiply = prepareJob(
- new Path(prePartialMultiplyPath1 + "," + prePartialMultiplyPath2), partialMultiplyPath,
- SequenceFileInputFormat.class, Mapper.class, VarIntWritable.class, VectorOrPrefWritable.class,
- ToVectorAndPrefReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
- SequenceFileOutputFormat.class);
- setS3SafeCombinedInputPath(partialMultiply, getTempPath(), prePartialMultiplyPath1, prePartialMultiplyPath2);
- succeeded = partialMultiply.waitForCompletion(true);
+ partialMultiplyConf.setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED, maxPrefsPerUser);
+
+ boolean succeeded = partialMultiply.waitForCompletion(true);
if (!succeeded) {
return -1;
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/SimilarityMatrixRowWrapperMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/SimilarityMatrixRowWrapperMapper.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/SimilarityMatrixRowWrapperMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/SimilarityMatrixRowWrapperMapper.java Mon Jun 17 07:08:02 2013
@@ -34,6 +34,9 @@ import org.apache.mahout.math.VectorWrit
public final class SimilarityMatrixRowWrapperMapper extends
Mapper<IntWritable,VectorWritable,VarIntWritable,VectorOrPrefWritable> {
+ private final VarIntWritable index = new VarIntWritable();
+ private final VectorOrPrefWritable vectorOrPref = new VectorOrPrefWritable();
+
@Override
protected void map(IntWritable key,
VectorWritable value,
@@ -41,7 +44,11 @@ public final class SimilarityMatrixRowWr
Vector similarityMatrixRow = value.get();
/* remove self similarity */
similarityMatrixRow.set(key.get(), Double.NaN);
- context.write(new VarIntWritable(key.get()), new VectorOrPrefWritable(similarityMatrixRow));
+
+ index.set(key.get());
+ vectorOrPref.set(similarityMatrixRow);
+
+ context.write(index, vectorOrPref);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducer.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToUserVectorsReducer.java Mon Jun 17 07:08:02 2013
@@ -54,6 +54,8 @@ public final class ToUserVectorsReducer
public enum Counters { USERS }
+ private final VectorWritable userVectorWritable = new VectorWritable();
+
@Override
protected void setup(Context ctx) throws IOException, InterruptedException {
super.setup(ctx);
@@ -72,10 +74,10 @@ public final class ToUserVectorsReducer
}
if (userVector.getNumNondefaultElements() >= minPreferences) {
- VectorWritable vw = new VectorWritable(userVector);
- vw.setWritesLaxPrecision(true);
+ userVectorWritable.set(userVector);
+ userVectorWritable.setWritesLaxPrecision(true);
context.getCounter(Counters.USERS).increment(1);
- context.write(userID, vw);
+ context.write(userID, userVectorWritable);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/ToVectorAndPrefReducer.java Mon Jun 17 07:08:02 2013
@@ -28,6 +28,8 @@ import org.apache.mahout.math.Vector;
public final class ToVectorAndPrefReducer extends
Reducer<VarIntWritable,VectorOrPrefWritable,VarIntWritable,VectorAndPrefsWritable> {
+ private final VectorAndPrefsWritable vectorAndPrefs = new VectorAndPrefsWritable();
+
@Override
protected void reduce(VarIntWritable key,
Iterable<VectorOrPrefWritable> values,
@@ -54,7 +56,7 @@ public final class ToVectorAndPrefReduce
return;
}
- VectorAndPrefsWritable vectorAndPrefs = new VectorAndPrefsWritable(similarityMatrixColumn, userIDs, prefValues);
+ vectorAndPrefs.set(similarityMatrixColumn, userIDs, prefValues);
context.write(key, vectorAndPrefs);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java Mon Jun 17 07:08:02 2013
@@ -49,6 +49,9 @@ public final class UserVectorSplitterMap
private int maxPrefsPerUserConsidered;
private FastIDSet usersToRecommendFor;
+ private final VarIntWritable itemIndexWritable = new VarIntWritable();
+ private final VectorOrPrefWritable vectorOrPref = new VectorOrPrefWritable();
+
@Override
protected void setup(Context context) throws IOException {
Configuration jobConf = context.getConfiguration();
@@ -84,8 +87,7 @@ public final class UserVectorSplitterMap
return;
}
Vector userVector = maybePruneUserVector(value.get());
- VarIntWritable itemIndexWritable = new VarIntWritable();
- VectorOrPrefWritable vectorOrPref = new VectorOrPrefWritable();
+
for (Element e : userVector.nonZeroes()) {
itemIndexWritable.set(e.index());
vectorOrPref.set(userID, (float) e.get());
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/VectorAndPrefsWritable.java Mon Jun 17 07:08:02 2013
@@ -38,6 +38,10 @@ public final class VectorAndPrefsWritabl
}
public VectorAndPrefsWritable(Vector vector, List<Long> userIDs, List<Float> values) {
+ set(vector, userIDs, values);
+ }
+
+ public void set(Vector vector, List<Long> userIDs, List<Float> values) {
this.vector = vector;
this.userIDs = userIDs;
this.values = values;
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsMapper.java Mon Jun 17 07:08:02 2013
@@ -37,6 +37,9 @@ public class ToItemVectorsMapper
USER_RATINGS_USED, USER_RATINGS_NEGLECTED
}
+ private final IntWritable itemID = new IntWritable();
+ private final VectorWritable itemVectorWritable = new VectorWritable();
+
private int sampleSize;
@Override
@@ -54,12 +57,17 @@ public class ToItemVectorsMapper
int numElementsAfterSampling = userRatings.getNumNondefaultElements();
int column = TasteHadoopUtils.idToIndex(rowIndex.get());
- VectorWritable itemVector = new VectorWritable(new RandomAccessSparseVector(Integer.MAX_VALUE, 1));
- itemVector.setWritesLaxPrecision(true);
+ Vector itemVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 1);
+
+ itemVectorWritable.setWritesLaxPrecision(true);
for (Vector.Element elem : userRatings.nonZeroes()) {
- itemVector.get().setQuick(column, elem.get());
- ctx.write(new IntWritable(elem.index()), itemVector);
+ itemID.set(elem.index());
+ itemVector.setQuick(column, elem.get());
+ itemVectorWritable.set(itemVector);
+ ctx.write(itemID, itemVectorWritable);
+ // reset vector for reuse
+ itemVector.setQuick(elem.index(), 0.0);
}
ctx.getCounter(Elements.USER_RATINGS_USED).increment(numElementsAfterSampling);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/preparation/ToItemVectorsReducer.java Mon Jun 17 07:08:02 2013
@@ -25,11 +25,12 @@ import java.io.IOException;
class ToItemVectorsReducer extends
Reducer<IntWritable,VectorWritable,IntWritable,VectorWritable> {
+ private final VectorWritable merged = new VectorWritable();
@Override
protected void reduce(IntWritable row, Iterable<VectorWritable> vectors, Context ctx)
throws IOException, InterruptedException {
- VectorWritable vectorWritable = VectorWritable.merge(vectors.iterator());
- vectorWritable.setWritesLaxPrecision(true);
- ctx.write(row, vectorWritable);
+ merged.setWritesLaxPrecision(true);
+ merged.set(VectorWritable.mergeToVector(vectors.iterator()));
+ ctx.write(row, merged);
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java?rev=1493660&r1=1493659&r2=1493660&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/VectorWritable.java Mon Jun 17 07:08:02 2013
@@ -201,6 +201,10 @@ public final class VectorWritable extend
}
public static VectorWritable merge(Iterator<VectorWritable> vectors) {
+ return new VectorWritable(mergeToVector(vectors));
+ }
+
+ public static Vector mergeToVector(Iterator<VectorWritable> vectors) {
Vector accumulator = vectors.next().get();
while (vectors.hasNext()) {
VectorWritable v = vectors.next();
@@ -210,7 +214,7 @@ public final class VectorWritable extend
}
}
}
- return new VectorWritable(accumulator);
+ return accumulator;
}
@Override