Author: srowen
Date: Tue Jun 1 12:55:49 2010
New Revision: 950049
URL: http://svn.apache.org/viewvc?rev=950049&view=rev
Log:
MAHOUT-407 also make similar options for item similarity configurable in
recommender
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java?rev=950049&r1=950048&r2=950049&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/AggregateAndRecommendReducer.java
Tue Jun 1 12:55:49 2010
@@ -46,7 +46,8 @@ public final class AggregateAndRecommend
Reducer<VarLongWritable,VectorWritable,VarLongWritable,RecommendedItemsWritable>
{
static final String ITEMID_INDEX_PATH = "itemIDIndexPath";
- static final String RECOMMENDATIONS_PER_USER = "recommendationsPerUser";
+ static final String NUM_RECOMMENDATIONS = "numRecommendations";
+ static final int DEFAULT_NUM_RECOMMENDATIONS = 10;
private static final PathFilter PARTS_FILTER = new PathFilter() {
@Override
@@ -61,7 +62,7 @@ public final class AggregateAndRecommend
@Override
protected void setup(Context context) {
Configuration jobConf = context.getConfiguration();
- recommendationsPerUser = jobConf.getInt(RECOMMENDATIONS_PER_USER, 10);
+ recommendationsPerUser = jobConf.getInt(NUM_RECOMMENDATIONS,
DEFAULT_NUM_RECOMMENDATIONS);
try {
FileSystem fs = FileSystem.get(jobConf);
Path itemIDIndexPath = new
Path(jobConf.get(ITEMID_INDEX_PATH)).makeQualified(fs);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java?rev=950049&r1=950048&r2=950049&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/RecommenderJob.java
Tue Jun 1 12:55:49 2010
@@ -27,6 +27,7 @@ import org.apache.commons.cli2.Option;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
+import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
@@ -54,6 +55,10 @@ import org.apache.mahout.math.VectorWrit
* <li>--usersFile (path): file containing user IDs to recommend for
(optional)</li>
* <li>--numRecommendations (integer): Number of recommendations to compute
per user (optional; default 10)</li>
* <li>--booleanData (boolean): Treat input data as having to pref values
(false)</li>
+ * <li>--maxPrefsPerUserConsidered (integer): Maximum number of preferences
considered per user in
+ * final recommendation phase (10)</li>
+ * <li>--maxCooccurrencesPerItemConsidered: Maximum number of cooccurrences
considered per item
+ * in count phase (100)</li>
* </ol>
*
* <p>General command line options are documented in {@link
AbstractJob}.</p>
@@ -69,14 +74,22 @@ public final class RecommenderJob extend
public int run(String[] args) throws IOException, ClassNotFoundException,
InterruptedException {
Option numReccomendationsOpt =
AbstractJob.buildOption("numRecommendations", "n",
- "Number of recommendations per user", "10");
+ "Number of recommendations per user",
+
String.valueOf(AggregateAndRecommendReducer.DEFAULT_NUM_RECOMMENDATIONS));
Option usersFileOpt = AbstractJob.buildOption("usersFile", "u",
"File of users to recommend for", null);
Option booleanDataOpt = AbstractJob.buildOption("booleanData", "b",
"Treat input as without pref values", Boolean.FALSE.toString());
+ Option maxPrefsPerUserConsideredOpt =
AbstractJob.buildOption("maxPrefsPerUserConsidered", null,
+ "Maximum number of preferences considered per user in final
recommendation phase",
+
String.valueOf(UserVectorSplitterMapper.DEFAULT_MAX_PREFS_PER_USER_CONSIDERED));
+ Option maxCooccurrencesPerItemConsideredOpt =
AbstractJob.buildOption("maxCooccurrencesPerItemConsidered", null,
+ "Maximum number of cooccurrences considered per item in count phase",
+
String.valueOf(UserVectorToCooccurrenceMapper.DEFAULT_MAX_COOCCURRENCES_PER_ITEM_CONSIDERED));
Map<String,String> parsedArgs = AbstractJob.parseArguments(
- args, numReccomendationsOpt, usersFileOpt, booleanDataOpt);
+ args, numReccomendationsOpt, usersFileOpt, booleanDataOpt,
+ maxPrefsPerUserConsideredOpt, maxCooccurrencesPerItemConsideredOpt);
if (parsedArgs == null) {
return -1;
}
@@ -85,10 +98,12 @@ public final class RecommenderJob extend
Path inputPath = new Path(originalConf.get("mapred.input.dir"));
Path outputPath = new Path(originalConf.get("mapred.output.dir"));
Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
- int recommendationsPerUser =
Integer.parseInt(parsedArgs.get("--numRecommendations"));
+ int numRecommendations =
Integer.parseInt(parsedArgs.get("--numRecommendations"));
String usersFile = parsedArgs.get("--usersFile");
boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
-
+ int maxPrefsPerUserConsidered =
Integer.parseInt(parsedArgs.get("--maxPrefsPerUserConsidered"));
+ int maxCooccurrencesPerItemConsidered =
Integer.parseInt(parsedArgs.get("--maxCooccurrencesPerItemConsidered"));
+
Path userVectorPath = new Path(tempDirPath, "userVectors");
Path itemIDIndexPath = new Path(tempDirPath, "itemIDIndex");
Path cooccurrencePath = new Path(tempDirPath, "cooccurrence");
@@ -125,6 +140,8 @@ public final class RecommenderJob extend
UserVectorToCooccurrenceReducer.class, VarIntWritable.class,
VectorWritable.class,
SequenceFileOutputFormat.class);
setIOSort(toCooccurrence);
+
toCooccurrence.getConfiguration().setInt(UserVectorToCooccurrenceMapper.MAX_COOCCURRENCES_PER_ITEM_CONSIDERED,
+
maxCooccurrencesPerItemConsidered);
toCooccurrence.waitForCompletion(true);
}
@@ -144,6 +161,8 @@ public final class RecommenderJob extend
if (usersFile != null) {
prePartialMultiply2.getConfiguration().set(UserVectorSplitterMapper.USERS_FILE,
usersFile);
}
+
prePartialMultiply2.getConfiguration().setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED,
+ maxPrefsPerUserConsidered);
prePartialMultiply2.waitForCompletion(true);
Job partialMultiply = prepareJob(
@@ -165,14 +184,14 @@ public final class RecommenderJob extend
setIOSort(aggregateAndRecommend);
aggregateAndRecommend.setCombinerClass(AggregateCombiner.class);
jobConf.set(AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
itemIDIndexPath.toString());
- jobConf.setInt(AggregateAndRecommendReducer.RECOMMENDATIONS_PER_USER,
recommendationsPerUser);
+ jobConf.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS,
numRecommendations);
aggregateAndRecommend.waitForCompletion(true);
}
return 0;
}
- private static void setIOSort(Job job) {
+ private static void setIOSort(JobContext job) {
Configuration conf = job.getConfiguration();
conf.setInt("io.sort.factor", 100);
int assumedHeapSize = 512;
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java?rev=950049&r1=950048&r2=950049&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorSplitterMapper.java
Tue Jun 1 12:55:49 2010
@@ -37,13 +37,17 @@ public final class UserVectorSplitterMap
Mapper<VarLongWritable,VectorWritable,
VarIntWritable,VectorOrPrefWritable> {
static final String USERS_FILE = "usersFile";
- private static final int MAX_PREFS_CONSIDERED = 10;
+ static final String MAX_PREFS_PER_USER_CONSIDERED =
"maxPrefsPerUserConsidered";
+ static final int DEFAULT_MAX_PREFS_PER_USER_CONSIDERED = 10;
+ private int maxPrefsPerUserConsidered;
private FastIDSet usersToRecommendFor;
@Override
protected void setup(Context context) {
Configuration jobConf = context.getConfiguration();
+ maxPrefsPerUserConsidered = jobConf.getInt(MAX_PREFS_PER_USER_CONSIDERED,
+
DEFAULT_MAX_PREFS_PER_USER_CONSIDERED);
try {
FileSystem fs = FileSystem.get(jobConf);
String usersFilePathString = jobConf.get(USERS_FILE);
@@ -82,8 +86,8 @@ public final class UserVectorSplitterMap
}
}
- private static Vector maybePruneUserVector(Vector userVector) {
- if (userVector.getNumNondefaultElements() <= MAX_PREFS_CONSIDERED) {
+ private Vector maybePruneUserVector(Vector userVector) {
+ if (userVector.getNumNondefaultElements() <= maxPrefsPerUserConsidered) {
return userVector;
}
@@ -104,12 +108,12 @@ public final class UserVectorSplitterMap
return userVector;
}
- private static float findSmallestLargeValue(Vector userVector) {
- PriorityQueue<Float> topPrefValues = new
PriorityQueue<Float>(MAX_PREFS_CONSIDERED + 1);
+ private float findSmallestLargeValue(Vector userVector) {
+ PriorityQueue<Float> topPrefValues = new
PriorityQueue<Float>(maxPrefsPerUserConsidered + 1);
Iterator<Vector.Element> it = userVector.iterateNonZero();
while (it.hasNext()) {
float absValue = Math.abs((float) it.next().get());
- if (topPrefValues.size() < MAX_PREFS_CONSIDERED) {
+ if (topPrefValues.size() < maxPrefsPerUserConsidered) {
topPrefValues.add(absValue);
} else {
if (absValue > topPrefValues.peek()) {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java?rev=950049&r1=950048&r2=950049&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/item/UserVectorToCooccurrenceMapper.java
Tue Jun 1 12:55:49 2010
@@ -32,15 +32,24 @@ import org.apache.mahout.math.map.OpenIn
public final class UserVectorToCooccurrenceMapper extends
Mapper<VarLongWritable,VectorWritable,VarIntWritable,VarIntWritable> {
- private static final int MAX_PREFS_CONSIDERED = 100;
+ static final String MAX_COOCCURRENCES_PER_ITEM_CONSIDERED =
"maxCooccurrencesPerItemConsidered";
+ static final int DEFAULT_MAX_COOCCURRENCES_PER_ITEM_CONSIDERED = 100;
private enum Counters {
USER_PREFS_SKIPPED,
}
+ private int maxCooccurrencesPerItemConsidered;
private final OpenIntIntHashMap indexCounts = new OpenIntIntHashMap();
@Override
+ protected void setup(Context context) {
+ maxCooccurrencesPerItemConsidered =
+
context.getConfiguration().getInt(MAX_COOCCURRENCES_PER_ITEM_CONSIDERED,
+
DEFAULT_MAX_COOCCURRENCES_PER_ITEM_CONSIDERED);
+ }
+
+ @Override
protected void map(VarLongWritable userID,
VectorWritable userVectorWritable,
Context context) throws IOException, InterruptedException
{
@@ -71,18 +80,18 @@ public final class UserVectorToCooccurre
}
private Vector maybePruneUserVector(Vector userVector) {
- if (userVector.getNumNondefaultElements() <= MAX_PREFS_CONSIDERED) {
+ if (userVector.getNumNondefaultElements() <=
maxCooccurrencesPerItemConsidered) {
return userVector;
}
PriorityQueue<Integer> smallCounts =
- new PriorityQueue<Integer>(MAX_PREFS_CONSIDERED + 1,
Collections.reverseOrder());
+ new PriorityQueue<Integer>(maxCooccurrencesPerItemConsidered + 1,
Collections.reverseOrder());
Iterator<Vector.Element> it = userVector.iterateNonZero();
while (it.hasNext()) {
int count = indexCounts.get(it.next().index());
if (count > 0) {
- if (smallCounts.size() < MAX_PREFS_CONSIDERED) {
+ if (smallCounts.size() < maxCooccurrencesPerItemConsidered) {
smallCounts.add(count);
} else if (count < smallCounts.peek()) {
smallCounts.add(count);