Author: robinanil
Date: Sun Jun 3 18:34:59 2012
New Revision: 1345735
URL: http://svn.apache.org/viewvc?rev=1345735&view=rev
Log:
MAHOUT-1006 making end to end example work
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/AbstractNaiveBayesClassifier.java Sun Jun 3 18:34:59 2012
@@ -55,9 +55,7 @@ public abstract class AbstractNaiveBayes
@Override
public Vector classifyFull(Vector instance) {
- System.out.println(1);
Vector score = model.createScoringVector();
- System.out.println(score.size());
for (int label = 0; label < model.numLabels(); label++) {
score.set(label, getScoreForLabelInstance(label, instance));
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/BayesUtils.java Sun Jun 3 18:34:59 2012
@@ -116,7 +116,7 @@ public final class BayesUtils {
int i = 0;
try {
for (Object label : labels) {
- String theLabel = ((Pair<?,?>) label).getFirst().toString();
+ String theLabel = ((Pair<?,?>) label).getFirst().toString().split("/")[1];
if (!seen.contains(theLabel)) {
writer.append(new Text(theLabel), new IntWritable(i++));
seen.add(theLabel);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/ComplementaryNaiveBayesClassifier.java Sun Jun 3 18:34:59 2012
@@ -36,5 +36,4 @@ public class ComplementaryNaiveBayesClas
return Math.log(numerator / denominator);
}
-
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/BayesTestMapper.java Sun Jun 3 18:34:59 2012
@@ -59,6 +59,6 @@ public class BayesTestMapper extends Map
protected void map(Text key, VectorWritable value, Context context) throws IOException, InterruptedException {
Vector result = classifier.classifyFull(value.get());
//the key is the expected value
- context.write(key, new VectorWritable(result));
+ context.write(new Text(key.toString().split("/")[1]), new VectorWritable(result));
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/test/TestNaiveBayesDriver.java Sun Jun 3 18:34:59 2012
@@ -99,7 +99,8 @@ public class TestNaiveBayesDriver extend
Text key = new Text();
VectorWritable vw = new VectorWritable();
while (reader.next(key, vw)) {
- writer.append(key, new VectorWritable(classifier.classifyFull(vw.get())));
+ writer.append(new Text(key.toString().split("/")[1]),
+ new VectorWritable(classifier.classifyFull(vw.get())));
}
writer.close();
reader.close();
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/IndexInstancesMapper.java Sun Jun 3 18:34:59 2012
@@ -40,7 +40,7 @@ public class IndexInstancesMapper extend
@Override
protected void map(Text labelText, VectorWritable instance, Context ctx)
throws IOException, InterruptedException {
- String label = labelText.toString();
+ String label = labelText.toString().split("/")[1];
if (labelIndex.containsKey(label)) {
ctx.write(new IntWritable(labelIndex.get(label)), instance);
} else {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/ThetaMapper.java Sun Jun 3 18:34:59 2012
@@ -41,7 +41,7 @@ public class ThetaMapper extends Mapper<
Configuration conf = ctx.getConfiguration();
float alphaI = conf.getFloat(ALPHA_I, 1.0f);
- Map<String,Vector> scores = BayesUtils.readScoresFromCache(conf);
+ Map<String, Vector> scores = BayesUtils.readScoresFromCache(conf);
if (conf.getBoolean(TRAIN_COMPLEMENTARY, false)) {
trainer = new ComplementaryThetaTrainer(scores.get(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE),
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/TrainNaiveBayesJob.java Sun Jun 3 18:34:59 2012
@@ -112,8 +112,10 @@ public final class TrainNaiveBayesJob ex
if (!succeeded) {
return -1;
}
+
//put the per label and per feature vectors into the cache
HadoopUtil.cacheFiles(getTempPath(WEIGHTS), getConf());
+
//calculate the Thetas, write out to LABEL_THETA_NORMALIZER vectors -- TODO: add reference here to the part of the Rennie paper that discusses this
Job thetaSummer = prepareJob(getTempPath(SUMMED_OBSERVATIONS), getTempPath(THETAS),
SequenceFileInputFormat.class, ThetaMapper.class, Text.class, VectorWritable.class, VectorSumReducer.class,
@@ -125,6 +127,7 @@ public final class TrainNaiveBayesJob ex
if (!succeeded) {
return -1;
}
+
//validate our model and then write it out to the official output
NaiveBayesModel naiveBayesModel = BayesUtils.readModelFromDir(getTempPath(), getConf());
naiveBayesModel.validate();
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java?rev=1345735&r1=1345734&r2=1345735&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/naivebayes/training/WeightsMapper.java Sun Jun 3 18:34:59 2012
@@ -51,7 +51,6 @@ public class WeightsMapper extends Mappe
}
int label = index.get();
-// instance.addTo(weightsPerFeature);
weightsPerFeature.assign(instance, Functions.PLUS);
weightsPerLabel.set(label, weightsPerLabel.get(label) + instance.zSum());
}