Author: adeneche
Date: Sun Oct 9 04:40:44 2011
New Revision: 1180544
URL: http://svn.apache.org/viewvc?rev=1180544&view=rev
Log:
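Fixed a small bug in DecisionForest Bagging.build(): out-of-bag predictions are now reported to the callback with the instance's id (instance.getId()) instead of its index in the bagged data.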
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java?rev=1180544&r1=1180543&r2=1180544&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/df/Bagging.java Sun Oct 9 04:40:44 2011
@@ -23,6 +23,7 @@ import java.util.Random;
import org.apache.mahout.df.builder.TreeBuilder;
import org.apache.mahout.df.callback.PredictionCallback;
import org.apache.mahout.df.data.Data;
+import org.apache.mahout.df.data.Instance;
import org.apache.mahout.df.node.Node;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -65,8 +66,9 @@ public class Bagging {
log.debug("Oob error estimation");
for (int index = 0; index < data.size(); index++) {
if (!sampled[index]) {
- int prediction = tree.classify(data.get(index));
- callback.prediction(treeId, index, prediction);
+ Instance instance = data.get(index);
+ int prediction = tree.classify(instance);
+ callback.prediction(treeId, instance.getId(), prediction);
}
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java?rev=1180544&r1=1180543&r2=1180544&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step1Mapper.java
Sun Oct 9 04:40:44 2011
@@ -65,6 +65,9 @@ public class Step1Mapper extends MapredM
/** will contain all instances if this mapper's split */
private final List<Instance> instances = Lists.newArrayList();
+ /** current instance's id */
+ private int id;
+
public int getFirstTreeId() {
return firstTreeId;
}
@@ -140,7 +143,7 @@ public class Step1Mapper extends MapredM
@Override
protected void map(LongWritable key, Text value, Context context) throws
IOException, InterruptedException {
- instances.add(converter.convert((int) key.get(), value.toString()));
+ instances.add(converter.convert(id++, value.toString()));
}
@Override
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=1180544&r1=1180543&r2=1180544&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
Sun Oct 9 04:40:44 2011
@@ -48,7 +48,7 @@ public final class OutputUtils {
public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws
IOException {
Collection<Path> outpaths = Lists.newArrayList();
for (FileStatus s : fs.listStatus(outpath, PathFilters.logsCRCFilter())) {
- if (!s.isDir()) {
+ if (!s.isDir() && !s.getPath().getName().startsWith("_")) {
outpaths.add(s.getPath());
}
}
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java?rev=1180544&r1=1180543&r2=1180544&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/df/BreimanExample.java
Sun Oct 9 04:40:44 2011
@@ -99,7 +99,7 @@ public class BreimanExample extends Conf
Data train = data.clone();
Data test = train.rsplit(rng, (int) (data.size() * 0.1));
- int[] trainLabels = train.extractLabels();
+ int[] labels = data.extractLabels();
int[] testLabels = test.extractLabels();
DefaultTreeBuilder treeBuilder = new DefaultTreeBuilder();
@@ -107,8 +107,8 @@ public class BreimanExample extends Conf
SequentialBuilder forestBuilder = new SequentialBuilder(rng, treeBuilder,
train);
// grow a forest with m = log2(M)+1
- ForestPredictions errorM = new ForestPredictions(train.size(), nblabels);
// oob error when using m =
-
// log2(M)+1
+ ForestPredictions errorM = new ForestPredictions(data.size(), nblabels);
// oob error when using m =
+
// log2(M)+1
treeBuilder.setM(m);
long time = System.currentTimeMillis();
@@ -117,11 +117,11 @@ public class BreimanExample extends Conf
sumTimeM += System.currentTimeMillis() - time;
numNodesM += forestM.nbNodes();
- double oobM = ErrorEstimate.errorRate(trainLabels,
errorM.computePredictions(rng)); // oob error estimate
-
// when m = log2(M)+1
+ double oobM = ErrorEstimate.errorRate(labels,
errorM.computePredictions(rng)); // oob error estimate
+
// when m = log2(M)+1
// grow a forest with m=1
- ForestPredictions errorOne = new ForestPredictions(train.size(),
nblabels); // oob error when using m = 1
+ ForestPredictions errorOne = new ForestPredictions(data.size(), nblabels);
// oob error when using m = 1
treeBuilder.setM(1);
time = System.currentTimeMillis();
@@ -130,9 +130,9 @@ public class BreimanExample extends Conf
sumTimeOne += System.currentTimeMillis() - time;
numNodesOne += forestOne.nbNodes();
- double oobOne = ErrorEstimate.errorRate(trainLabels,
errorOne.computePredictions(rng)); // oob error
-
// estimate when m
-
// = 1
+ double oobOne = ErrorEstimate.errorRate(labels,
errorOne.computePredictions(rng)); // oob error
+
// estimate when m
+
// = 1
// compute the test set error (Selection Error), and mean tree error (One
Tree Error),
// using the lowest oob error forest