Author: jeastman
Date: Sun Feb 12 19:03:59 2012
New Revision: 1243298
URL: http://svn.apache.org/viewvc?rev=1243298&view=rev
Log:
Reformatted using Eclipse-Lucene-Codestyle but with 120 char text width
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterWritable.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIMapper.java?rev=1243298&r1=1243297&r2=1243298&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIMapper.java Sun Feb 12 19:03:59 2012
@@ -12,17 +12,18 @@ import org.apache.mahout.math.Vector;
import org.apache.mahout.math.Vector.Element;
import org.apache.mahout.math.VectorWritable;
-public class CIMapper extends
- Mapper<WritableComparable<?>,VectorWritable,IntWritable,ClusterWritable> {
+public class CIMapper extends Mapper<WritableComparable<?>,VectorWritable,IntWritable,ClusterWritable> {
private ClusterClassifier classifier;
-
+
private ClusteringPolicy policy;
-
+
/*
* (non-Javadoc)
*
- * @see
org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper
.Context)
+ * @see
+ *
org.apache.hadoop.mapreduce.Mapper#setup(org.apache.hadoop.mapreduce.Mapper
+ * .Context)
*/
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
@@ -31,12 +32,12 @@ public class CIMapper extends
policy = new KMeansClusteringPolicy();
super.setup(context);
}
-
+
/*
* (non-Javadoc)
*
- * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object,
java.lang.Object,
- * org.apache.hadoop.mapreduce.Mapper.Context)
+ * @see org.apache.hadoop.mapreduce.Mapper#map(java.lang.Object,
+ * java.lang.Object, org.apache.hadoop.mapreduce.Mapper.Context)
*/
@Override
protected void map(WritableComparable<?> key, VectorWritable value, Context
context) throws IOException,
@@ -48,11 +49,13 @@ public class CIMapper extends
classifier.train(el.index(), value.get(), el.get());
}
}
-
+
/*
* (non-Javadoc)
*
- * @see
org.apache.hadoop.mapreduce.Mapper#cleanup(org.apache.hadoop.mapreduce.
Mapper.Context)
+ * @see
+ * org.apache.hadoop.mapreduce.Mapper#cleanup(org.apache.hadoop.mapreduce.
+ * Mapper.Context)
*/
@Override
protected void cleanup(Context context) throws IOException,
InterruptedException {
@@ -64,5 +67,5 @@ public class CIMapper extends
}
super.cleanup(context);
}
-
+
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIReducer.java?rev=1243298&r1=1243297&r2=1243298&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIReducer.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/CIReducer.java Sun Feb 12 19:03:59 2012
@@ -24,15 +24,15 @@ import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Reducer;
public class CIReducer extends
Reducer<IntWritable,ClusterWritable,IntWritable,ClusterWritable> {
-
+
@Override
- protected void reduce(IntWritable key, Iterable<ClusterWritable> values,
- Context context) throws IOException, InterruptedException {
- Iterator<ClusterWritable> iter =values.iterator();
+ protected void reduce(IntWritable key, Iterable<ClusterWritable> values,
Context context) throws IOException,
+ InterruptedException {
+ Iterator<ClusterWritable> iter = values.iterator();
ClusterWritable first = null;
- while(iter.hasNext()){
+ while (iter.hasNext()) {
ClusterWritable cw = iter.next();
- if (first == null){
+ if (first == null) {
first = cw;
} else {
first.getValue().observe(cw.getValue());
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java?rev=1243298&r1=1243297&r2=1243298&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterIterator.java Sun Feb 12 19:03:59 2012
@@ -45,26 +45,32 @@ import com.google.common.collect.Lists;
import com.google.common.io.Closeables;
/**
- * This is an experimental clustering iterator which works with a
ClusteringPolicy and a prior ClusterClassifier which
- * has been initialized with a set of models. To date, it has been tested with
k-means and Dirichlet clustering. See
- * examples DisplayKMeans and DisplayDirichlet which have been switched over
to use it.
+ * This is an experimental clustering iterator which works with a
+ * ClusteringPolicy and a prior ClusterClassifier which has been initialized
+ * with a set of models. To date, it has been tested with k-means and Dirichlet
+ * clustering. See examples DisplayKMeans and DisplayDirichlet which have been
+ * switched over to use it.
*/
public class ClusterIterator {
-
+
public static final String PRIOR_PATH_KEY =
"org.apache.mahout.clustering.prior.path";
-
+
public ClusterIterator(ClusteringPolicy policy) {
this.policy = policy;
}
-
+
private final ClusteringPolicy policy;
-
+
/**
- * Iterate over data using a prior-trained ClusterClassifier, for a number
of iterations
+ * Iterate over data using a prior-trained ClusterClassifier, for a number of
+ * iterations
*
- * @param data a {@code List<Vector>} of input vectors
- * @param classifier a prior ClusterClassifier
- * @param numIterations the int number of iterations to perform
+ * @param data
+ * a {@code List<Vector>} of input vectors
+ * @param classifier
+ * a prior ClusterClassifier
+ * @param numIterations
+ * the int number of iterations to perform
* @return the posterior ClusterClassifier
*/
public ClusterClassifier iterate(Iterable<Vector> data, ClusterClassifier
classifier, int numIterations) {
@@ -87,15 +93,19 @@ public class ClusterIterator {
}
return classifier;
}
-
+
/**
- * Iterate over data using a prior-trained ClusterClassifier, for a number
of iterations using a sequential
- * implementation
+ * Iterate over data using a prior-trained ClusterClassifier, for a number of
+ * iterations using a sequential implementation
*
- * @param inPath a Path to input VectorWritables
- * @param priorPath a Path to the prior classifier
- * @param outPath a Path of output directory
- * @param numIterations the int number of iterations to perform
+ * @param inPath
+ * a Path to input VectorWritables
+ * @param priorPath
+ * a Path to the prior classifier
+ * @param outPath
+ * a Path of output directory
+ * @param numIterations
+ * the int number of iterations to perform
* @throws IOException
*/
public void iterateSeq(Path inPath, Path priorPath, Path outPath, int
numIterations) throws IOException {
@@ -123,15 +133,19 @@ public class ClusterIterator {
writeClassifier(classifier, new Path(outPath, "classifier-" +
iteration));
}
}
-
+
/**
- * Iterate over data using a prior-trained ClusterClassifier, for a number
of iterations using a mapreduce
- * implementation
+ * Iterate over data using a prior-trained ClusterClassifier, for a number of
+ * iterations using a mapreduce implementation
*
- * @param inPath a Path to input VectorWritables
- * @param priorPath a Path to the prior classifier
- * @param outPath a Path of output directory
- * @param numIterations the int number of iterations to perform
+ * @param inPath
+ * a Path to input VectorWritables
+ * @param priorPath
+ * a Path to the prior classifier
+ * @param outPath
+ * a Path of output directory
+ * @param numIterations
+ * the int number of iterations to perform
*/
public void iterateMR(Path inPath, Path priorPath, Path outPath, int
numIterations) throws IOException,
InterruptedException, ClassNotFoundException {
@@ -139,7 +153,7 @@ public class ClusterIterator {
HadoopUtil.delete(conf, outPath);
for (int iteration = 1; iteration <= numIterations; iteration++) {
conf.set(PRIOR_PATH_KEY, priorPath.toString());
-
+
String jobName = "Cluster Iterator running iteration " + iteration + " over priorPath: " + priorPath;
System.out.println(jobName);
Job job = new Job(conf, jobName);
@@ -147,17 +161,17 @@ public class ClusterIterator {
job.setMapOutputValueClass(ClusterWritable.class);
job.setOutputKeyClass(IntWritable.class);
job.setOutputValueClass(ClusterWritable.class);
-
+
job.setInputFormatClass(SequenceFileInputFormat.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setMapperClass(CIMapper.class);
job.setReducerClass(CIReducer.class);
-
+
FileInputFormat.addInputPath(job, inPath);
Path clustersOut = new Path(outPath, "clusters-" + iteration);
priorPath = clustersOut;
FileOutputFormat.setOutputPath(job, clustersOut);
-
+
job.setJarByClass(ClusterIterator.class);
if (!job.waitForCompletion(true)) {
throw new InterruptedException("Cluster Iteration " + iteration + " failed processing " + priorPath);
@@ -168,13 +182,16 @@ public class ClusterIterator {
}
}
}
-
+
/**
- * Return if all of the Clusters in the parts in the filePath have converged
or not
+ * Return if all of the Clusters in the parts in the filePath have converged
+ * or not
*
- * @param filePath the file path to the single file containing the clusters
+ * @param filePath
+ * the file path to the single file containing the clusters
* @return true if all Clusters are converged
- * @throws IOException if there was an IO error
+ * @throws IOException
+ * if there was an IO error
*/
private boolean isConverged(Path filePath, Configuration conf, FileSystem
fs) throws IOException {
for (FileStatus part : fs.listStatus(filePath, PathFilters.partFilter())) {
@@ -190,7 +207,7 @@ public class ClusterIterator {
}
return true;
}
-
+
public static void writeClassifier(ClusterClassifier classifier, Path
outPath) throws IOException {
Configuration config = new Configuration();
FileSystem fs = FileSystem.get(outPath.toUri(), config);
@@ -209,7 +226,7 @@ public class ClusterIterator {
}
}
}
-
+
public static ClusterClassifier readClassifier(Path inPath) throws
IOException {
Configuration config = new Configuration();
List<Cluster> clusters = Lists.newArrayList();
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterWritable.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterWritable.java?rev=1243298&r1=1243297&r2=1243298&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterWritable.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/ClusterWritable.java Sun Feb 12 19:03:59 2012
@@ -24,25 +24,25 @@ import org.apache.hadoop.io.Writable;
import org.apache.mahout.classifier.sgd.PolymorphicWritable;
public class ClusterWritable implements Writable {
-
+
private Cluster value;
-
+
public Cluster getValue() {
return value;
}
-
+
public void setValue(Cluster value) {
this.value = value;
}
-
+
@Override
public void write(DataOutput out) throws IOException {
PolymorphicWritable.write(out, value);
}
-
+
@Override
public void readFields(DataInput in) throws IOException {
value = PolymorphicWritable.read(in, Cluster.class);
}
-
+
}
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java?rev=1243298&r1=1243297&r2=1243298&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/TestClusterClassifier.java Sun Feb 12 19:03:59 2012
@@ -39,7 +39,7 @@ import org.junit.Test;
import com.google.common.collect.Lists;
public final class TestClusterClassifier extends MahoutTestCase {
-
+
private static ClusterClassifier newDMClassifier() {
List<Cluster> models = Lists.newArrayList();
DistanceMeasure measure = new ManhattanDistanceMeasure();
@@ -48,7 +48,7 @@ public final class TestClusterClassifier
models.add(new DistanceMeasureCluster(new DenseVector(2).assign(-1), 2,
measure));
return new ClusterClassifier(models);
}
-
+
private static ClusterClassifier newClusterClassifier() {
List<Cluster> models = Lists.newArrayList();
DistanceMeasure measure = new ManhattanDistanceMeasure();
@@ -57,7 +57,7 @@ public final class TestClusterClassifier
models.add(new org.apache.mahout.clustering.kmeans.Cluster(new
DenseVector(2).assign(-1), 2, measure));
return new ClusterClassifier(models);
}
-
+
private static ClusterClassifier newSoftClusterClassifier() {
List<Cluster> models = Lists.newArrayList();
DistanceMeasure measure = new ManhattanDistanceMeasure();
@@ -66,7 +66,7 @@ public final class TestClusterClassifier
models.add(new SoftCluster(new DenseVector(2).assign(-1), 2, measure));
return new ClusterClassifier(models);
}
-
+
private static ClusterClassifier newGaussianClassifier() {
List<Cluster> models = Lists.newArrayList();
models.add(new GaussianCluster(new DenseVector(2).assign(1), new
DenseVector(2).assign(1), 0));
@@ -74,13 +74,13 @@ public final class TestClusterClassifier
models.add(new GaussianCluster(new DenseVector(2).assign(-1), new
DenseVector(2).assign(1), 2));
return new ClusterClassifier(models);
}
-
+
private ClusterClassifier writeAndRead(ClusterClassifier classifier) throws
IOException {
Path path = new Path(getTestTempDirPath(), "output");
ClusterIterator.writeClassifier(classifier, path);
return ClusterIterator.readClassifier(path);
}
-
+
@Test
public void testDMClusterClassification() {
ClusterClassifier classifier = newDMClassifier();
@@ -89,7 +89,7 @@ public final class TestClusterClassifier
pdf = classifier.classify(new DenseVector(2).assign(2));
assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
AbstractCluster.formatVector(pdf, null));
}
-
+
@Test
public void testCanopyClassification() {
List<Cluster> models = Lists.newArrayList();
@@ -103,7 +103,7 @@ public final class TestClusterClassifier
pdf = classifier.classify(new DenseVector(2).assign(2));
assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
AbstractCluster.formatVector(pdf, null));
}
-
+
@Test
public void testClusterClassification() {
ClusterClassifier classifier = newClusterClassifier();
@@ -112,7 +112,7 @@ public final class TestClusterClassifier
pdf = classifier.classify(new DenseVector(2).assign(2));
assertEquals("[2,2]", "[0.493, 0.296, 0.211]",
AbstractCluster.formatVector(pdf, null));
}
-
+
@Test(expected = UnsupportedOperationException.class)
public void testMSCanopyClassification() {
List<Cluster> models = Lists.newArrayList();
@@ -123,7 +123,7 @@ public final class TestClusterClassifier
ClusterClassifier classifier = new ClusterClassifier(models);
classifier.classify(new DenseVector(2));
}
-
+
@Test
public void testSoftClusterClassification() {
ClusterClassifier classifier = newSoftClusterClassifier();
@@ -132,7 +132,7 @@ public final class TestClusterClassifier
pdf = classifier.classify(new DenseVector(2).assign(2));
assertEquals("[2,2]", "[0.735, 0.184, 0.082]",
AbstractCluster.formatVector(pdf, null));
}
-
+
@Test
public void testGaussianClusterClassification() {
ClusterClassifier classifier = newGaussianClassifier();
@@ -141,7 +141,7 @@ public final class TestClusterClassifier
pdf = classifier.classify(new DenseVector(2).assign(2));
assertEquals("[2,2]", "[0.952, 0.047, 0.000]",
AbstractCluster.formatVector(pdf, null));
}
-
+
@Test
public void testDMClassifierSerialization() throws Exception {
ClusterClassifier classifier = newDMClassifier();
@@ -150,7 +150,7 @@ public final class TestClusterClassifier
assertEquals(classifier.getModels().get(0).getClass().getName(),
classifierOut.getModels().get(0).getClass()
.getName());
}
-
+
@Test
public void testClusterClassifierSerialization() throws Exception {
ClusterClassifier classifier = newClusterClassifier();
@@ -159,7 +159,7 @@ public final class TestClusterClassifier
assertEquals(classifier.getModels().get(0).getClass().getName(),
classifierOut.getModels().get(0).getClass()
.getName());
}
-
+
@Test
public void testSoftClusterClassifierSerialization() throws Exception {
ClusterClassifier classifier = newSoftClusterClassifier();
@@ -168,7 +168,7 @@ public final class TestClusterClassifier
assertEquals(classifier.getModels().get(0).getClass().getName(),
classifierOut.getModels().get(0).getClass()
.getName());
}
-
+
@Test
public void testGaussianClassifierSerialization() throws Exception {
ClusterClassifier classifier = newGaussianClassifier();
@@ -177,7 +177,7 @@ public final class TestClusterClassifier
assertEquals(classifier.getModels().get(0).getClass().getName(),
classifierOut.getModels().get(0).getClass()
.getName());
}
-
+
@Test
public void testClusterIteratorKMeans() {
List<Vector> data =
TestKmeansClustering.getPoints(TestKmeansClustering.REFERENCE);
@@ -190,7 +190,7 @@ public final class TestClusterClassifier
System.out.println(cluster.asFormatString(null));
}
}
-
+
@Test
public void testClusterIteratorDirichlet() {
List<Vector> data =
TestKmeansClustering.getPoints(TestKmeansClustering.REFERENCE);
@@ -203,7 +203,7 @@ public final class TestClusterClassifier
System.out.println(cluster.asFormatString(null));
}
}
-
+
@Test
public void testSeqFileClusterIteratorKMeans() throws IOException {
Path pointsPath = getTestTempDirPath("points");
@@ -224,7 +224,7 @@ public final class TestClusterClassifier
ClusteringPolicy policy = new KMeansClusteringPolicy();
ClusterIterator iterator = new ClusterIterator(policy);
iterator.iterateSeq(pointsPath, path, outPath, 5);
-
+
for (int i = 1; i <= 5; i++) {
System.out.println("Classifier-" + i);
ClusterClassifier posterior = ClusterIterator.readClassifier(new
Path(outPath, "classifier-" + i));
@@ -232,10 +232,10 @@ public final class TestClusterClassifier
for (Cluster cluster : posterior.getModels()) {
System.out.println(cluster.asFormatString(null));
}
-
+
}
}
-
+
@Test
public void testMRFileClusterIteratorKMeans() throws IOException,
InterruptedException, ClassNotFoundException {
Path pointsPath = getTestTempDirPath("points");
@@ -256,7 +256,7 @@ public final class TestClusterClassifier
ClusteringPolicy policy = new KMeansClusteringPolicy();
ClusterIterator iterator = new ClusterIterator(policy);
iterator.iterateMR(pointsPath, path, outPath, 5);
-
+
for (int i = 1; i <= 5; i++) {
System.out.println("Classifier-" + i);
ClusterClassifier posterior = ClusterIterator.readClassifier(new
Path(outPath, "clusters-" + i));
@@ -264,7 +264,7 @@ public final class TestClusterClassifier
for (Cluster cluster : posterior.getModels()) {
System.out.println(cluster.asFormatString(null));
}
-
+
}
}
}