Author: jeastman
Date: Sun Apr 10 18:10:50 2011
New Revision: 1090861
URL: http://svn.apache.org/viewvc?rev=1090861&view=rev
Log:
MAHOUT-552: Added static initialCanopy method to create initial canopies with
original center type. Added to unit test. All tests run.
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopy.java Sun Apr 10 18:10:50 2011
@@ -55,6 +55,20 @@ public class MeanShiftCanopy extends Clu
super(point, id, measure);
boundPoints.add(id);
}
+
+ /**
+ * Create an initial Canopy, retaining the original type of the given point (e.g. NamedVector)
+ * @param point a Vector
+ * @param id an int
+ * @param measure a DistanceMeasure
+ * @return a MeanShiftCanopy
+ */
+ public static MeanShiftCanopy initialCanopy(Vector point, int id, DistanceMeasure measure){
+ MeanShiftCanopy result = new MeanShiftCanopy(point, id, measure);
+ // overwrite center so original point type is retained
+ result.setCenter(point);
+ return result;
+ }
/**
* Create a new Canopy containing the given point, id and bound points
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyCreatorMapper.java Sun Apr 10 18:10:50 2011
@@ -38,7 +38,7 @@ public class MeanShiftCanopyCreatorMappe
@Override
protected void map(WritableComparable<?> key, VectorWritable point, Context context) throws IOException, InterruptedException {
- MeanShiftCanopy canopy = new MeanShiftCanopy(point.get(), nextCanopyId++, measure);
+ MeanShiftCanopy canopy = MeanShiftCanopy.initialCanopy(point.get(), nextCanopyId++, measure);
context.write(new Text(key.toString()), canopy);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sun Apr 10 18:10:50 2011
@@ -209,7 +209,7 @@ public class MeanShiftCanopyDriver exten
MeanShiftCanopy.class);
try {
for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(s.getPath(), conf)) {
- writer.append(new Text(), new MeanShiftCanopy(value.get(), id++, measure));
+ writer.append(new Text(), MeanShiftCanopy.initialCanopy(value.get(), id++, measure));
}
} finally {
writer.close();
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1090861&r1=1090860&r2=1090861&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java Sun Apr 10 18:10:50 2011
@@ -21,6 +21,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
+import java.util.Iterator;
import java.util.List;
import java.util.Map;
@@ -28,6 +29,7 @@ import org.apache.hadoop.conf.Configurat
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
@@ -39,6 +41,7 @@ import org.apache.mahout.common.MahoutTe
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.distance.DistanceMeasure;
import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterator;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
@@ -327,6 +330,13 @@ public final class TestMeanShift extends
Path outPart = new Path(output, "clusters-3/part-r-00000");
long count = HadoopUtil.countRecords(outPart, conf);
assertEquals("count", 3, count);
+ outPart = new Path(output, "clusters-0/part-m-00000");
+ Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart, true, conf);
+ // now test the initial clusters to ensure the type of their centers has been retained
+ while (iterator.hasNext()) {
+ MeanShiftCanopy canopy = (MeanShiftCanopy) iterator.next();
+ assertTrue(canopy.getCenter()instanceof DenseVector);
+ }
}
/**