Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java?rev=909914&r1=909913&r2=909914&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyClusterer.java Sat Feb 13 21:07:53 2010 @@ -12,7 +12,7 @@ import org.apache.mahout.math.Vector; public class MeanShiftCanopyClusterer { - + private double convergenceDelta = 0; // the next canopyId to be allocated private int nextCanopyId = 0; @@ -26,6 +26,7 @@ public double getT1() { return t1; } + public double getT2() { return t2; } @@ -37,15 +38,17 @@ public MeanShiftCanopyClusterer(JobConf job) { configure(job); } + /** * Configure the Canopy and its distance measure - * - * @param job the JobConf for this job + * + * @param job + * the JobConf for this job */ public void configure(JobConf job) { try { measure = Class.forName(job.get(MeanShiftCanopyConfigKeys.DISTANCE_MEASURE_KEY)).asSubclass( - DistanceMeasure.class).newInstance(); + DistanceMeasure.class).newInstance(); measure.configure(job); } catch (ClassNotFoundException e) { throw new IllegalStateException(e); @@ -59,13 +62,14 @@ t2 = Double.parseDouble(job.get(MeanShiftCanopyConfigKeys.T2_KEY)); convergenceDelta = Double.parseDouble(job.get(MeanShiftCanopyConfigKeys.CLUSTER_CONVERGENCE_KEY)); } + /** * Configure the Canopy for unit tests - * - * @param aDelta the convergence criteria + * + * @param aDelta + * the convergence criteria */ - public void config(DistanceMeasure aMeasure, double aT1, double aT2, - double aDelta) { + public void config(DistanceMeasure aMeasure, double aT1, double aT2, double aDelta) { nextCanopyId = 100; // so canopyIds will sort properly measure = aMeasure; t1 = aT1; @@ -74,15 +78,17 @@ } /** - * Merge the given canopy into the canopies list. If it touches any existing canopy (norm<T1) then add the center of - * each to the other. If it covers any other canopies (norm<T2), then merge the given canopy with the closest covering - * canopy. If the given canopy does not cover any other canopies, add it to the canopies list. - * - * @param aCanopy a MeanShiftCanopy to be merged - * @param canopies the List<Canopy> to be appended + * Merge the given canopy into the canopies list. If it touches any existing canopy (norm<T1) then add the + * center of each to the other. If it covers any other canopies (norm<T2), then merge the given canopy with + * the closest covering canopy. If the given canopy does not cover any other canopies, add it to the + * canopies list. + * + * @param aCanopy + * a MeanShiftCanopy to be merged + * @param canopies + * the List<Canopy> to be appended */ - public void mergeCanopy(MeanShiftCanopy aCanopy, - List<MeanShiftCanopy> canopies) { + public void mergeCanopy(MeanShiftCanopy aCanopy, List<MeanShiftCanopy> canopies) { MeanShiftCanopy closestCoveringCanopy = null; double closestNorm = Double.MAX_VALUE; for (MeanShiftCanopy canopy : canopies) { @@ -91,7 +97,7 @@ aCanopy.touch(canopy); } if (norm < t2) { - if (closestCoveringCanopy == null || norm < closestNorm) { + if ((closestCoveringCanopy == null) || (norm < closestNorm)) { closestNorm = norm; closestCoveringCanopy = canopy; } @@ -103,24 +109,24 @@ closestCoveringCanopy.merge(aCanopy); } } - + /** Emit the new canopy to the collector, keyed by the canopy's Id */ - static void emitCanopy(MeanShiftCanopy canopy, - OutputCollector<Text, WritableComparable<?>> collector) - throws IOException { + static void emitCanopy(MeanShiftCanopy canopy, OutputCollector<Text,WritableComparable<?>> collector) throws IOException { String identifier = canopy.getIdentifier(); collector.collect(new Text(identifier), new Text("new " + canopy.toString())); } /** * Shift the center to the new centroid of the cluster - * - * @param canopy the canopy to shift. + * + * @param canopy + * the canopy to shift. * @return if the cluster is converged */ public boolean shiftToMean(MeanShiftCanopy canopy) { Vector centroid = canopy.computeCentroid(); - canopy.setConverged(new EuclideanDistanceMeasure().distance(centroid, canopy.getCenter()) < convergenceDelta); + canopy + .setConverged(new EuclideanDistanceMeasure().distance(centroid, canopy.getCenter()) < convergenceDelta); canopy.setCenter(centroid); canopy.setNumPoints(1); canopy.setPointTotal(centroid.clone()); @@ -129,9 +135,11 @@ /** * Return if the point is covered by this canopy - * - * @param canopy a canopy. - * @param point a Vector point + * + * @param canopy + * a canopy. + * @param point + * a Vector point * @return if the point is covered */ boolean covers(MeanShiftCanopy canopy, Vector point) { @@ -141,8 +149,10 @@ /** * Return if the point is closely covered by the canopy * - * @param canopy a canopy. - * @param point a Vector point + * @param canopy + * a canopy. + * @param point + * a Vector point * @return if the point is covered */ public boolean closelyBound(MeanShiftCanopy canopy, Vector point) {
Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java?rev=909914&r1=909913&r2=909914&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyConfigKeys.java Sat Feb 13 21:07:53 2010 @@ -1,12 +1,12 @@ package org.apache.mahout.clustering.meanshift; public interface MeanShiftCanopyConfigKeys { - + // keys used by Driver, Mapper, Combiner & Reducer String DISTANCE_MEASURE_KEY = "org.apache.mahout.clustering.canopy.measure"; String T1_KEY = "org.apache.mahout.clustering.canopy.t1"; String T2_KEY = "org.apache.mahout.clustering.canopy.t2"; String CONTROL_PATH_KEY = "org.apache.mahout.clustering.control.path"; String CLUSTER_CONVERGENCE_KEY = "org.apache.mahout.clustering.canopy.convergence"; - + } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java?rev=909914&r1=909913&r2=909914&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyDriver.java Sat Feb 13 21:07:53 2010 @@ -17,6 +17,8 @@ package org.apache.mahout.clustering.meanshift; +import java.io.IOException; + import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -39,43 +41,38 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; - -public class MeanShiftCanopyDriver { - - private static final Logger log = LoggerFactory - .getLogger(MeanShiftCanopyDriver.class); - - private MeanShiftCanopyDriver() { - } - +public final class MeanShiftCanopyDriver { + + private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyDriver.class); + + private MeanShiftCanopyDriver() { } + public static void main(String[] args) { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); - + Option inputOpt = DefaultOptionCreator.inputOption().create(); Option outputOpt = DefaultOptionCreator.outputOption().create(); Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create(); Option helpOpt = DefaultOptionCreator.helpOption(); - - Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d"). - withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()). - withDescription("The distance measure class name.").create(); - - - Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1"). - withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()). - withDescription("The T1 distance threshold.").create(); - - Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2"). - withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()). - withDescription("The T1 distance threshold.").create(); - - Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt). - withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt). - withOption(threshold2Opt).create(); - + + Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d") + .withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()) + .withDescription("The distance measure class name.").create(); + + Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1") + .withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()) + .withDescription("The T1 distance threshold.").create(); + + Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2") + .withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()) + .withDescription("The T1 distance threshold.").create(); + + Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt) + .withOption(modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt) + .withOption(threshold2Opt).create(); + try { Parser parser = new Parser(); parser.setGroup(group); @@ -84,45 +81,57 @@ CommandLineUtil.printHelp(group); return; } - + String input = cmdLine.getValue(inputOpt).toString(); String output = cmdLine.getValue(outputOpt).toString(); String measureClassName = cmdLine.getValue(modelOpt).toString(); double t1 = Double.parseDouble(cmdLine.getValue(threshold1Opt).toString()); double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString()); double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString()); - runJob(input, output, output + MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY, + MeanShiftCanopyDriver.runJob(input, output, output + MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY, measureClassName, t1, t2, convergenceDelta); } catch (OptionException e) { - log.error("Exception parsing command line: ", e); + MeanShiftCanopyDriver.log.error("Exception parsing command line: ", e); CommandLineUtil.printHelp(group); } } - + /** * Run the job - * - * @param input the input pathname String - * @param output the output pathname String - * @param control the control path - * @param measureClassName the DistanceMeasure class name - * @param t1 the T1 distance threshold - * @param t2 the T2 distance threshold - * @param convergenceDelta the double convergence criteria + * + * @param input + * the input pathname String + * @param output + * the output pathname String + * @param control + * the control path + * @param measureClassName + * the DistanceMeasure class name + * @param t1 + * the T1 distance threshold + * @param t2 + * the T2 distance threshold + * @param convergenceDelta + * the double convergence criteria */ - public static void runJob(String input, String output, String control, - String measureClassName, double t1, double t2, double convergenceDelta) { - + public static void runJob(String input, + String output, + String control, + String measureClassName, + double t1, + double t2, + double convergenceDelta) { + Configurable client = new JobClient(); JobConf conf = new JobConf(MeanShiftCanopyDriver.class); - + conf.setOutputKeyClass(Text.class); conf.setOutputValueClass(MeanShiftCanopy.class); - + FileInputFormat.setInputPaths(conf, new Path(input)); Path outPath = new Path(output); FileOutputFormat.setOutputPath(conf, outPath); - + conf.setMapperClass(MeanShiftCanopyMapper.class); conf.setReducerClass(MeanShiftCanopyReducer.class); conf.setNumReduceTasks(1); @@ -133,12 +142,12 @@ conf.set(MeanShiftCanopyConfigKeys.T1_KEY, String.valueOf(t1)); conf.set(MeanShiftCanopyConfigKeys.T2_KEY, String.valueOf(t2)); conf.set(MeanShiftCanopyConfigKeys.CONTROL_PATH_KEY, control); - + client.setConf(conf); try { JobClient.runJob(conf); } catch (IOException e) { - log.warn(e.toString(), e); + MeanShiftCanopyDriver.log.warn(e.toString(), e); } } } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java?rev=909914&r1=909913&r2=909914&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyJob.java Sat Feb 13 21:07:53 2010 @@ -17,6 +17,8 @@ package org.apache.mahout.clustering.meanshift; +import java.io.IOException; + import org.apache.commons.cli2.CommandLine; import org.apache.commons.cli2.Group; import org.apache.commons.cli2.Option; @@ -34,46 +36,41 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import java.io.IOException; - public class MeanShiftCanopyJob { - + protected static final String CONTROL_CONVERGED = "/control/converged"; - - private static final Logger log = LoggerFactory - .getLogger(MeanShiftCanopyJob.class); - - private MeanShiftCanopyJob() { - } - + + private static final Logger log = LoggerFactory.getLogger(MeanShiftCanopyJob.class); + + private MeanShiftCanopyJob() { } + public static void main(String[] args) throws IOException { DefaultOptionBuilder obuilder = new DefaultOptionBuilder(); ArgumentBuilder abuilder = new ArgumentBuilder(); GroupBuilder gbuilder = new GroupBuilder(); - + Option inputOpt = DefaultOptionCreator.inputOption().create(); Option outputOpt = DefaultOptionCreator.outputOption().create(); Option convergenceDeltaOpt = DefaultOptionCreator.convergenceOption().create(); Option maxIterOpt = DefaultOptionCreator.maxIterOption().create(); Option helpOpt = DefaultOptionCreator.helpOption(); - - Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d"). - withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()). - withDescription("The distance measure class name.").create(); - - - Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1"). - withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()). - withDescription("The T1 distance threshold.").create(); - - Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2"). - withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()). - withDescription("The T1 distance threshold.").create(); - - Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt).withOption(modelOpt). - withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt).withOption(maxIterOpt). - withOption(threshold2Opt).create(); - + + Option modelOpt = obuilder.withLongName("distanceClass").withRequired(true).withShortName("d") + .withArgument(abuilder.withName("distanceClass").withMinimum(1).withMaximum(1).create()) + .withDescription("The distance measure class name.").create(); + + Option threshold1Opt = obuilder.withLongName("threshold_1").withRequired(true).withShortName("t1") + .withArgument(abuilder.withName("threshold_1").withMinimum(1).withMaximum(1).create()) + .withDescription("The T1 distance threshold.").create(); + + Option threshold2Opt = obuilder.withLongName("threshold_2").withRequired(true).withShortName("t2") + .withArgument(abuilder.withName("threshold_2").withMinimum(1).withMaximum(1).create()) + .withDescription("The T1 distance threshold.").create(); + + Group group = gbuilder.withName("Options").withOption(inputOpt).withOption(outputOpt) + .withOption(modelOpt).withOption(helpOpt).withOption(convergenceDeltaOpt).withOption(threshold1Opt) + .withOption(maxIterOpt).withOption(threshold2Opt).create(); + try { Parser parser = new Parser(); parser.setGroup(group); @@ -82,7 +79,7 @@ CommandLineUtil.printHelp(group); return; } - + String input = cmdLine.getValue(inputOpt).toString(); String output = cmdLine.getValue(outputOpt).toString(); String measureClassName = cmdLine.getValue(modelOpt).toString(); @@ -90,27 +87,37 @@ double t2 = Double.parseDouble(cmdLine.getValue(threshold2Opt).toString()); double convergenceDelta = Double.parseDouble(cmdLine.getValue(convergenceDeltaOpt).toString()); int maxIterations = Integer.parseInt(cmdLine.getValue(maxIterOpt).toString()); - runJob(input, output, measureClassName, t1, t2, convergenceDelta, - maxIterations); + MeanShiftCanopyJob.runJob(input, output, measureClassName, t1, t2, convergenceDelta, maxIterations); } catch (OptionException e) { - log.error("Exception parsing command line: ", e); + MeanShiftCanopyJob.log.error("Exception parsing command line: ", e); CommandLineUtil.printHelp(group); } } - + /** * Run the job - * - * @param input the input pathname String - * @param output the output pathname String - * @param measureClassName the DistanceMeasure class name - * @param t1 the T1 distance threshold - * @param t2 the T2 distance threshold - * @param convergenceDelta the double convergence criteria - * @param maxIterations an int number of iterations + * + * @param input + * the input pathname String + * @param output + * the output pathname String + * @param measureClassName + * the DistanceMeasure class name + * @param t1 + * the T1 distance threshold + * @param t2 + * the T2 distance threshold + * @param convergenceDelta + * the double convergence criteria + * @param maxIterations + * an int number of iterations */ - public static void runJob(String input, String output, - String measureClassName, double t1, double t2, double convergenceDelta, + public static void runJob(String input, + String output, + String measureClassName, + double t1, + double t2, + double convergenceDelta, int maxIterations) throws IOException { // delete the output directory Configuration conf = new JobConf(MeanShiftCanopyDriver.class); @@ -124,18 +131,18 @@ boolean converged = false; int iteration = 0; String clustersIn = input; - while (!converged && iteration < maxIterations) { - log.info("Iteration {}", iteration); + while (!converged && (iteration < maxIterations)) { + MeanShiftCanopyJob.log.info("Iteration {}", iteration); // point the output to a new directory per iteration String clustersOut = output + "/canopies-" + iteration; - String controlOut = output + CONTROL_CONVERGED; - MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, controlOut, - measureClassName, t1, t2, convergenceDelta); + String controlOut = output + MeanShiftCanopyJob.CONTROL_CONVERGED; + MeanShiftCanopyDriver.runJob(clustersIn, clustersOut, controlOut, measureClassName, t1, t2, + convergenceDelta); converged = FileSystem.get(conf).exists(new Path(controlOut)); // now point the input to the old output directory clustersIn = output + "/canopies-" + iteration; iteration++; } } - + } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java?rev=909914&r1=909913&r2=909914&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyMapper.java Sat Feb 13 21:07:53 2010 @@ -17,6 +17,10 @@ package org.apache.mahout.clustering.meanshift; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + import org.apache.hadoop.io.Text; import org.apache.hadoop.io.WritableComparable; import org.apache.hadoop.mapred.JobConf; @@ -25,26 +29,23 @@ import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - public class MeanShiftCanopyMapper extends MapReduceBase implements - Mapper<WritableComparable<?>, MeanShiftCanopy, Text, MeanShiftCanopy> { - + Mapper<WritableComparable<?>,MeanShiftCanopy,Text,MeanShiftCanopy> { + private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>(); private MeanShiftCanopyClusterer clusterer; - private OutputCollector<Text, MeanShiftCanopy> output; - + private OutputCollector<Text,MeanShiftCanopy> output; + @Override - public void map(WritableComparable<?> key, MeanShiftCanopy canopy, - OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter) - throws IOException { + public void map(WritableComparable<?> key, + MeanShiftCanopy canopy, + OutputCollector<Text,MeanShiftCanopy> output, + Reporter reporter) throws IOException { this.output = output; clusterer.mergeCanopy(canopy.shallowCopy(), canopies); } - + @Override public void close() throws IOException { for (MeanShiftCanopy canopy : canopies) { @@ -53,11 +54,11 @@ } super.close(); } - + @Override public void configure(JobConf job) { super.configure(job); clusterer = new MeanShiftCanopyClusterer(job); } - + } Modified: lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java?rev=909914&r1=909913&r2=909914&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java (original) +++ lucene/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/meanshift/MeanShiftCanopyReducer.java Sat Feb 13 21:07:53 2010 @@ -17,6 +17,11 @@ package org.apache.mahout.clustering.meanshift; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; @@ -26,30 +31,26 @@ import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.Reporter; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Iterator; -import java.util.List; - public class MeanShiftCanopyReducer extends MapReduceBase implements - Reducer<Text, MeanShiftCanopy, Text, MeanShiftCanopy> { - + Reducer<Text,MeanShiftCanopy,Text,MeanShiftCanopy> { + private final List<MeanShiftCanopy> canopies = new ArrayList<MeanShiftCanopy>(); private MeanShiftCanopyClusterer clusterer; private boolean allConverged = true; - + private JobConf conf; - + @Override - public void reduce(Text key, Iterator<MeanShiftCanopy> values, - OutputCollector<Text, MeanShiftCanopy> output, Reporter reporter) - throws IOException { - + public void reduce(Text key, + Iterator<MeanShiftCanopy> values, + OutputCollector<Text,MeanShiftCanopy> output, + Reporter reporter) throws IOException { + while (values.hasNext()) { MeanShiftCanopy canopy = values.next(); clusterer.mergeCanopy(canopy.shallowCopy(), canopies); } - + for (MeanShiftCanopy canopy : canopies) { boolean converged = clusterer.shiftToMean(canopy); if (converged) { @@ -58,16 +59,16 @@ allConverged = converged && allConverged; output.collect(new Text(canopy.getIdentifier()), canopy); } - + } - + @Override public void configure(JobConf job) { super.configure(job); this.conf = job; clusterer = new MeanShiftCanopyClusterer(job); } - + @Override public void close() throws IOException { if (allConverged) { @@ -76,5 +77,5 @@ } super.close(); } - + } Modified: lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java URL: http://svn.apache.org/viewvc/lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=909914&r1=909913&r2=909914&view=diff ============================================================================== --- lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java (original) +++ lucene/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java Sat Feb 13 21:07:53 2010 @@ -26,6 +26,7 @@ import org.apache.hadoop.mapred.Reducer; import org.apache.hadoop.mapred.lib.IdentityReducer; import org.apache.mahout.clustering.ClusteringTestUtils; +import org.apache.mahout.common.DummyReporter; import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.math.RandomAccessSparseVector; import org.apache.mahout.math.Vector; @@ -254,7 +255,7 @@ List<Canopy> canopies = new ArrayList<Canopy>(); for (VectorWritable point : points) { - clusterer.addPointToCanopies(point.get(), canopies); + clusterer.addPointToCanopies(point.get(), canopies, new DummyReporter()); } System.out.println("testIterativeManhattan"); @@ -269,7 +270,7 @@ List<Canopy> canopies = new ArrayList<Canopy>(); for (VectorWritable point : points) { - clusterer.addPointToCanopies(point.get(), canopies); + clusterer.addPointToCanopies(point.get(), canopies, new DummyReporter()); } System.out.println("testIterativeEuclidean");