Modified: mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java?rev=987647&r1=987646&r2=987647&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/clustering/dirichlet/TestMapReduce.java Fri Aug 20 21:56:16 2010 @@ -33,6 +33,7 @@ import org.apache.hadoop.mapreduce.Reduc import org.apache.mahout.clustering.Cluster; import org.apache.mahout.clustering.ClusteringTestUtils; import org.apache.mahout.clustering.Model; +import org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution; import org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalModel; import org.apache.mahout.clustering.dirichlet.models.NormalModel; import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution; @@ -117,9 +118,7 @@ public class TestMapReduce extends Mahou /** Test the basic Mapper */ public void testMapper() throws Exception { generateSamples(10, 0, 0, 1); - DirichletState state = new DirichletState(new NormalModelDistribution(new VectorWritable(new DenseVector(2))), - 5, - 1); + DirichletState state = new DirichletState(new NormalModelDistribution(new VectorWritable(new DenseVector(2))), 5, 1); DirichletMapper mapper = new DirichletMapper(); mapper.setup(state); @@ -141,9 +140,7 @@ public class TestMapReduce extends Mahou generateSamples(100, 2, 0, 1); generateSamples(100, 0, 2, 1); generateSamples(100, 2, 2, 1); - DirichletState state = new DirichletState(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))), - 20, - 1); + DirichletState state = new DirichletState(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))), 20, 1); DirichletMapper mapper = new DirichletMapper(); mapper.setup(state); @@ -158,8 +155,11 @@ public class TestMapReduce extends Mahou DirichletReducer reducer = new DirichletReducer(); reducer.setup(state); DummyRecordWriter<Text, DirichletCluster> reduceWriter = new DummyRecordWriter<Text, DirichletCluster>(); - Reducer<Text, VectorWritable, Text, DirichletCluster>.Context reduceContext = DummyRecordWriter - .build(reducer, conf, reduceWriter, Text.class, VectorWritable.class); + Reducer<Text, VectorWritable, Text, DirichletCluster>.Context reduceContext = DummyRecordWriter.build(reducer, + conf, + reduceWriter, + Text.class, + VectorWritable.class); for (Text key : mapWriter.getKeys()) { reducer.reduce(new Text(key), mapWriter.getValue(key), reduceContext); } @@ -174,9 +174,7 @@ public class TestMapReduce extends Mahou generateSamples(100, 2, 0, 1); generateSamples(100, 0, 2, 1); generateSamples(100, 2, 2, 1); - DirichletState state = new DirichletState(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))), - 20, - 1.0); + DirichletState state = new DirichletState(new SampledNormalDistribution(new VectorWritable(new DenseVector(2))), 20, 1.0); List<Model<VectorWritable>[]> models = new ArrayList<Model<VectorWritable>[]>(); @@ -194,8 +192,11 @@ public class TestMapReduce extends Mahou DirichletReducer reducer = new DirichletReducer(); reducer.setup(state); DummyRecordWriter<Text, DirichletCluster> reduceWriter = new DummyRecordWriter<Text, DirichletCluster>(); - Reducer<Text, VectorWritable, Text, DirichletCluster>.Context reduceContext = DummyRecordWriter - .build(reducer, conf, reduceWriter, Text.class, VectorWritable.class); + Reducer<Text, VectorWritable, Text, DirichletCluster>.Context reduceContext = DummyRecordWriter.build(reducer, + conf, + reduceWriter, + Text.class, + VectorWritable.class); for (Text key : mapWriter.getKeys()) { reducer.reduce(new Text(key), mapWriter.getValue(key), reduceContext); } @@ -247,11 +248,11 @@ public class TestMapReduce extends Mahou ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf); // Now run the driver using the run() method. Others can use runJob() as before Integer maxIterations = 5; + AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2))); String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(), optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(), - optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), - "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", - optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), "org.apache.mahout.math.DenseVector", + optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), modelDistribution.getClass().getName(), + optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), modelDistribution.getModelPrototype().get().getClass().getName(), optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION), maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0", optKey(DefaultOptionCreator.OVERWRITE_OPTION), optKey(DefaultOptionCreator.CLUSTERING_OPTION), optKey(DefaultOptionCreator.METHOD_OPTION), @@ -260,9 +261,7 @@ public class TestMapReduce extends Mahou // and inspect results List<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>(); Configuration conf = new Configuration(); - conf.set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution"); - conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector"); - conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2"); + conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString()); conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20"); conf.set(DirichletDriver.ALPHA_0_KEY, "1.0"); for (int i = 0; i <= maxIterations; i++) { @@ -281,11 +280,11 @@ public class TestMapReduce extends Mahou ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data.txt"), fs, conf); // Now run the driver using the run() method. Others can use runJob() as before Integer maxIterations = 5; + AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2))); String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), getTestTempDirPath("input").toString(), optKey(DefaultOptionCreator.OUTPUT_OPTION), getTestTempDirPath("output").toString(), - optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), - "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", - optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), "org.apache.mahout.math.DenseVector", + optKey(DirichletDriver.MODEL_DISTRIBUTION_CLASS_OPTION), modelDistribution.getClass().getName(), + optKey(DirichletDriver.MODEL_PROTOTYPE_CLASS_OPTION), modelDistribution.getModelPrototype().get().getClass().getName(), optKey(DefaultOptionCreator.NUM_CLUSTERS_OPTION), "20", optKey(DefaultOptionCreator.MAX_ITERATIONS_OPTION), maxIterations.toString(), optKey(DirichletDriver.ALPHA_OPTION), "1.0", optKey(DefaultOptionCreator.OVERWRITE_OPTION), optKey(DefaultOptionCreator.CLUSTERING_OPTION) }; @@ -293,9 +292,7 @@ public class TestMapReduce extends Mahou // and inspect results List<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>(); Configuration conf = new Configuration(); - conf.set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution"); - conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector"); - conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2"); + conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString()); conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20"); conf.set(DirichletDriver.ALPHA_0_KEY, "1.0"); for (int i = 0; i <= maxIterations; i++) { @@ -310,10 +307,10 @@ public class TestMapReduce extends Mahou generate4Datasets(); // Now run the driver int maxIterations = 3; + AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2))); DirichletDriver.runJob(getTestTempDirPath("input"), getTestTempDirPath("output"), - "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", - "org.apache.mahout.math.DenseVector", + modelDistribution, 20, maxIterations, 1.0, @@ -325,9 +322,7 @@ public class TestMapReduce extends Mahou // and inspect results List<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>(); Configuration conf = new Configuration(); - conf.set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution"); - conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector"); - conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2"); + conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString()); conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20"); conf.set(DirichletDriver.ALPHA_0_KEY, "1.0"); for (int i = 0; i <= maxIterations; i++) { @@ -356,10 +351,10 @@ public class TestMapReduce extends Mahou generate4Datasets(); // Now run the driver int maxIterations = 3; + AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2))); DirichletDriver.runJob(getTestTempDirPath("input"), getTestTempDirPath("output"), - "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution", - "org.apache.mahout.math.DenseVector", + modelDistribution, 20, maxIterations, 1.0, @@ -371,9 +366,7 @@ public class TestMapReduce extends Mahou // and inspect results List<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>(); Configuration conf = new Configuration(); - conf.set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution"); - conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector"); - conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2"); + conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString()); conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20"); conf.set(DirichletDriver.ALPHA_0_KEY, "1.0"); for (int i = 0; i <= maxIterations; i++) { @@ -402,10 +395,10 @@ public class TestMapReduce extends Mahou ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("input/data4.txt"), fs, conf); // Now run the driver int maxIterations = 3; + AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(new DenseVector(2))); DirichletDriver.runJob(getTestTempDirPath("input"), getTestTempDirPath("output"), - "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution", - "org.apache.mahout.math.DenseVector", + modelDistribution, 20, maxIterations, 1.0, @@ -417,10 +410,7 @@ public class TestMapReduce extends Mahou // and inspect results List<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>(); Configuration conf = new Configuration(); - conf - .set(DirichletDriver.MODEL_FACTORY_KEY, "org.apache.mahout.clustering.dirichlet.models.AsymmetricSampledNormalDistribution"); - conf.set(DirichletDriver.MODEL_PROTOTYPE_KEY, "org.apache.mahout.math.DenseVector"); - conf.set(DirichletDriver.PROTOTYPE_SIZE_KEY, "2"); + conf.set(DirichletDriver.MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString()); conf.set(DirichletDriver.NUM_CLUSTERS_KEY, "20"); conf.set(DirichletDriver.ALPHA_0_KEY, "1.0"); for (int i = 0; i <= maxIterations; i++) {
Modified: mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java URL: http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java?rev=987647&r1=987646&r2=987647&view=diff ============================================================================== --- mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java (original) +++ mahout/trunk/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java Fri Aug 20 21:56:16 2010 @@ -29,9 +29,12 @@ import org.apache.commons.cli2.builder.D import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.mahout.clustering.Model; +import org.apache.mahout.clustering.ModelDistribution; import org.apache.mahout.clustering.dirichlet.DirichletCluster; import org.apache.mahout.clustering.dirichlet.DirichletDriver; import org.apache.mahout.clustering.dirichlet.DirichletMapper; +import org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution; +import org.apache.mahout.clustering.dirichlet.models.DistanceMeasureClusterDistribution; import org.apache.mahout.clustering.dirichlet.models.NormalModelDistribution; import org.apache.mahout.clustering.syntheticcontrol.Constants; import org.apache.mahout.clustering.syntheticcontrol.canopy.InputDriver; @@ -58,16 +61,8 @@ public final class Job extends Dirichlet log.info("Running with default arguments"); Path output = new Path("output"); HadoopUtil.overwriteOutput(output); - new Job().job(new Path("testdata"), - output, - "org.apache.mahout.clustering.syntheticcontrol.dirichlet.NormalScModelDistribution", - "org.apache.mahout.math.RandomAccessSparseVector", - 10, - 5, - 1.0, - 1, - false, - 0.001); + AbstractVectorModelDistribution modelDistribution = new DistanceMeasureClusterDistribution(new VectorWritable(new RandomAccessSparseVector(60))); + new Job().job(new Path("testdata"), output, modelDistribution, 10, 5, 0.5, 1, false, 0.001); } } @@ -90,6 +85,7 @@ public final class Job extends Dirichlet .withArgument(new ArgumentBuilder().withName("prototypeClass").withDefault(RandomAccessSparseVector.class.getName()) .withMinimum(1).withMaximum(1).create()) .withDescription("The ModelDistribution prototype Vector class name. Defaults to RandomAccessSparseVector").create()); + addOption(DefaultOptionCreator.distanceMeasureOption().withRequired(false).create()); addOption(DefaultOptionCreator.emitMostLikelyOption().create()); addOption(DefaultOptionCreator.thresholdOption().create()); addOption(DefaultOptionCreator.numReducersOption().create()); @@ -106,14 +102,20 @@ public final class Job extends Dirichlet } String modelFactory = getOption(MODEL_DISTRIBUTION_CLASS_OPTION); String modelPrototype = getOption(MODEL_PROTOTYPE_CLASS_OPTION); + String distanceMeasure = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION); int numModels = Integer.parseInt(getOption(DefaultOptionCreator.NUM_CLUSTERS_OPTION)); int numReducers = Integer.parseInt(getOption(DefaultOptionCreator.MAX_REDUCERS_OPTION)); int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION)); boolean emitMostLikely = Boolean.parseBoolean(getOption(DefaultOptionCreator.EMIT_MOST_LIKELY_OPTION)); double threshold = Double.parseDouble(getOption(DefaultOptionCreator.THRESHOLD_OPTION)); double alpha0 = Double.parseDouble(getOption(ALPHA_OPTION)); + int prototypeSize = DirichletDriver.readPrototypeSize(input); + AbstractVectorModelDistribution modelDistribution = DirichletDriver.createModelDistribution(modelFactory, + modelPrototype, + distanceMeasure, + prototypeSize); - job(input, output, modelFactory, modelPrototype, numModels, maxIterations, alpha0, numReducers, emitMostLikely, threshold); + job(input, output, modelDistribution, numModels, maxIterations, alpha0, numReducers, emitMostLikely, threshold); return 0; } @@ -124,8 +126,8 @@ public final class Job extends Dirichlet * the directory pathname for input points * @param output * the directory pathname for output points - * @param modelFactory - * the ModelDistribution class name + * @param modelDistribution + * the ModelDistribution * @param numModels * the number of Models * @param maxIterations @@ -137,8 +139,7 @@ public final class Job extends Dirichlet */ private void job(Path input, Path output, - String modelFactory, - String modelPrototype, + ModelDistribution<VectorWritable> modelDistribution, int numModels, int maxIterations, double alpha0, @@ -147,11 +148,10 @@ public final class Job extends Dirichlet double threshold) throws IOException, ClassNotFoundException, InstantiationException, IllegalAccessException, NoSuchMethodException, InvocationTargetException, SecurityException, InterruptedException { Path directoryContainingConvertedInput = new Path(output, Constants.DIRECTORY_CONTAINING_CONVERTED_INPUT); - InputDriver.runJob(input, directoryContainingConvertedInput, modelPrototype); + InputDriver.runJob(input, directoryContainingConvertedInput, "org.apache.mahout.math.RandomAccessSparseVector"); DirichletDriver.runJob(directoryContainingConvertedInput, output, - modelFactory, - modelPrototype, + modelDistribution, numModels, maxIterations, alpha0, @@ -172,11 +172,7 @@ public final class Job extends Dirichlet * @param output * the String output directory * @param modelDistribution - * the String class name of the ModelDistribution - * @param vectorClassName - * the String class name of the Vector to use - * @param prototypeSize - * the size of the Vector prototype for the Dirichlet Models + * the ModelDistribution * @param numIterations * the int number of Iterations * @param numModels @@ -185,21 +181,17 @@ public final class Job extends Dirichlet * the double alpha_0 value */ public static void printResults(String output, - String modelDistribution, - String vectorClassName, - int prototypeSize, + ModelDistribution<VectorWritable> modelDistribution, int numIterations, int numModels, double alpha0) throws NoSuchMethodException, InvocationTargetException { Collection<List<DirichletCluster>> clusters = new ArrayList<List<DirichletCluster>>(); Configuration conf = new Configuration(); - conf.set(MODEL_FACTORY_KEY, modelDistribution); + conf.set(MODEL_DISTRIBUTION_KEY, modelDistribution.asJsonString()); conf.set(NUM_CLUSTERS_KEY, Integer.toString(numModels)); conf.set(ALPHA_0_KEY, Double.toString(alpha0)); for (int i = 0; i < numIterations; i++) { conf.set(STATE_IN_KEY, output + "/clusters-" + i); - conf.set(MODEL_PROTOTYPE_KEY, vectorClassName); - conf.set(PROTOTYPE_SIZE_KEY, Integer.toString(prototypeSize)); clusters.add(DirichletMapper.getDirichletState(conf).getClusters()); } printClusters(clusters, 0); Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=987647&r1=987646&r2=987647&view=diff ============================================================================== --- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original) +++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Fri Aug 20 21:56:16 2010 @@ -36,7 +36,10 @@ import org.apache.lucene.store.RAMDirect import org.apache.lucene.util.Version; import org.apache.mahout.clustering.canopy.CanopyDriver; import org.apache.mahout.clustering.dirichlet.DirichletDriver; -import org.apache.mahout.clustering.dirichlet.models.L1ModelDistribution; +import org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution; +import org.apache.mahout.clustering.dirichlet.models.DistanceMeasureClusterDistribution; +import org.apache.mahout.clustering.dirichlet.models.GaussianClusterDistribution; +import org.apache.mahout.clustering.dirichlet.models.SampledNormalDistribution; import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver; import org.apache.mahout.clustering.kmeans.KMeansDriver; import org.apache.mahout.clustering.meanshift.MeanShiftCanopyDriver; @@ -82,6 +85,7 @@ public class TestClusterDumper extends M ClusteringTestUtils.writePointsToFile(sampleData, getTestTempFilePath("testdata/file1"), fs, conf); } + @SuppressWarnings("deprecation") private void getSampleData(String[] docs2) throws IOException { sampleData = new ArrayList<VectorWritable>(); RAMDirectory directory = new RAMDirectory(); @@ -193,8 +197,28 @@ public class TestClusterDumper extends M public void testDirichlet() throws Exception { Path output = getTestTempDirPath("output"); NamedVector prototype = (NamedVector) sampleData.get(0).get(); - DirichletDriver.runJob(getTestTempDirPath("testdata"), output, L1ModelDistribution.class.getName(), prototype.getDelegate() - .getClass().getName(), 15, 10, 1.0, 1, true, true, 0, false); + AbstractVectorModelDistribution modelDistribution = new SampledNormalDistribution(new VectorWritable(prototype)); + DirichletDriver.runJob(getTestTempDirPath("testdata"), output, modelDistribution, 15, 10, 1.0, 1, true, true, 0, false); + // run ClusterDumper + ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints")); + clusterDumper.printClusters(termDictionary); + } + + public void testDirichlet2() throws Exception { + Path output = getTestTempDirPath("output"); + NamedVector prototype = (NamedVector) sampleData.get(0).get(); + AbstractVectorModelDistribution modelDistribution = new GaussianClusterDistribution(new VectorWritable(prototype)); + DirichletDriver.runJob(getTestTempDirPath("testdata"), output, modelDistribution, 15, 10, 1.0, 1, true, true, 0, true); + // run ClusterDumper + ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints")); + clusterDumper.printClusters(termDictionary); + } + + public void testDirichlet3() throws Exception { + Path output = getTestTempDirPath("output"); + NamedVector prototype = (NamedVector) sampleData.get(0).get(); + AbstractVectorModelDistribution modelDistribution = new DistanceMeasureClusterDistribution(new VectorWritable(prototype)); + DirichletDriver.runJob(getTestTempDirPath("testdata"), output, modelDistribution, 15, 10, 1.0, 1, true, true, 0, true); // run ClusterDumper ClusterDumper clusterDumper = new ClusterDumper(new Path(output, "clusters-10"), new Path(output, "clusteredPoints")); clusterDumper.printClusters(termDictionary); Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=987647&r1=987646&r2=987647&view=diff ============================================================================== --- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original) +++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Fri Aug 20 21:56:16 2010 @@ -35,7 +35,8 @@ import org.apache.mahout.clustering.Clus import org.apache.mahout.clustering.canopy.Canopy; import org.apache.mahout.clustering.canopy.CanopyDriver; import org.apache.mahout.clustering.dirichlet.DirichletDriver; -import org.apache.mahout.clustering.dirichlet.models.L1ModelDistribution; +import org.apache.mahout.clustering.dirichlet.models.AbstractVectorModelDistribution; +import org.apache.mahout.clustering.dirichlet.models.GaussianClusterDistribution; import org.apache.mahout.clustering.fuzzykmeans.FuzzyKMeansDriver; import org.apache.mahout.clustering.kmeans.KMeansDriver; import org.apache.mahout.clustering.kmeans.TestKmeansClustering; @@ -44,7 +45,6 @@ import org.apache.mahout.common.MahoutTe import org.apache.mahout.common.distance.DistanceMeasure; import org.apache.mahout.common.distance.EuclideanDistanceMeasure; import org.apache.mahout.math.DenseVector; -import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; public class TestCDbwEvaluator extends MahoutTestCase { @@ -205,11 +205,10 @@ public class TestCDbwEvaluator extends M } public void testDirichlet() throws Exception { - Vector prototype = new DenseVector(2); + AbstractVectorModelDistribution modelDistribution = new GaussianClusterDistribution(new VectorWritable(new DenseVector(2))); DirichletDriver.runJob(getTestTempDirPath("testdata"), getTestTempDirPath("output"), - L1ModelDistribution.class.getName(), - prototype.getClass().getName(), + modelDistribution, 15, 5, 1.0, @@ -217,7 +216,7 @@ public class TestCDbwEvaluator extends M true, true, 0, - false); + true); int numIterations = 2; Path output = getTestTempDirPath("output"); CDbwDriver.runJob(new Path(output, "clusters-5"), Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java?rev=987647&r1=987646&r2=987647&view=diff ============================================================================== --- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java (original) +++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/dirichlet/TestL1ModelClustering.java Fri Aug 20 21:56:16 2010 @@ -106,6 +106,7 @@ public class TestL1ModelClustering exten RandomUtils.useTestSeed(); } + @SuppressWarnings("deprecation") private void getSampleData(String[] docs2) throws IOException { sampleData = new ArrayList<VectorWritable>(); RAMDirectory directory = new RAMDirectory();
