Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java?rev=1003478&r1=1003477&r2=1003478&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsDriver.java Fri Oct 1 11:00:16 2010 @@ -63,8 +63,8 @@ public final class RepresentativePointsD } @Override - public int run(String[] args) throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, - InterruptedException { + public int run(String[] args) + throws ClassNotFoundException, InstantiationException, IllegalAccessException, IOException, InterruptedException { addInputOption(); addOutputOption(); addOption(DefaultOptionCreator.distanceMeasureOption().create()); @@ -78,7 +78,8 @@ public final class RepresentativePointsD Path output = getOutputPath(); String distanceMeasureClass = getOption(DefaultOptionCreator.DISTANCE_MEASURE_OPTION); int maxIterations = Integer.parseInt(getOption(DefaultOptionCreator.MAX_ITERATIONS_OPTION)); - boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase(DefaultOptionCreator.SEQUENTIAL_METHOD); + boolean runSequential = getOption(DefaultOptionCreator.METHOD_OPTION).equalsIgnoreCase( + DefaultOptionCreator.SEQUENTIAL_METHOD); ClassLoader ccl = Thread.currentThread().getContextClassLoader(); DistanceMeasure measure = ccl.loadClass(distanceMeasureClass).asSubclass(DistanceMeasure.class).newInstance(); @@ -92,8 +93,8 @@ public final class RepresentativePointsD Path output, DistanceMeasure measure, int numIterations, - boolean runSequential) throws InstantiationException, IllegalAccessException, IOException, - InterruptedException, ClassNotFoundException { + boolean runSequential) + throws InstantiationException, IllegalAccessException, IOException, InterruptedException, ClassNotFoundException { Path stateIn = new Path(output, "representativePoints-0"); writeInitialState(stateIn, clustersIn); @@ -110,8 +111,8 @@ public final class RepresentativePointsD conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName()); } - private static void writeInitialState(Path output, Path clustersIn) throws InstantiationException, IllegalAccessException, - IOException, SecurityException { + private static void writeInitialState(Path output, Path clustersIn) + throws InstantiationException, IllegalAccessException, IOException, SecurityException { Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(output.toUri(), conf); for (FileStatus part : fs.listStatus(clustersIn)) { @@ -137,8 +138,8 @@ public final class RepresentativePointsD Path stateIn, Path stateOut, DistanceMeasure measure, - boolean runSequential) throws IOException, InterruptedException, ClassNotFoundException, - InstantiationException, IllegalAccessException { + boolean runSequential) + throws IOException, InterruptedException, ClassNotFoundException, InstantiationException, IllegalAccessException { if (runSequential) { runIterationSeq(conf, clusteredPointsIn, stateIn, stateOut, measure); } else { @@ -150,7 +151,7 @@ public final class RepresentativePointsD * Run the job using supplied arguments as a sequential process * @param conf * the Configuration to use - * @param input + * @param clusteredPointsIn * the directory pathname for input points * @param stateIn * the directory pathname for input state @@ -163,18 +164,18 @@ public final class RepresentativePointsD Path clusteredPointsIn, Path stateIn, Path stateOut, - DistanceMeasure measure) throws IOException, InstantiationException, IllegalAccessException { + DistanceMeasure measure) + throws IOException, InstantiationException, IllegalAccessException { Map<Integer, List<VectorWritable>> repPoints = RepresentativePointsMapper.getRepresentativePoints(conf, stateIn); Map<Integer, WeightedVectorWritable> mostDistantPoints = new HashMap<Integer, WeightedVectorWritable>(); FileSystem fs = FileSystem.get(clusteredPointsIn.toUri(), conf); FileStatus[] status = fs.listStatus(clusteredPointsIn, new OutputLogFilter()); - int part = 0; for (FileStatus s : status) { SequenceFile.Reader reader = new SequenceFile.Reader(fs, s.getPath(), conf); try { IntWritable key = (IntWritable) reader.getKeyClass().asSubclass(Writable.class).newInstance(); - WeightedVectorWritable vw = (WeightedVectorWritable) reader.getValueClass().asSubclass(Writable.class).newInstance(); + WeightedVectorWritable vw = reader.getValueClass().asSubclass(WeightedVectorWritable.class).newInstance(); while (reader.next(key, vw)) { RepresentativePointsMapper.mapPoint(key, vw, measure, repPoints, mostDistantPoints); } @@ -182,6 +183,7 @@ public final class RepresentativePointsD reader.close(); } } + int part = 0; SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++), @@ -196,7 +198,8 @@ public final class RepresentativePointsD } finally { writer.close(); } - writer = new SequenceFile.Writer(fs, conf, new Path(stateOut, "part-m-" + part++), IntWritable.class, VectorWritable.class); + writer = new SequenceFile.Writer(fs, conf, + new Path(stateOut, "part-m-" + part++), IntWritable.class, VectorWritable.class); try { for (Map.Entry<Integer, WeightedVectorWritable> entry : mostDistantPoints.entrySet()) { writer.append(new IntWritable(entry.getKey()), new VectorWritable(entry.getValue().getVector())); @@ -219,8 +222,12 @@ public final class RepresentativePointsD * @param measure * the DistanceMeasure to use */ - private static void runIterationMR(Configuration conf, Path input, Path stateIn, Path stateOut, DistanceMeasure measure) - throws IOException, InterruptedException, ClassNotFoundException { + private static void runIterationMR(Configuration conf, + Path input, + Path stateIn, + Path stateOut, + DistanceMeasure measure) + throws IOException, InterruptedException, ClassNotFoundException { conf.set(STATE_IN_KEY, stateIn.toString()); conf.set(DISTANCE_MEASURE_KEY, measure.getClass().getName()); Job job = new Job(conf);
Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java?rev=1003478&r1=1003477&r2=1003478&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/arff/MapBackedARFFModel.java Fri Oct 1 11:00:16 2010 @@ -83,7 +83,7 @@ public class MapBackedARFFModel implemen ARFFType type = typeMap.get(idx); data = MapBackedARFFModel.QUOTE_PATTERN.matcher(data).replaceAll(""); data = data.trim(); - double result = 0.0; + double result; switch (type) { case NUMERIC: result = processNumeric(data); Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java?rev=1003478&r1=1003477&r2=1003478&view=diff ============================================================================== --- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java (original) +++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/vectors/tfidf/TFIDFConverter.java Fri Oct 1 11:00:16 2010 @@ -261,7 +261,7 @@ public final class TFIDFConverter { * output directory were the partial vectors have to be created * @param sequentialAccess * output vectors should be optimized for sequential access - * @param namedVectors + * @param namedVector * output vectors should be named, retaining key (doc id) as a label */ private static void makePartialVectors(Path input, Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1003478&r1=1003477&r2=1003478&view=diff ============================================================================== --- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original) +++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Fri Oct 1 11:00:16 2010 @@ -152,14 +152,8 @@ public final class TestClusterDumper ext /** * Return the path to the final iteration's clusters - * - * @param conf - * @param output - * @param maxIterations - * @return - * @throws IOException */ - private Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException { + private static Path finalClusterPath(Configuration conf, Path output, int maxIterations) throws IOException { FileSystem fs = FileSystem.get(conf); for (int i = maxIterations; i >= 0; i--) { Path clusters = new Path(output, "clusters-" + i); Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java?rev=1003478&r1=1003477&r2=1003478&view=diff ============================================================================== --- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java (original) +++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterEvaluator.java Fri Oct 1 11:00:16 2010 @@ -59,7 +59,7 @@ public final class TestClusterEvaluator private List<VectorWritable> referenceData = new ArrayList<VectorWritable>(); - private List<VectorWritable> sampleData = new ArrayList<VectorWritable>(); + private final List<VectorWritable> sampleData = new ArrayList<VectorWritable>(); private Map<Integer, List<VectorWritable>> representativePoints; Modified: mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java?rev=1003478&r1=1003477&r2=1003478&view=diff ============================================================================== --- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java (original) +++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/cdbw/TestCDbwEvaluator.java Fri Oct 1 11:00:16 2010 @@ -71,7 +71,7 @@ public final class TestCDbwEvaluator ext private FileSystem fs; - private List<VectorWritable> sampleData = new ArrayList<VectorWritable>(); + private final List<VectorWritable> sampleData = new ArrayList<VectorWritable>(); private List<VectorWritable> referenceData = new ArrayList<VectorWritable>();
