Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFConverter.java
 Sun Apr 17 15:30:15 2011
@@ -149,7 +149,8 @@ public final class TFIDFConverter {
     Path wordCountPath = new Path(output, WORDCOUNT_OUTPUT_FOLDER);
 
     startDFCounting(input, wordCountPath, baseConf);
-    Pair<Long[], List<Path>> datasetFeatures = 
createDictionaryChunks(wordCountPath, output, baseConf, chunkSizeInMegabytes);
+    Pair<Long[], List<Path>> datasetFeatures =
+        createDictionaryChunks(wordCountPath, output, baseConf, 
chunkSizeInMegabytes);
 
     int partialVectorIndex = 0;
     List<Path> partialVectorPaths = new ArrayList<Path>();
@@ -210,8 +211,13 @@ public final class TFIDFConverter {
     long featureCount = 0;
     long vectorCount = Long.MAX_VALUE;
     Path filesPattern = new Path(featureCountPath, OUTPUT_FILES_PATTERN);
-    for (Pair<IntWritable,LongWritable> record :
-         new SequenceFileDirIterable<IntWritable,LongWritable>(filesPattern, 
PathType.GLOB, null, null, true, conf)) {
+    for (Pair<IntWritable,LongWritable> record
+         : new SequenceFileDirIterable<IntWritable,LongWritable>(filesPattern,
+                                                                 PathType.GLOB,
+                                                                 null,
+                                                                 null,
+                                                                 true,
+                                                                 conf)) {
 
       if (currentChunkSize > chunkSizeLimit) {
         freqWriter.close();
@@ -236,7 +242,7 @@ public final class TFIDFConverter {
       featureCount = Math.max(key.get(), featureCount);
 
     }
-  featureCount++;
+    featureCount++;
     freqWriter.close();
     Long[] counts = {featureCount, vectorCount};
     return new Pair<Long[], List<Path>>(counts, chunkPaths);
@@ -323,7 +329,7 @@ public final class TFIDFConverter {
         + "org.apache.hadoop.io.serializer.WritableSerialization");
     
     Job job = new Job(conf);
-    job.setJobName("VectorTfIdf Document Frequency Count running over input: " 
+ input.toString());
+    job.setJobName("VectorTfIdf Document Frequency Count running over input: " 
+ input);
     job.setJarByClass(TFIDFConverter.class);
     
     job.setOutputKeyClass(IntWritable.class);

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/vectorizer/tfidf/TFIDFPartialVectorReducer.java
 Sun Apr 17 15:30:15 2011
@@ -115,8 +115,8 @@ public class TFIDFPartialVectorReducer e
 
     Path dictionaryFile = new Path(localFiles[0].getPath());
     // key is feature, value is the document frequency
-    for (Pair<IntWritable,LongWritable> record :
-         new SequenceFileIterable<IntWritable,LongWritable>(dictionaryFile, 
true, conf)) {
+    for (Pair<IntWritable,LongWritable> record 
+         : new SequenceFileIterable<IntWritable,LongWritable>(dictionaryFile, 
true, conf)) {
       dictionary.put(record.getFirst().get(), record.getSecond().get());
     }
   }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/cf/taste/impl/TasteTestCase.java
 Sun Apr 17 15:30:15 2011
@@ -32,7 +32,6 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.OutputStreamWriter;
 import java.io.PrintWriter;
-import java.nio.charset.Charset;
 import java.util.ArrayList;
 import java.util.List;
 

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/canopy/TestCanopyCreation.java
 Sun Apr 17 15:30:15 2011
@@ -397,9 +397,10 @@ public final class TestCanopyCreation ex
         "org.apache.mahout.common.distance.ManhattanDistanceMeasure");
     conf.set(CanopyConfigKeys.T1_KEY, String.valueOf(3.1));
     conf.set(CanopyConfigKeys.T2_KEY, String.valueOf(2.1));
-    DummyRecordWriter<IntWritable, WeightedVectorWritable> writer = new 
DummyRecordWriter<IntWritable, WeightedVectorWritable>();
-    Mapper<WritableComparable<?>, VectorWritable, IntWritable, 
WeightedVectorWritable>.Context context = DummyRecordWriter
-        .build(mapper, conf, writer);
+    DummyRecordWriter<IntWritable, WeightedVectorWritable> writer =
+        new DummyRecordWriter<IntWritable, WeightedVectorWritable>();
+    Mapper<WritableComparable<?>, VectorWritable, IntWritable, 
WeightedVectorWritable>.Context context =
+        DummyRecordWriter.build(mapper, conf, writer);
     mapper.setup(context);
 
     Collection<Canopy> canopies = new ArrayList<Canopy>();
@@ -645,7 +646,7 @@ public final class TestCanopyCreation ex
     Reducer<Text, VectorWritable, Text, Canopy>.Context context = 
DummyRecordWriter
         .build(reducer, conf, writer, Text.class, VectorWritable.class);
     reducer.setup(context);
-    assertEquals(1.1, reducer.canopyClusterer.t1, EPSILON);
-    assertEquals(0.1, reducer.canopyClusterer.t2, EPSILON);
+    assertEquals(1.1, reducer.canopyClusterer.getT1(), EPSILON);
+    assertEquals(0.1, reducer.canopyClusterer.getT2(), EPSILON);
   }
 }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/fuzzykmeans/TestFuzzyKmeansClustering.java
 Sun Apr 17 15:30:15 2011
@@ -520,7 +520,7 @@ public final class TestFuzzyKmeansCluste
         String clusterId = key.getIdentifier();
         List<SoftCluster> values = reducerWriter.getValue(new Text(clusterId));
         SoftCluster cluster = values.get(0);
-        System.out.println("ref= " + key.toString() + " cluster= " + 
cluster.toString());
+        System.out.println("ref= " + key.toString() + " cluster= " + cluster);
         cluster.computeParameters();
         assertEquals("key center: " + 
AbstractCluster.formatVector(key.getCenter(), null) + " does not equal cluster: 
"
             + AbstractCluster.formatVector(cluster.getCenter(), null), 
key.getCenter(), cluster.getCenter());

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/meanshift/TestMeanShift.java
 Sun Apr 17 15:30:15 2011
@@ -34,6 +34,7 @@ import org.apache.hadoop.io.WritableComp
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.hadoop.mapreduce.Reducer;
 import org.apache.hadoop.util.ToolRunner;
+import org.apache.mahout.clustering.Cluster;
 import org.apache.mahout.clustering.ClusteringTestUtils;
 import org.apache.mahout.common.DummyRecordWriter;
 import org.apache.mahout.common.HadoopUtil;
@@ -331,12 +332,12 @@ public final class TestMeanShift extends
     long count = HadoopUtil.countRecords(outPart, conf);
     assertEquals("count", 3, count);
     outPart = new Path(output, "clusters-0/part-m-00000");
-       Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart, 
true, conf);
-       // now test the initial clusters to ensure the type of their centers 
has been retained
-       while (iterator.hasNext()) {
-         MeanShiftCanopy canopy = (MeanShiftCanopy) iterator.next();
-         assertTrue(canopy.getCenter()instanceof DenseVector);
-       }
+    Iterator<?> iterator = new SequenceFileValueIterator<Writable>(outPart, 
true, conf);
+    // now test the initial clusters to ensure the type of their centers has 
been retained
+    while (iterator.hasNext()) {
+      Cluster canopy = (Cluster) iterator.next();
+      assertTrue(canopy.getCenter() instanceof DenseVector);
+    }
   }
 
   /**
@@ -356,7 +357,7 @@ public final class TestMeanShift extends
     ClusteringTestUtils.writePointsToFile(points, 
getTestTempFilePath("testdata/file2"), fs, conf);
     // now run the Job using the run() command. Other tests can continue to 
use runJob().
     Path output = getTestTempDirPath("output");
-    System.out.println("Output Path: " + output.toString());
+    System.out.println("Output Path: " + output);
     //MeanShiftCanopyDriver.runJob(input, output, 
EuclideanDistanceMeasure.class.getName(), 4, 1, 0.5, 10, false, false);
     String[] args = { optKey(DefaultOptionCreator.INPUT_OPTION), 
getTestTempDirPath("testdata").toString(),
         optKey(DefaultOptionCreator.OUTPUT_OPTION), output.toString(), 
optKey(DefaultOptionCreator.DISTANCE_MEASURE_OPTION),

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/minhash/TestMinHashClustering.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/clustering/minhash/TestMinHashClustering.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/minhash/TestMinHashClustering.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/clustering/minhash/TestMinHashClustering.java
 Sun Apr 17 15:30:15 2011
@@ -147,7 +147,7 @@ public class TestMinHashClustering exten
   public void testLinearMinHashMRJob() throws Exception {
     String[] args = makeArguments(2, 3, 20, 3, HashType.LINEAR.toString());
     int ret = ToolRunner.run(new Configuration(), new MinHashDriver(), args);
-    assertEquals("Minhash MR Job failed for " + HashType.LINEAR.toString(), 0, 
ret);
+    assertEquals("Minhash MR Job failed for " + HashType.LINEAR, 0, ret);
     verify(output, 0.2, "Hash Type: LINEAR");
   }
   
@@ -155,7 +155,7 @@ public class TestMinHashClustering exten
   public void testPolynomialMinHashMRJob() throws Exception {
     String[] args = makeArguments(2, 3, 20, 3, HashType.POLYNOMIAL.toString());
     int ret = ToolRunner.run(new Configuration(), new MinHashDriver(), args);
-    assertEquals("Minhash MR Job failed for " + 
HashType.POLYNOMIAL.toString(), 0, ret);
+    assertEquals("Minhash MR Job failed for " + HashType.POLYNOMIAL, 0, ret);
     verify(output, 0.3, "Hash Type: POLYNOMIAL");
   }
   
@@ -163,7 +163,7 @@ public class TestMinHashClustering exten
   public void testMurmurMinHashMRJob() throws Exception {
     String[] args = makeArguments(2, 3, 20, 4, HashType.MURMUR.toString());
     int ret = ToolRunner.run(new Configuration(), new MinHashDriver(), args);
-    assertEquals("Minhash MR Job failed for " + HashType.MURMUR.toString(), 0, 
ret);
+    assertEquals("Minhash MR Job failed for " + HashType.MURMUR, 0, ret);
     verify(output, 0.3, "Hash Type: MURMUR");
   }
   

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java 
(original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/common/AbstractJobTest.java 
Sun Apr 17 15:30:15 2011
@@ -232,5 +232,5 @@ public final class AbstractJobTest exten
         testInputPath, job.getInputPath().toString());
     assertEquals("output command-line option precedes property",
         testOutputPath, job.getOutputPath().toString());
-       }
+  }
 }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/common/distance/DefaultDistanceMeasureTest.java
 Sun Apr 17 15:30:15 2011
@@ -87,7 +87,7 @@ public abstract class DefaultDistanceMea
     for (int a = 0; a < 4; a++) {
       for (int b = 0; b < 4; b++) {
         assertTrue("Distance between vectors less than zero: " 
-                   + distanceMatrix[a][b] + " = " + distanceMeasure.toString() 
+ 
+                   + distanceMatrix[a][b] + " = " + distanceMeasure +
                    ".distance("+ vectors[a].asFormatString() + ", " 
                    + vectors[b].asFormatString() + ')',
                    distanceMatrix[a][b] >= 0);

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/df/mapreduce/partial/MockContext.java
 Sun Apr 17 15:30:15 2011
@@ -31,10 +31,10 @@ final class MockContext extends Context 
 
   private final TreeID[] keys;
   private final MapredOutput[] values;
-  private int index ;
+  private int index;
 
-  MockContext(Mapper<?,?,?,?> mapper, Configuration conf, TaskAttemptID taskid,
-      int nbTrees) throws IOException, InterruptedException {
+  MockContext(Mapper<?,?,?,?> mapper, Configuration conf, TaskAttemptID 
taskid, int nbTrees)
+    throws IOException, InterruptedException {
     mapper.super(conf, taskid, null, null, null, null, null);
 
     keys = new TreeID[nbTrees];

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest.java
 Sun Apr 17 15:30:15 2011
@@ -18,8 +18,6 @@
 package org.apache.mahout.fpm.pfpgrowth;
 
 import java.io.File;
-import java.io.FileOutputStream;
-import java.io.OutputStreamWriter;
 import java.io.Writer;
 import java.util.ArrayList;
 import java.util.Collection;

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java 
(original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/Algebra.java Sun Apr 
17 15:30:15 2011
@@ -54,10 +54,10 @@ public final class Algebra {
   /**
    * Compute Maximum Absolute Row Sum Norm of input Matrix m
    * http://mathworld.wolfram.com/MaximumAbsoluteRowSumNorm.html 
-   */  
+   */
   public static double getNorm(Matrix m) {
     double max = 0.0;
-    for (int i = 0; i < m.numRows(); i++)      {
+    for (int i = 0; i < m.numRows(); i++) {
       int sum = 0;
       Vector cv = m.getRow(i);
       for (int j = 0; j < cv.size(); j++) {
@@ -68,6 +68,6 @@ public final class Algebra {
       }
     }
     return max;
-  }    
-  
+  }
+
 }

Modified: 
mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
 (original)
+++ 
mahout/trunk/math/src/main/java/org/apache/mahout/math/SingularValueDecomposition.java
 Sun Apr 17 15:30:15 2011
@@ -34,7 +34,7 @@ public class SingularValueDecomposition 
    */
   public SingularValueDecomposition(Matrix arg) {
     if (arg.numRows() < arg.numCols()) {
-      transpositionNeeded = true;                               
+      transpositionNeeded = true;
     }
     
     // Derived from LINPACK code.

Modified: mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java 
(original)
+++ mahout/trunk/math/src/main/java/org/apache/mahout/math/VectorView.java Sun 
Apr 17 15:30:15 2011
@@ -153,7 +153,7 @@ public class VectorView extends Abstract
 
   }
 
-  private class DecoratorElement implements Element {
+  private final class DecoratorElement implements Element {
 
     private final Element decorated;
 

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/benchmark/VectorBenchmarks.java
 Sun Apr 17 15:30:15 2011
@@ -707,8 +707,8 @@ public class VectorBenchmarks {
       
abuilder.withName("nv").withMinimum(1).withMaximum(1).create()).withDescription(
       "Number of Vectors to create. Default: 
100").withShortName("nv").create();
     Option numClustersOpt = 
obuilder.withLongName("numClusters").withRequired(false).withArgument(
-             
abuilder.withName("vs").withMinimum(1).withMaximum(1).create()).withDescription(
-             "Number of Vectors to create. Default: 
10").withShortName("vs").create();
+          
abuilder.withName("vs").withMinimum(1).withMaximum(1).create()).withDescription(
+          "Number of Vectors to create. Default: 
10").withShortName("vs").create();
     Option loopOpt = 
obuilder.withLongName("loop").withRequired(false).withArgument(
       
abuilder.withName("loop").withMinimum(1).withMaximum(1).create()).withDescription(
       "Number of times to loop. Default: 200").withShortName("l").create();
@@ -741,7 +741,7 @@ public class VectorBenchmarks {
       
       int numClusters=25;
       if (cmdLine.hasOption(numClustersOpt)) {
-         numClusters = Integer.parseInt((String) 
cmdLine.getValue(numClustersOpt));          
+        numClusters = Integer.parseInt((String) 
cmdLine.getValue(numClustersOpt));
       }
 
       int sparsity = 1000;

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/RepresentativePointsMapper.java
 Sun Apr 17 15:30:15 2011
@@ -26,12 +26,12 @@ import java.util.Map;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.OutputLogFilter;
 import org.apache.hadoop.mapreduce.Mapper;
 import org.apache.mahout.clustering.WeightedVectorWritable;
 import org.apache.mahout.common.Pair;
 import org.apache.mahout.common.distance.DistanceMeasure;
 import org.apache.mahout.common.distance.EuclideanDistanceMeasure;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.common.iterator.sequencefile.PathType;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
 import org.apache.mahout.math.VectorWritable;
@@ -107,9 +107,11 @@ public class RepresentativePointsMapper
 
   public static Map<Integer, List<VectorWritable>> 
getRepresentativePoints(Configuration conf, Path statePath) {
     Map<Integer, List<VectorWritable>> representativePoints = new 
HashMap<Integer, List<VectorWritable>>();
-    for (Pair<IntWritable,VectorWritable> record :
-         new SequenceFileDirIterable<IntWritable,VectorWritable>(
-             statePath, PathType.LIST, new OutputLogFilter(), conf)) {
+    for (Pair<IntWritable,VectorWritable> record
+         : new SequenceFileDirIterable<IntWritable,VectorWritable>(statePath,
+                                                                   
PathType.LIST,
+                                                                   
PathFilters.logsCRCFilter(),
+                                                                   conf)) {
       int keyValue = record.getFirst().get();
       List<VectorWritable> repPoints = representativePoints.get(keyValue);
       if (repPoints == null) {

Modified: 
mahout/trunk/utils/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- 
mahout/trunk/utils/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
 (original)
+++ 
mahout/trunk/utils/src/main/java/org/apache/mahout/text/MailArchivesClusteringAnalyzer.java
 Sun Apr 17 15:30:15 2011
@@ -43,90 +43,90 @@ public class MailArchivesClusteringAnaly
   // extended set of stop words composed of common mail terms like "hi",
   // HTML tags, and Java keywords, as many of the messages in the archives
   // are subversion check-in notifications
-       private static final String[] STOP_WORDS = {
-         
"3d","7bit","a0","about","above","abstract","across","additional","after",
-         "afterwards","again","against","align","all","almost","alone","along",
-         
"already","also","although","always","am","among","amongst","amoungst",
-         
"amount","an","and","another","any","anybody","anyhow","anyone","anything",
-         "anyway","anywhere","are","arial","around","as","ascii","assert","at",
-         
"back","background","base64","bcc","be","became","because","become","becomes",
-         
"becoming","been","before","beforehand","behind","being","below","beside",
-         
"besides","between","beyond","bgcolor","blank","blockquote","body","boolean",
-         
"border","both","br","break","but","by","can","cannot","cant","case","catch",
-         
"cc","cellpadding","cellspacing","center","char","charset","cheers","class",
-         
"co","color","colspan","com","con","const","continue","could","couldnt",
-         
"cry","css","de","dear","default","did","didnt","different","div","do",
-         
"does","doesnt","done","dont","double","down","due","during","each","eg",
-         
"eight","either","else","elsewhere","empty","encoding","enough","enum",
-         "etc","eu","even","ever","every","everyone","everything","everywhere",
-         
"except","extends","face","family","few","ffffff","final","finally","float",
-         
"font","for","former","formerly","fri","from","further","get","give","go",
-         
"good","got","goto","gt","h1","ha","had","has","hasnt","have","he","head",
-         
"height","hello","helvetica","hence","her","here","hereafter","hereby",
-         "herein","hereupon","hers","herself","hi","him","himself","his","how",
-         "however","hr","href","html","http","https","id","ie","if","ill","im",
-         
"image","img","implements","import","in","inc","instanceof","int","interface",
-         
"into","is","isnt","iso-8859-1","it","its","itself","ive","just","keep",
-         
"last","latter","latterly","least","left","less","li","like","long","look",
-         
"lt","ltd","mail","mailto","many","margin","may","me","meanwhile","message",
-         
"meta","might","mill","mine","mon","more","moreover","most","mostly","mshtml",
-         
"mso","much","must","my","myself","name","namely","native","nbsp","need",
-         
"neither","never","nevertheless","new","next","nine","no","nobody","none",
-         
"noone","nor","not","nothing","now","nowhere","null","of","off","often",
-         
"ok","on","once","only","onto","or","org","other","others","otherwise",
-         
"our","ours","ourselves","out","over","own","package","pad","per","perhaps",
-         
"plain","please","pm","printable","private","protected","public","put",
-         
"quot","quote","r1","r2","rather","re","really","regards","reply","return",
-         
"right","said","same","sans","sat","say","saying","see","seem","seemed",
-         
"seeming","seems","serif","serious","several","she","short","should","show",
-         
"side","since","sincere","six","sixty","size","so","solid","some","somehow",
-         "someone","something","sometime","sometimes","somewhere","span","src",
-         
"static","still","strictfp","string","strong","style","stylesheet","subject",
-         
"such","sun","super","sure","switch","synchronized","table","take","target",
-         
"td","text","th","than","thanks","that","the","their","them","themselves",
-         
"then","thence","there","thereafter","thereby","therefore","therein","thereupon",
-         "these","they","thick","thin","think","third","this","those","though",
-         
"three","through","throughout","throw","throws","thru","thu","thus","tm",
-         
"to","together","too","top","toward","towards","tr","transfer","transient",
-         
"try","tue","type","ul","un","under","unsubscribe","until","up","upon",
-         
"us","use","used","uses","using","valign","verdana","very","via","void",
-         
"volatile","want","was","we","wed","weight","well","were","what","whatever",
-         
"when","whence","whenever","where","whereafter","whereas","whereby","wherein",
-         
"whereupon","wherever","whether","which","while","whither","who","whoever",
-         "whole","whom","whose","why","width","will","with","within","without",
-         
"wont","would","wrote","www","yes","yet","you","your","yours","yourself",
-         "yourselves"
-       };
+  private static final String[] STOP_WORDS = {
+    "3d","7bit","a0","about","above","abstract","across","additional","after",
+    "afterwards","again","against","align","all","almost","alone","along",
+    "already","also","although","always","am","among","amongst","amoungst",
+    "amount","an","and","another","any","anybody","anyhow","anyone","anything",
+    "anyway","anywhere","are","arial","around","as","ascii","assert","at",
+    
"back","background","base64","bcc","be","became","because","become","becomes",
+    "becoming","been","before","beforehand","behind","being","below","beside",
+    
"besides","between","beyond","bgcolor","blank","blockquote","body","boolean",
+    
"border","both","br","break","but","by","can","cannot","cant","case","catch",
+    
"cc","cellpadding","cellspacing","center","char","charset","cheers","class",
+    "co","color","colspan","com","con","const","continue","could","couldnt",
+    "cry","css","de","dear","default","did","didnt","different","div","do",
+    "does","doesnt","done","dont","double","down","due","during","each","eg",
+    "eight","either","else","elsewhere","empty","encoding","enough","enum",
+    "etc","eu","even","ever","every","everyone","everything","everywhere",
+    
"except","extends","face","family","few","ffffff","final","finally","float",
+    "font","for","former","formerly","fri","from","further","get","give","go",
+    "good","got","goto","gt","h1","ha","had","has","hasnt","have","he","head",
+    "height","hello","helvetica","hence","her","here","hereafter","hereby",
+    "herein","hereupon","hers","herself","hi","him","himself","his","how",
+    "however","hr","href","html","http","https","id","ie","if","ill","im",
+    
"image","img","implements","import","in","inc","instanceof","int","interface",
+    "into","is","isnt","iso-8859-1","it","its","itself","ive","just","keep",
+    "last","latter","latterly","least","left","less","li","like","long","look",
+    
"lt","ltd","mail","mailto","many","margin","may","me","meanwhile","message",
+    
"meta","might","mill","mine","mon","more","moreover","most","mostly","mshtml",
+    "mso","much","must","my","myself","name","namely","native","nbsp","need",
+    "neither","never","nevertheless","new","next","nine","no","nobody","none",
+    "noone","nor","not","nothing","now","nowhere","null","of","off","often",
+    "ok","on","once","only","onto","or","org","other","others","otherwise",
+    
"our","ours","ourselves","out","over","own","package","pad","per","perhaps",
+    "plain","please","pm","printable","private","protected","public","put",
+    "quot","quote","r1","r2","rather","re","really","regards","reply","return",
+    "right","said","same","sans","sat","say","saying","see","seem","seemed",
+    
"seeming","seems","serif","serious","several","she","short","should","show",
+    
"side","since","sincere","six","sixty","size","so","solid","some","somehow",
+    "someone","something","sometime","sometimes","somewhere","span","src",
+    
"static","still","strictfp","string","strong","style","stylesheet","subject",
+    
"such","sun","super","sure","switch","synchronized","table","take","target",
+    "td","text","th","than","thanks","that","the","their","them","themselves",
+    
"then","thence","there","thereafter","thereby","therefore","therein","thereupon",
+    "these","they","thick","thin","think","third","this","those","though",
+    "three","through","throughout","throw","throws","thru","thu","thus","tm",
+    "to","together","too","top","toward","towards","tr","transfer","transient",
+    "try","tue","type","ul","un","under","unsubscribe","until","up","upon",
+    "us","use","used","uses","using","valign","verdana","very","via","void",
+    
"volatile","want","was","we","wed","weight","well","were","what","whatever",
+    
"when","whence","whenever","where","whereafter","whereas","whereby","wherein",
+    "whereupon","wherever","whether","which","while","whither","who","whoever",
+    "whole","whom","whose","why","width","will","with","within","without",
+    "wont","would","wrote","www","yes","yet","you","your","yours","yourself",
+    "yourselves"
+  };
 
-       // Regex used to exclude non-alpha-numeric tokens
+  // Regex used to exclude non-alpha-numeric tokens
   private static final Pattern alphaNumeric = 
Pattern.compile("^[a-z][a-z0-9_]+$");
   private final CharArraySet stopSet;
 
-       public MailArchivesClusteringAnalyzer() {
-               stopSet = 
(CharArraySet)StopFilter.makeStopSet(Arrays.asList(STOP_WORDS));
+  public MailArchivesClusteringAnalyzer() {
+    stopSet = (CharArraySet)StopFilter.makeStopSet(Arrays.asList(STOP_WORDS));
     /*
-               Collection<String> tmp = new java.util.TreeSet<String>();
+    Collection<String> tmp = new java.util.TreeSet<String>();
     for (Object entry : stopSet) {
       tmp.add(entry.toString());
     }
      */
-       }
+  }
+
+  public MailArchivesClusteringAnalyzer(CharArraySet stopSet) {
+    this.stopSet = stopSet;
+  }
 
-       public MailArchivesClusteringAnalyzer(CharArraySet stopSet) {
-               this.stopSet = stopSet;
-       }
-
-       @Override
-       public TokenStream tokenStream(String fieldName, java.io.Reader reader) 
{
-               @SuppressWarnings("deprecation")
-               TokenStream result = new 
StandardTokenizer(Version.LUCENE_CURRENT, reader);
-               result = new StandardFilter(result);
-               result = new LowerCaseFilter(result);
+  @Override
+  public TokenStream tokenStream(String fieldName, java.io.Reader reader) {
+    @SuppressWarnings("deprecation")
+    TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
+    result = new StandardFilter(result);
+    result = new LowerCaseFilter(result);
     result = new ASCIIFoldingFilter(result);
     result = new AlphaNumericMaxLengthFilter(result);
-               result = new StopFilter(false, result, stopSet);
-               return new PorterStemFilter(result);
-       }
+    result = new StopFilter(false, result, stopSet);
+    return new PorterStemFilter(result);
+  }
 
   /**
    * Matches alpha-numeric tokens between 2 and 28 chars long.
@@ -136,35 +136,35 @@ public class MailArchivesClusteringAnaly
     private final char[] output = new char[28];
     private final Matcher matcher;
 
-         AlphaNumericMaxLengthFilter(TokenStream in) {
-           super(in);
-           termAtt = addAttribute(TermAttribute.class);
-           matcher = alphaNumeric.matcher("foo");
-         }
-
-         @Override
-         public final boolean incrementToken() throws IOException {
-           // return the first alpha-numeric token between 2 and 40 length
-           while (input.incrementToken()) {
-             int length = termAtt.termLength();
-             if (length >= 2 && length <= 28) {
-               char[] buf = termAtt.termBuffer();
-               int at = 0;
-               for (int c=0; c < length; c++) {
-                 char ch = buf[c];
-                 if (ch != '\'') {
-                   output[at++] = ch;
-                 }
-               }
-               String term = new String(output, 0, at);
-               matcher.reset(term);
-               if (matcher.matches() && !term.startsWith("a0")) {
+    AlphaNumericMaxLengthFilter(TokenStream in) {
+      super(in);
+      termAtt = addAttribute(TermAttribute.class);
+      matcher = alphaNumeric.matcher("foo");
+    }
+
+    @Override
+    public final boolean incrementToken() throws IOException {
+      // return the first alpha-numeric token between 2 and 40 length
+      while (input.incrementToken()) {
+        int length = termAtt.termLength();
+        if (length >= 2 && length <= 28) {
+          char[] buf = termAtt.termBuffer();
+          int at = 0;
+          for (int c=0; c < length; c++) {
+            char ch = buf[c];
+            if (ch != '\'') {
+              output[at++] = ch;
+            }
+          }
+          String term = new String(output, 0, at);
+          matcher.reset(term);
+          if (matcher.matches() && !term.startsWith("a0")) {
             termAtt.setTermBuffer(term);
-            return true;                   
-               }
-             }
-           }
-           return false;
-         }
+            return true;
+          }
+        }
+      }
+      return false;
+    }
   }
 }

Modified: mahout/trunk/utils/src/main/java/org/apache/mahout/utils/eval/ParallelFactorizationEvaluator.java
URL: http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/utils/eval/ParallelFactorizationEvaluator.java?rev=1094158&r1=1094157&r2=1094158&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/utils/eval/ParallelFactorizationEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/utils/eval/ParallelFactorizationEvaluator.java Sun Apr 17 15:30:15 2011
@@ -18,13 +18,11 @@
 package org.apache.mahout.utils.eval;
 
 import org.apache.hadoop.fs.FSDataOutputStream;
-import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Text;
 import org.apache.hadoop.mapreduce.Job;
 import org.apache.hadoop.mapreduce.Mapper;
@@ -88,7 +86,7 @@ public class ParallelFactorizationEvalua
         "--itemFeatures", parsedArgs.get("--itemFeatures"),
         "--tempDir", tempDir.toString() });
 
-    Job estimationErrors = prepareJob(new Path(parsedArgs.get("--pairs") + "," + predictions.toString()), errors,
+    Job estimationErrors = prepareJob(new Path(parsedArgs.get("--pairs") + ',' + predictions), errors,
         TextInputFormat.class, PairsWithRatingMapper.class, IntPairWritable.class, DoubleWritable.class,
         ErrorReducer.class, DoubleWritable.class, NullWritable.class, SequenceFileOutputFormat.class);
     estimationErrors.waitForCompletion(true);


Reply via email to