Author: srowen
Date: Thu Jun 23 21:05:11 2011
New Revision: 1139072

URL: http://svn.apache.org/viewvc?rev=1139072&view=rev
Log:
MAHOUT-708: Update to Hadoop 0.20.203.0, which only required better logic to
ignore the new _SUCCESS files. The result still works with Hadoop 0.20.2.

Modified:
    mahout/trunk/core/pom.xml
    
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterable.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterable.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterator.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterable.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterable.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java
    mahout/trunk/pom.xml

Modified: mahout/trunk/core/pom.xml
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/pom.xml?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- mahout/trunk/core/pom.xml (original)
+++ mahout/trunk/core/pom.xml Thu Jun 23 21:05:11 2011
@@ -143,6 +143,14 @@
       <groupId>org.apache.hadoop</groupId>
       <artifactId>hadoop-core</artifactId>
     </dependency>
+    <dependency>
+      <groupId>org.codehaus.jackson</groupId>
+      <artifactId>jackson-core-asl</artifactId>
+    </dependency>
+    <dependency>
+      <groupId>org.codehaus.jackson</groupId>
+      <artifactId>jackson-mapper-asl</artifactId>
+    </dependency>
 
     <dependency>
       <groupId>org.slf4j</groupId>

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/cf/taste/hadoop/als/eval/ParallelFactorizationEvaluator.java
 Thu Jun 23 21:05:11 2011
@@ -38,6 +38,7 @@ import org.apache.mahout.cf.taste.impl.c
 import org.apache.mahout.common.AbstractJob;
 import org.apache.mahout.common.IntPairWritable;
 import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.common.iterator.sequencefile.PathType;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterable;
 
@@ -108,7 +109,10 @@ public class ParallelFactorizationEvalua
   protected double computeRmse(Path errors) {
     RunningAverage average = new FullRunningAverage();
     for (Pair<DoubleWritable,NullWritable> entry :
-        new SequenceFileDirIterable<DoubleWritable, NullWritable>(errors, 
PathType.LIST, getConf())) {
+        new SequenceFileDirIterable<DoubleWritable, NullWritable>(errors,
+                                                                  
PathType.LIST,
+                                                                  
PathFilters.logsCRCFilter(),
+                                                                  getConf())) {
       DoubleWritable error = entry.getFirst();
       average.addDatum(error.get() * error.get());
     }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterable.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterable.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterable.java
 Thu Jun 23 21:05:11 2011
@@ -76,7 +76,7 @@ public final class SequenceFileDirIterab
     try {
       return new SequenceFileDirIterator<K, V>(path, pathType, filter, 
ordering, reuseKeyValueInstances, conf);
     } catch (IOException ioe) {
-      throw new IllegalStateException(ioe);
+      throw new IllegalStateException(path.toString(), ioe);
     }
   }
 

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirIterator.java
 Thu Jun 23 21:05:11 2011
@@ -70,7 +70,7 @@ public final class SequenceFileDirIterat
                                 try {
                                   return new 
SequenceFileIterator<K,V>(from.getPath(), reuseKeyValueInstances, conf);
                                 } catch (IOException ioe) {
-                                  throw new IllegalStateException(ioe);
+                                  throw new 
IllegalStateException(from.getPath().toString(), ioe);
                                 }
                               }
                             });

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterable.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterable.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterable.java
 Thu Jun 23 21:05:11 2011
@@ -75,7 +75,7 @@ public final class SequenceFileDirValueI
     try {
       return new SequenceFileDirValueIterator<V>(path, pathType, filter, 
ordering, reuseKeyValueInstances, conf);
     } catch (IOException ioe) {
-      throw new IllegalStateException(ioe);
+      throw new IllegalStateException(path.toString(), ioe);
     }
   }
 

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterator.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterator.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterator.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileDirValueIterator.java
 Thu Jun 23 21:05:11 2011
@@ -66,7 +66,7 @@ public final class SequenceFileDirValueI
                                 try {
                                   return new 
SequenceFileValueIterator<V>(from.getPath(), reuseKeyValueInstances, conf);
                                 } catch (IOException ioe) {
-                                  throw new IllegalStateException(ioe);
+                                  throw new 
IllegalStateException(from.getPath().toString(), ioe);
                                 }
                               }
                             });

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterable.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterable.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileIterable.java
 Thu Jun 23 21:05:11 2011
@@ -60,7 +60,7 @@ public final class SequenceFileIterable<
     try {
       return new SequenceFileIterator<K, V>(path, reuseKeyValueInstances, 
conf);
     } catch (IOException ioe) {
-      throw new IllegalStateException(ioe);
+      throw new IllegalStateException(path.toString(), ioe);
     }
   }
 

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterable.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterable.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterable.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/common/iterator/sequencefile/SequenceFileValueIterable.java
 Thu Jun 23 21:05:11 2011
@@ -59,7 +59,7 @@ public final class SequenceFileValueIter
     try {
       return new SequenceFileValueIterator<V>(path, reuseKeyValueInstances, 
conf);
     } catch (IOException ioe) {
-      throw new IllegalStateException(ioe);
+      throw new IllegalStateException(path.toString(), ioe);
     }
   }
 

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/ga/watchmaker/OutputUtils.java
 Thu Jun 23 21:05:11 2011
@@ -28,6 +28,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.DoubleWritable;
 import org.apache.hadoop.io.LongWritable;
 import org.apache.hadoop.io.SequenceFile;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import 
org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
 
 /** Utility Class that deals with the output. */
@@ -45,18 +46,13 @@ public final class OutputUtils {
    * @return {@code Path} array
    */
   public static Path[] listOutputFiles(FileSystem fs, Path outpath) throws 
IOException {
-    FileStatus[] status = fs.listStatus(outpath);
     Collection<Path> outpaths = Lists.newArrayList();
-    for (FileStatus s : status) {
+    for (FileStatus s : fs.listStatus(outpath, PathFilters.logsCRCFilter())) {
       if (!s.isDir()) {
         outpaths.add(s.getPath());
       }
     }
-    
-    Path[] outfiles = new Path[outpaths.size()];
-    outpaths.toArray(outfiles);
-    
-    return outfiles;
+    return outpaths.toArray(new Path[outpaths.size()]);
   }
   
   /**

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
 Thu Jun 23 21:05:11 2011
@@ -28,6 +28,7 @@ import org.apache.hadoop.io.WritableComp
 import org.apache.hadoop.mapred.JobClient;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.common.iterator.sequencefile.PathType;
 import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirIterator;
 import org.apache.mahout.math.CardinalityException;
@@ -133,7 +134,11 @@ public class DistributedRowMatrix implem
     try {
       return Iterators.transform(
           new SequenceFileDirIterator<IntWritable,VectorWritable>(new 
Path(rowPath, "*"),
-                                                                  
PathType.GLOB, null, null, true, conf),
+                                                                  
PathType.GLOB,
+                                                                  
PathFilters.logsCRCFilter(),
+                                                                  null,
+                                                                  true,
+                                                                  conf),
           new Function<Pair<IntWritable,VectorWritable>,MatrixSlice>() {
             @Override
             public MatrixSlice apply(Pair<IntWritable, VectorWritable> from) {

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
 Thu Jun 23 21:05:11 2011
@@ -24,6 +24,7 @@ import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.mahout.clustering.ClusteringTestUtils;
 import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.MatrixSlice;
 import org.apache.mahout.math.RandomAccessSparseVector;
@@ -235,8 +236,8 @@ public final class TestDistributedRowMat
     Path outputTempPath = outputStatuses[0].getPath();
     Path inputVectorPath = new Path(outputTempPath, 
TimesSquaredJob.INPUT_VECTOR);
     Path outputVectorPath = new Path(outputTempPath, 
TimesSquaredJob.OUTPUT_VECTOR_FILENAME);
-    assertEquals(1, fs.listStatus(inputVectorPath).length);
-    assertEquals(1, fs.listStatus(outputVectorPath).length);
+    assertEquals(1, fs.listStatus(inputVectorPath, 
PathFilters.logsCRCFilter()).length);
+    assertEquals(1, fs.listStatus(outputVectorPath, 
PathFilters.logsCRCFilter()).length);
 
     assertEquals(0.0, result1.getDistanceSquared(result2), EPSILON);
   }
@@ -272,8 +273,8 @@ public final class TestDistributedRowMat
     Path outputTempPath = outputStatuses[0].getPath();
     Path inputVectorPath = new Path(outputTempPath, 
TimesSquaredJob.INPUT_VECTOR);
     Path outputVectorPath = new Path(outputTempPath, 
TimesSquaredJob.OUTPUT_VECTOR_FILENAME);
-    assertEquals(1, fs.listStatus(inputVectorPath).length);
-    assertEquals(1, fs.listStatus(outputVectorPath).length);
+    assertEquals(1, fs.listStatus(inputVectorPath, 
PathFilters.logsCRCFilter()).length);
+    assertEquals(1, fs.listStatus(outputVectorPath, 
PathFilters.logsCRCFilter()).length);
     
     assertEquals(0.0, result1.getDistanceSquared(result2), EPSILON);
   }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/vectorizer/DocumentProcessorTest.java
 Thu Jun 23 21:05:11 2011
@@ -26,13 +26,15 @@ import org.apache.hadoop.io.SequenceFile
 import org.apache.hadoop.io.Text;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.StringTuple;
+import org.apache.mahout.common.iterator.sequencefile.PathFilters;
 import org.junit.Test;
 
 import java.util.Arrays;
 
 /**
- * Tests tokenizing of <Text documentId, Text text> {@link SequenceFile}s by 
the {@link DocumentProcessor} into
- * <Text documentId, StringTuple tokens> sequence files
+ * Tests tokenizing of {@link SequenceFile}s containing document ID and text 
(both as {@link Text})
+ * by the {@link DocumentProcessor} into {@link SequenceFile}s of document ID 
and tokens (as
+ * {@link StringTuple}).
  */
 public class DocumentProcessorTest extends MahoutTestCase {
 
@@ -58,7 +60,7 @@ public class DocumentProcessorTest exten
 
     DocumentProcessor.tokenizeDocuments(input, DefaultAnalyzer.class, output, 
configuration);
 
-    FileStatus[] statuses = fs.listStatus(output);
+    FileStatus[] statuses = fs.listStatus(output, PathFilters.logsCRCFilter());
     assertEquals(1, statuses.length);
     Path filePath = statuses[0].getPath();
     SequenceFile.Reader reader = new SequenceFile.Reader(fs, filePath, 
configuration);

Modified: mahout/trunk/pom.xml
URL: 
http://svn.apache.org/viewvc/mahout/trunk/pom.xml?rev=1139072&r1=1139071&r2=1139072&view=diff
==============================================================================
--- mahout/trunk/pom.xml (original)
+++ mahout/trunk/pom.xml Thu Jun 23 21:05:11 2011
@@ -191,7 +191,7 @@
       <dependency>
         <groupId>org.apache.hadoop</groupId>
         <artifactId>hadoop-core</artifactId>
-        <version>0.20.2</version>
+        <version>0.20.203.0</version>
         <exclusions>
           <exclusion>
             <groupId>net.sf.kosmosfs</groupId>
@@ -259,6 +259,16 @@
           </exclusion>
         </exclusions>
       </dependency>
+      <dependency>
+        <groupId>org.codehaus.jackson</groupId>
+        <artifactId>jackson-core-asl</artifactId>
+        <version>1.8.2</version>
+      </dependency>
+      <dependency>
+        <groupId>org.codehaus.jackson</groupId>
+        <artifactId>jackson-mapper-asl</artifactId>
+        <version>1.8.2</version>
+      </dependency>
 
       <dependency>
         <groupId>commons-dbcp</groupId>


Reply via email to