Author: srowen
Date: Fri Apr 15 10:24:21 2011
New Revision: 1092656

URL: http://svn.apache.org/viewvc?rev=1092656&view=rev
Log:
MAHOUT-666 updated patch to clarify default behavior

Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1092656&r1=1092655&r2=1092656&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
 Fri Apr 15 10:24:21 2011
@@ -62,7 +62,7 @@ import java.util.Iterator;
  *
  */
 public class DistributedRowMatrix implements VectorIterable, Configurable {
-  public static final String REMOVE_TEMP_DIRS = "DistributedMatrix.remove.temp.dirs";
+  public static final String KEEP_TEMP_FILES = "DistributedMatrix.keep.temp.files";
   
   private static final Logger log = LoggerFactory.getLogger(DistributedRowMatrix.class);
 
@@ -73,7 +73,7 @@ public class DistributedRowMatrix implem
   private Path outputTmpBasePath;
   private final int numRows;
   private final int numCols;
-  private boolean removeTempDirs;
+  private boolean keepTempFiles;
 
   public DistributedRowMatrix(Path inputPathString,
                               Path outputTmpPathString,
@@ -83,7 +83,7 @@ public class DistributedRowMatrix implem
     this.outputTmpPath = outputTmpPathString;
     this.numRows = numRows;
     this.numCols = numCols;
-    this.removeTempDirs = false;
+    this.keepTempFiles = false;
   }
 
   @Override
@@ -97,7 +97,7 @@ public class DistributedRowMatrix implem
     try {
       rowPath = FileSystem.get(conf).makeQualified(inputPath);
       outputTmpBasePath = FileSystem.get(conf).makeQualified(outputTmpPath);
-      removeTempDirs = conf.getBoolean(REMOVE_TEMP_DIRS, false);
+      keepTempFiles = conf.getBoolean(KEEP_TEMP_FILES, false);
     } catch (IOException ioe) {
       throw new IllegalStateException(ioe);
     }
@@ -200,7 +200,7 @@ public class DistributedRowMatrix implem
                                              outputVectorTmpPath);
       JobClient.runJob(new JobConf(conf));
       Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
-      if (removeTempDirs) {
+      if (!keepTempFiles) {
         FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
         fs.delete(outputVectorTmpPath, true);
       }
@@ -223,7 +223,7 @@ public class DistributedRowMatrix implem
                                                     outputVectorTmpPath);
       JobClient.runJob(new JobConf(conf));
       Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
-      if (removeTempDirs) {
+      if (!keepTempFiles) {
         FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
         fs.delete(outputVectorTmpPath, true);
       }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1092656&r1=1092655&r2=1092656&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
 Fri Apr 15 10:24:21 2011
@@ -163,7 +163,6 @@ public final class TimesSquaredJob {
     inputVectorPathWriter.close();
     URI ivpURI = inputVectorPath.toUri();
     DistributedCache.setCacheFiles(new URI[] {ivpURI}, conf);
-    fs.deleteOnExit(inputVectorPath);
 
     conf.set(INPUT_VECTOR, ivpURI.toString());
     conf.setBoolean(IS_SPARSE_OUTPUT, !(v instanceof DenseVector));
@@ -190,7 +189,6 @@ public final class TimesSquaredJob {
         new SequenceFileValueIterator<VectorWritable>(outputFile, true, conf);
     Vector vector = iterator.next().get();
     iterator.close();
-    fs.deleteOnExit(outputFile);
     return vector;
   }
 

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1092656&r1=1092655&r2=1092656&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
 Fri Apr 15 10:24:21 2011
@@ -217,17 +217,24 @@ public final class TestDistributedRowMat
 
     Vector result1 = dm.times(v);
 
-    assertEquals(1, fs.listStatus(outputPath).length);
+    assertEquals(0, fs.listStatus(outputPath).length);
     
     deleteContentsOfPath(conf, outputPath);
     assertEquals(0, fs.listStatus(outputPath).length);
     
-    conf.setBoolean(DistributedRowMatrix.REMOVE_TEMP_DIRS, true);
+    conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
     dm.setConf(conf);
     
     Vector result2 = dm.times(v);
 
-    assertEquals(0, fs.listStatus(outputPath).length);
+    FileStatus[] outputStatuses = fs.listStatus(outputPath);
+    assertEquals(1, outputStatuses.length);
+    Path outputTempPath = outputStatuses[0].getPath();
+    Path inputVectorPath = new Path(outputTempPath, TimesSquaredJob.INPUT_VECTOR);
+    Path outputVectorPath = new Path(outputTempPath, TimesSquaredJob.OUTPUT_VECTOR_FILENAME);
+    assertEquals(1, fs.listStatus(inputVectorPath).length);
+    assertEquals(1, fs.listStatus(outputVectorPath).length);
+
     assertEquals(0.0, result1.getDistanceSquared(result2), EPSILON);
   }
 
@@ -247,17 +254,24 @@ public final class TestDistributedRowMat
 
     Vector result1 = dm.timesSquared(v);
 
-    assertEquals(1, fs.listStatus(outputPath).length);
+    assertEquals(0, fs.listStatus(outputPath).length);
     
     deleteContentsOfPath(conf, outputPath);
     assertEquals(0, fs.listStatus(outputPath).length);
     
-    conf.setBoolean(DistributedRowMatrix.REMOVE_TEMP_DIRS, true);
+    conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
     dm.setConf(conf);
     
     Vector result2 = dm.timesSquared(v);
 
-    assertEquals(0, fs.listStatus(outputPath).length);
+    FileStatus[] outputStatuses = fs.listStatus(outputPath);
+    assertEquals(1, outputStatuses.length);
+    Path outputTempPath = outputStatuses[0].getPath();
+    Path inputVectorPath = new Path(outputTempPath, TimesSquaredJob.INPUT_VECTOR);
+    Path outputVectorPath = new Path(outputTempPath, TimesSquaredJob.OUTPUT_VECTOR_FILENAME);
+    assertEquals(1, fs.listStatus(inputVectorPath).length);
+    assertEquals(1, fs.listStatus(outputVectorPath).length);
+    
     assertEquals(0.0, result1.getDistanceSquared(result2), EPSILON);
   }
 


Reply via email to