Author: srowen
Date: Fri Apr 15 10:24:21 2011
New Revision: 1092656
URL: http://svn.apache.org/viewvc?rev=1092656&view=rev
Log:
MAHOUT-666 updated patch to clarify default behavior
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1092656&r1=1092655&r2=1092656&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java Fri Apr 15 10:24:21 2011
@@ -62,7 +62,7 @@ import java.util.Iterator;
*
*/
public class DistributedRowMatrix implements VectorIterable, Configurable {
- public static final String REMOVE_TEMP_DIRS = "DistributedMatrix.remove.temp.dirs";
+ public static final String KEEP_TEMP_FILES = "DistributedMatrix.keep.temp.files";
private static final Logger log = LoggerFactory.getLogger(DistributedRowMatrix.class);
@@ -73,7 +73,7 @@ public class DistributedRowMatrix implem
private Path outputTmpBasePath;
private final int numRows;
private final int numCols;
- private boolean removeTempDirs;
+ private boolean keepTempFiles;
public DistributedRowMatrix(Path inputPathString,
Path outputTmpPathString,
@@ -83,7 +83,7 @@ public class DistributedRowMatrix implem
this.outputTmpPath = outputTmpPathString;
this.numRows = numRows;
this.numCols = numCols;
- this.removeTempDirs = false;
+ this.keepTempFiles = false;
}
@Override
@@ -97,7 +97,7 @@ public class DistributedRowMatrix implem
try {
rowPath = FileSystem.get(conf).makeQualified(inputPath);
outputTmpBasePath = FileSystem.get(conf).makeQualified(outputTmpPath);
- removeTempDirs = conf.getBoolean(REMOVE_TEMP_DIRS, false);
+ keepTempFiles = conf.getBoolean(KEEP_TEMP_FILES, false);
} catch (IOException ioe) {
throw new IllegalStateException(ioe);
}
@@ -200,7 +200,7 @@ public class DistributedRowMatrix implem
outputVectorTmpPath);
JobClient.runJob(new JobConf(conf));
Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
- if (removeTempDirs) {
+ if (!keepTempFiles) {
FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
fs.delete(outputVectorTmpPath, true);
}
@@ -223,7 +223,7 @@ public class DistributedRowMatrix implem
outputVectorTmpPath);
JobClient.runJob(new JobConf(conf));
Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
- if (removeTempDirs) {
+ if (!keepTempFiles) {
FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
fs.delete(outputVectorTmpPath, true);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1092656&r1=1092655&r2=1092656&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java Fri Apr 15 10:24:21 2011
@@ -163,7 +163,6 @@ public final class TimesSquaredJob {
inputVectorPathWriter.close();
URI ivpURI = inputVectorPath.toUri();
DistributedCache.setCacheFiles(new URI[] {ivpURI}, conf);
- fs.deleteOnExit(inputVectorPath);
conf.set(INPUT_VECTOR, ivpURI.toString());
conf.setBoolean(IS_SPARSE_OUTPUT, !(v instanceof DenseVector));
@@ -190,7 +189,6 @@ public final class TimesSquaredJob {
new SequenceFileValueIterator<VectorWritable>(outputFile, true, conf);
Vector vector = iterator.next().get();
iterator.close();
- fs.deleteOnExit(outputFile);
return vector;
}
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1092656&r1=1092655&r2=1092656&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java Fri Apr 15 10:24:21 2011
@@ -217,17 +217,24 @@ public final class TestDistributedRowMat
Vector result1 = dm.times(v);
- assertEquals(1, fs.listStatus(outputPath).length);
+ assertEquals(0, fs.listStatus(outputPath).length);
deleteContentsOfPath(conf, outputPath);
assertEquals(0, fs.listStatus(outputPath).length);
- conf.setBoolean(DistributedRowMatrix.REMOVE_TEMP_DIRS, true);
+ conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
dm.setConf(conf);
Vector result2 = dm.times(v);
- assertEquals(0, fs.listStatus(outputPath).length);
+ FileStatus[] outputStatuses = fs.listStatus(outputPath);
+ assertEquals(1, outputStatuses.length);
+ Path outputTempPath = outputStatuses[0].getPath();
+ Path inputVectorPath = new Path(outputTempPath, TimesSquaredJob.INPUT_VECTOR);
+ Path outputVectorPath = new Path(outputTempPath, TimesSquaredJob.OUTPUT_VECTOR_FILENAME);
+ assertEquals(1, fs.listStatus(inputVectorPath).length);
+ assertEquals(1, fs.listStatus(outputVectorPath).length);
+
assertEquals(0.0, result1.getDistanceSquared(result2), EPSILON);
}
@@ -247,17 +254,24 @@ public final class TestDistributedRowMat
Vector result1 = dm.timesSquared(v);
- assertEquals(1, fs.listStatus(outputPath).length);
+ assertEquals(0, fs.listStatus(outputPath).length);
deleteContentsOfPath(conf, outputPath);
assertEquals(0, fs.listStatus(outputPath).length);
- conf.setBoolean(DistributedRowMatrix.REMOVE_TEMP_DIRS, true);
+ conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
dm.setConf(conf);
Vector result2 = dm.timesSquared(v);
- assertEquals(0, fs.listStatus(outputPath).length);
+ FileStatus[] outputStatuses = fs.listStatus(outputPath);
+ assertEquals(1, outputStatuses.length);
+ Path outputTempPath = outputStatuses[0].getPath();
+ Path inputVectorPath = new Path(outputTempPath, TimesSquaredJob.INPUT_VECTOR);
+ Path outputVectorPath = new Path(outputTempPath, TimesSquaredJob.OUTPUT_VECTOR_FILENAME);
+ assertEquals(1, fs.listStatus(inputVectorPath).length);
+ assertEquals(1, fs.listStatus(outputVectorPath).length);
+
assertEquals(0.0, result1.getDistanceSquared(result2), EPSILON);
}