Author: smarthi
Date: Sun May 26 03:26:12 2013
New Revision: 1486388
URL: http://svn.apache.org/r1486388
Log:
MAHOUT-1213: SSVD job doesn't clean its temp dir, and fails when seeing it again
Modified:
mahout/trunk/CHANGELOG
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
Modified: mahout/trunk/CHANGELOG
URL:
http://svn.apache.org/viewvc/mahout/trunk/CHANGELOG?rev=1486388&r1=1486387&r2=1486388&view=diff
==============================================================================
--- mahout/trunk/CHANGELOG (original)
+++ mahout/trunk/CHANGELOG Sun May 26 03:26:12 2013
@@ -2,6 +2,8 @@ Mahout Change Log
Release 0.8 - unreleased
+ MAHOUT-1213: SSVD job doesn't clean it's temp dir, and fails when seeing it again (smarthi)
+
__MAHOUT-1223: Fixed point skipped in StreamingKMeans when iterating through
centroids from a reducer (dfilimon)
__MAHOUT-1222: Fix total weight in FastProjectionSearch (dfilimon)
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java?rev=1486388&r1=1486387&r2=1486388&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java Sun May 26 03:26:12 2013
@@ -25,6 +25,7 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
+import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.hadoop.MatrixColumnMeansJob;
@@ -108,8 +109,7 @@ public class SSVDCli extends AbstractJob
Path xiPath = xiPathStr == null ? null : new Path(xiPathStr);
boolean pca = Boolean.parseBoolean(getOption("pca")) || xiPath != null;
- boolean overwrite =
- pargs.containsKey(keyFor(DefaultOptionCreator.OVERWRITE_OPTION));
+ boolean overwrite = hasOption(DefaultOptionCreator.OVERWRITE_OPTION);
Configuration conf = getConf();
if (conf == null) {
@@ -118,7 +118,17 @@ public class SSVDCli extends AbstractJob
Path[] inputPaths = { getInputPath() };
Path tempPath = getTempPath();
- FileSystem fs = FileSystem.get(getOutputPath().toUri(), conf);
+ FileSystem fs = FileSystem.get(getTempPath().toUri(), conf);
+
+ // housekeeping
+ if (overwrite) {
+ // clear the output path
+ HadoopUtil.delete(getConf(), getOutputPath());
+ // clear the temp path
+ HadoopUtil.delete(getConf(), getTempPath());
+ }
+
+ fs.mkdirs(getOutputPath());
// MAHOUT-817
if (pca && xiPath == null) {
@@ -156,13 +166,6 @@ public class SSVDCli extends AbstractJob
solver.run();
- // housekeeping
- if (overwrite) {
- fs.delete(getOutputPath(), true);
- }
-
- fs.mkdirs(getOutputPath());
-
Vector svalues = solver.getSingularValues().viewPart(0, k);
SSVDHelper.saveVector(svalues, getOutputPath("sigma"), conf);
@@ -184,6 +187,10 @@ public class SSVDCli extends AbstractJob
&& !fs.rename(new Path(solver.getvHalfSigmaPath()), getOutputPath())) {
throw new IOException("Unable to move V*Sigma^0.5 results to the output path.");
}
+
+ // Delete the temp path on exit
+ fs.deleteOnExit(getTempPath());
+
return 0;
}