Author: srowen
Date: Sun Mar 20 20:51:33 2011
New Revision: 1083564
URL: http://svn.apache.org/viewvc?rev=1083564&view=rev
Log:
MAHOUT-623 add overwrite option
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverTest.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java?rev=1083564&r1=1083563&r2=1083564&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCli.java Sun Mar 20 20:51:33 2011
@@ -28,6 +28,7 @@ import org.apache.hadoop.io.NullWritable
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.VectorWritable;
@@ -54,6 +55,7 @@ public class SSVDCli extends AbstractJob
"false");
addOption("reduceTasks", "t", "number of reduce tasks (where applicable)",
"1");
+ addOption(DefaultOptionCreator.overwriteOption().create());
Map<String, String> pargs = parseArguments(args);
if (pargs == null)
@@ -71,6 +73,7 @@ public class SSVDCli extends AbstractJob
boolean cUHalfSigma = Boolean.parseBoolean(pargs.get("--uHalfSigma"));
boolean cVHalfSigma = Boolean.parseBoolean(pargs.get("--vHalfSigma"));
int reduceTasks = Integer.parseInt(pargs.get("--reduceTasks"));
+ boolean overwrite = pargs.containsKey(keyFor(DefaultOptionCreator.OVERWRITE_OPTION));
Configuration conf = getConf();
if (conf == null)
@@ -83,6 +86,7 @@ public class SSVDCli extends AbstractJob
solver.setComputeV(computeV);
solver.setcUHalfSigma(cUHalfSigma);
solver.setcVHalfSigma(cVHalfSigma);
+ solver.setOverwrite(overwrite);
solver.run();
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java?rev=1083564&r1=1083563&r2=1083564&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java Sun Mar 20 20:51:33 2011
@@ -109,6 +109,7 @@ public class SSVDSolver {
private int minSplitSize = -1;
private boolean cUHalfSigma;
private boolean cVHalfSigma;
+ private boolean overwrite;
/**
* create new SSVD solver. Required parameters are passed to constructor to
@@ -212,6 +213,19 @@ public class SSVDSolver {
public String getVPath() {
return vPath;
}
+
+ public boolean isOverwrite() {
+ return overwrite;
+ }
+
+ /**
+ * if true, driver to clean output folder first if exists.
+ *
+ * @param overwrite
+ */
+ public void setOverwrite(boolean overwrite) {
+ this.overwrite = overwrite;
+ }
/**
* run all SSVD jobs.
@@ -235,13 +249,9 @@ public class SSVDSolver {
Path uPath = new Path(outputPath, "U");
Path vPath = new Path(outputPath, "V");
- fs.delete(qPath, true); // or we can't re-run it repeatedly, just in case.
- fs.delete(btPath, true);
- fs.delete(bbtPath, true);
- fs.delete(uHatPath, true);
- fs.delete(svPath, true);
- fs.delete(uPath, true);
- fs.delete(vPath, true);
+ if (overwrite) {
+ fs.delete(outputPath, true);
+ }
Random rnd = new Random();
long seed = rnd.nextLong();
@@ -486,4 +496,4 @@ public class SSVDSolver {
return result;
}
-}
\ No newline at end of file
+}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java?rev=1083564&r1=1083563&r2=1083564&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java Sun Mar 20 20:51:33 2011
@@ -41,8 +41,6 @@ import org.apache.mahout.math.VectorWrit
/**
* Computes U=Q*Uhat of SSVD (optionally adding x pow(Sigma, 0.5) )
*
- * @author Dmitriy
- *
*/
public class UJob {
private static final String OUTPUT_U = "u";
@@ -115,13 +113,16 @@ public class UJob {
protected void map(Writable key, VectorWritable value, Context context)
throws IOException, InterruptedException {
Vector qRow = value.get();
- if (sValues != null)
- for (int i = 0; i < k; i++)
+ if (sValues != null) {
+ for (int i = 0; i < k; i++) {
uRow.setQuick(i,
- qRow.dot(uHat.getColumn(i)) * sValues.getQuick(i));
- else
- for (int i = 0; i < k; i++)
+ qRow.dot(uHat.getColumn(i)) * sValues.getQuick(i));
+ }
+ } else {
+ for (int i = 0; i < k; i++) {
uRow.setQuick(i, qRow.dot(uHat.getColumn(i)));
+ }
+ }
context.write(key, uRowWritable); // U inherits original A row labels.
}
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverTest.java?rev=1083564&r1=1083563&r2=1083564&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverTest.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverTest.java Sun Mar 20 20:51:33 2011
@@ -111,6 +111,7 @@ public class LocalSSVDSolverTest extends
ablockRows, k, p, 3);
// ssvd.setcUHalfSigma(true);
// ssvd.setcVHalfSigma(true);
+ ssvd.setOverwrite(true);
ssvd.run();
double[] stochasticSValues = ssvd.getSingularValues();