Author: ssc
Date: Mon May 7 14:16:37 2012
New Revision: 1335032
URL: http://svn.apache.org/viewvc?rev=1335032&view=rev
Log:
MAHOUT-834 RowSimilarityJob doesn't clean its temp dir, and fails when seeing
it again
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java?rev=1335032&r1=1335031&r2=1335032&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/similarity/cooccurrence/RowSimilarityJob.java
Mon May 7 14:16:37 2012
@@ -29,6 +29,8 @@ import org.apache.hadoop.util.ToolRunner
import org.apache.mahout.cf.taste.common.TopK;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.ClassUtils;
+import org.apache.mahout.common.HadoopUtil;
+import org.apache.mahout.common.commandline.DefaultOptionCreator;
import org.apache.mahout.common.mapreduce.VectorSumReducer;
import org.apache.mahout.math.RandomAccessSparseVector;
import org.apache.mahout.math.Vector;
@@ -84,6 +86,7 @@ public class RowSimilarityJob extends Ab
+ DEFAULT_MAX_SIMILARITIES_PER_ROW + ')',
String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ROW));
addOption("excludeSelfSimilarity", "ess", "compute similarity of rows to
themselves?", String.valueOf(false));
addOption("threshold", "tr", "discard row pairs with a similarity value
below this", false);
+ addOption(DefaultOptionCreator.overwriteOption().create());
Map<String,List<String>> parsedArgs = parseArguments(args);
if (parsedArgs == null) {
@@ -99,6 +102,14 @@ public class RowSimilarityJob extends Ab
similarityClassname = similarityClassnameArg;
}
+ // Clear the output and temp paths if the overwrite option has been set
+ if (hasOption(DefaultOptionCreator.OVERWRITE_OPTION)) {
+ // Clear the temp path
+ HadoopUtil.delete(getConf(), getTempPath());
+ // Clear the output path
+ HadoopUtil.delete(getConf(), getOutputPath());
+ }
+
int maxSimilaritiesPerRow =
Integer.parseInt(getOption("maxSimilaritiesPerRow"));
boolean excludeSelfSimilarity =
Boolean.parseBoolean(getOption("excludeSelfSimilarity"));
double threshold = hasOption("threshold") ?