Author: srowen
Date: Tue Jan 18 16:28:43 2011
New Revision: 1060451
URL: http://svn.apache.org/viewvc?rev=1060451&view=rev
Log:
MAHOUT-516 Add -k param to eigencuts to control number of eigenvectors
Modified:
mahout/trunk/conf/driver.classes.props
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
Modified: mahout/trunk/conf/driver.classes.props
URL:
http://svn.apache.org/viewvc/mahout/trunk/conf/driver.classes.props?rev=1060451&r1=1060450&r2=1060451&view=diff
==============================================================================
--- mahout/trunk/conf/driver.classes.props (original)
+++ mahout/trunk/conf/driver.classes.props Tue Jan 18 16:28:43 2011
@@ -30,3 +30,5 @@ org.apache.mahout.classifier.sgd.RunLogi
org.apache.mahout.classifier.sgd.PrintResourceOrFile = cat : Print a file or
resource as the logistic regression models would see it
org.apache.mahout.classifier.bayes.WikipediaXmlSplitter = wikipediaXMLSplitter
: Reads wikipedia data and creates ch
org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorDriver =
wikipediaDataSetCreator : Splits data set of wikipedia wrt feature like country
+org.apache.mahout.clustering.spectral.eigencuts.EigencutsDriver = eigencuts : Eigencuts spectral clustering
+org.apache.mahout.clustering.spectral.kmeans.SpectralKMeansDriver = spectralkmeans : Spectral k-means clustering
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1060451&r1=1060450&r2=1060451&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
Tue Jan 18 16:28:43 2011
@@ -57,12 +57,13 @@ public class EigencutsDriver extends Abs
public int run(String[] arg0) throws Exception {
// set up command line arguments
- addOption("input", "i", "Path to input affinity matrix data", true);
- addOption("output", "o", "Output of clusterings", true);
addOption("half-life", "b", "Minimal half-life threshold", true);
addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
addOption("epsilon", "e", "Half-life threshold coefficient",
Double.toString(EPSILON_DEFAULT));
addOption("tau", "t", "Threshold for cutting affinities",
Double.toString(TAU_DEFAULT));
+ addOption("eigenrank", "k", "Number of top eigenvectors to use", true);
+ addOption(DefaultOptionCreator.inputOption().create());
+ addOption(DefaultOptionCreator.outputOption().create());
addOption(DefaultOptionCreator.overwriteOption().create());
Map<String, String> parsedArgs = parseArguments(arg0);
if (parsedArgs == null) {
@@ -79,8 +80,9 @@ public class EigencutsDriver extends Abs
double halflife = Double.parseDouble(parsedArgs.get("--half-life"));
double epsilon = Double.parseDouble(parsedArgs.get("--epsilon"));
double tau = Double.parseDouble(parsedArgs.get("--tau"));
+ int eigenrank = Integer.parseInt(parsedArgs.get("--eigenrank"));
- run(getConf(), input, output, dimensions, halflife, epsilon, tau);
+ run(getConf(), input, output, eigenrank, dimensions, halflife, epsilon,
tau);
return 0;
}
@@ -91,6 +93,7 @@ public class EigencutsDriver extends Abs
* @param conf the Configuration to use
* @param input the Path to the directory containing input affinity tuples
* @param output the Path to the output directory
+ * @param eigenrank The number of top eigenvectors/eigenvalues to use
* @param dimensions the int number of dimensions of the square affinity
matrix
* @param halflife the double minimum half-life threshold
* @param epsilon the double coefficient for setting minimum half-life
threshold
@@ -100,6 +103,7 @@ public class EigencutsDriver extends Abs
Path input,
Path output,
int dimensions,
+ int eigenrank,
double halflife,
double epsilon,
double tau)
@@ -125,12 +129,12 @@ public class EigencutsDriver extends Abs
L.setConf(new Configuration(conf));
// eigendecomposition (step 3)
- int overshoot = (int) ((double) dimensions * OVERSHOOT_MULTIPLIER);
+ int overshoot = (int) ((double) eigenrank * OVERSHOOT_MULTIPLIER);
List<Double> eigenValues = new ArrayList<Double>(overshoot);
- Matrix eigenVectors = new DenseMatrix(overshoot, dimensions);
- DistributedRowMatrix U = performEigenDecomposition(conf, L, dimensions,
overshoot, eigenValues, eigenVectors, outputCalc);
+ Matrix eigenVectors = new DenseMatrix(overshoot, eigenrank);
+ DistributedRowMatrix U = performEigenDecomposition(conf, L, eigenrank,
overshoot, eigenValues, eigenVectors, outputCalc);
U.setConf(new Configuration(conf));
- eigenValues = eigenValues.subList(0, dimensions);
+ eigenValues = eigenValues.subList(0, eigenrank);
// here's where things get interesting: steps 4, 5, and 6 are unique
// to this algorithm, and depending on the final output, steps 1-3