Author: srowen
Date: Tue Jan 18 16:28:43 2011
New Revision: 1060451

URL: http://svn.apache.org/viewvc?rev=1060451&view=rev
Log:
MAHOUT-516 Add -k param to eigencuts to control number of eigenvectors

Modified:
    mahout/trunk/conf/driver.classes.props
    mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java

Modified: mahout/trunk/conf/driver.classes.props
URL: http://svn.apache.org/viewvc/mahout/trunk/conf/driver.classes.props?rev=1060451&r1=1060450&r2=1060451&view=diff
==============================================================================
--- mahout/trunk/conf/driver.classes.props (original)
+++ mahout/trunk/conf/driver.classes.props Tue Jan 18 16:28:43 2011
@@ -30,3 +30,5 @@ org.apache.mahout.classifier.sgd.RunLogi
 org.apache.mahout.classifier.sgd.PrintResourceOrFile = cat : Print a file or resource as the logistic regression models would see it
 org.apache.mahout.classifier.bayes.WikipediaXmlSplitter = wikipediaXMLSplitter : Reads wikipedia data and creates ch
 org.apache.mahout.classifier.bayes.WikipediaDatasetCreatorDriver = wikipediaDataSetCreator : Splits data set of wikipedia wrt feature like country
+org.apache.mahout.clustering.spectral.eigencuts.EigencutsDriver = eigencuts : Eigencuts spectral clustering
+org.apache.mahout.clustering.spectral.kmeans.SpectralKMeansDriver = spectralkmeans : Spectral k-means clustering

Modified: mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1060451&r1=1060450&r2=1060451&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java (original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java Tue Jan 18 16:28:43 2011
@@ -57,12 +57,13 @@ public class EigencutsDriver extends Abs
   public int run(String[] arg0) throws Exception {
 
     // set up command line arguments
-    addOption("input", "i", "Path to input affinity matrix data", true);
-    addOption("output", "o", "Output of clusterings", true);
     addOption("half-life", "b", "Minimal half-life threshold", true);
     addOption("dimensions", "d", "Square dimensions of affinity matrix", true);
     addOption("epsilon", "e", "Half-life threshold coefficient", Double.toString(EPSILON_DEFAULT));
     addOption("tau", "t", "Threshold for cutting affinities", Double.toString(TAU_DEFAULT));
+    addOption("eigenrank", "k", "Number of top eigenvectors to use", true);
+    addOption(DefaultOptionCreator.inputOption().create());
+    addOption(DefaultOptionCreator.outputOption().create());
     addOption(DefaultOptionCreator.overwriteOption().create());
     Map<String, String> parsedArgs = parseArguments(arg0);
     if (parsedArgs == null) {
@@ -79,8 +80,9 @@ public class EigencutsDriver extends Abs
     double halflife = Double.parseDouble(parsedArgs.get("--half-life"));
     double epsilon = Double.parseDouble(parsedArgs.get("--epsilon"));
     double tau = Double.parseDouble(parsedArgs.get("--tau"));
+    int eigenrank = Integer.parseInt(parsedArgs.get("--eigenrank"));
 
-    run(getConf(), input, output, dimensions, halflife, epsilon, tau);
+    run(getConf(), input, output, eigenrank, dimensions, halflife, epsilon, tau);
 
     return 0;
   }
@@ -91,6 +93,7 @@ public class EigencutsDriver extends Abs
    * @param conf the Configuration to use
    * @param input the Path to the directory containing input affinity tuples
    * @param output the Path to the output directory
+   * @param eigenrank The number of top eigenvectors/eigenvalues to use
    * @param dimensions the int number of dimensions of the square affinity matrix
    * @param halflife the double minimum half-life threshold
    * @param epsilon the double coefficient for setting minimum half-life threshold
@@ -100,6 +103,7 @@ public class EigencutsDriver extends Abs
                          Path input,
                          Path output,
                          int dimensions,
+                         int eigenrank,
                          double halflife,
                          double epsilon,
                          double tau)
@@ -125,12 +129,12 @@ public class EigencutsDriver extends Abs
       L.setConf(new Configuration(conf));
 
       // eigendecomposition (step 3)
-      int overshoot = (int) ((double) dimensions * OVERSHOOT_MULTIPLIER);
+      int overshoot = (int) ((double) eigenrank * OVERSHOOT_MULTIPLIER);
       List<Double> eigenValues = new ArrayList<Double>(overshoot);
-      Matrix eigenVectors = new DenseMatrix(overshoot, dimensions);
-      DistributedRowMatrix U = performEigenDecomposition(conf, L, dimensions, overshoot, eigenValues, eigenVectors, outputCalc);
+      Matrix eigenVectors = new DenseMatrix(overshoot, eigenrank);
+      DistributedRowMatrix U = performEigenDecomposition(conf, L, eigenrank, overshoot, eigenValues, eigenVectors, outputCalc);
       U.setConf(new Configuration(conf));
-      eigenValues = eigenValues.subList(0, dimensions);
+      eigenValues = eigenValues.subList(0, eigenrank);
 
       // here's where things get interesting: steps 4, 5, and 6 are unique
       // to this algorithm, and depending on the final output, steps 1-3


Reply via email to