Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java?rev=1292532&r1=1292531&r2=1292532&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
 Wed Feb 22 21:57:27 2012
@@ -36,6 +36,8 @@ import org.apache.mahout.math.DenseVecto
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.function.Functions;
+import org.apache.mahout.math.function.PlusMult;
 
 public class VJob {
   private static final String OUTPUT_V = "v";
@@ -43,13 +45,120 @@ public class VJob {
   private static final String PROP_SIGMA_PATH = "ssvd.sigma.path";
   private static final String PROP_V_HALFSIGMA = "ssvd.v.halfsigma";
   private static final String PROP_K = "ssvd.k";
+  public static final String PROP_SQ_PATH = "ssvdpca.sq.path";
+  public static final String PROP_XI_PATH = "ssvdpca.xi.path";
 
   private Job job;
 
-  public void start(Configuration conf, Path inputPathBt, Path inputUHatPath,
-      Path inputSigmaPath, Path outputPath, int k, int numReduceTasks,
-      boolean vHalfSigma) throws ClassNotFoundException, InterruptedException,
-      IOException {
+  public static final class VMapper extends
+      Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> {
+
+    private Matrix uHat;
+    private Vector vRow;
+    private Vector sValues;
+    private VectorWritable vRowWritable;
+    private int kp;
+    private int k;
+    /*
+     * xi and s_q are PCA-related corrections, per MAHOUT-817
+     */
+    protected Vector xi;
+    protected Vector sq;
+    protected PlusMult plusMult = new PlusMult(0);
+
+    @Override
+    protected void map(IntWritable key, VectorWritable value, Context context)
+      throws IOException, InterruptedException {
+      Vector bCol = value.get();
+      /*
+       * MAHOUT-817: PCA correction for B': b_{col=i} -= s_q * xi_{i}
+       */
+      if (xi != null) {
+        /*
+         * code defensively against shortened xi which may be externally
+         * supplied
+         */
+        int btIndex = key.get();
+        double xii = xi.size() > btIndex ? xi.getQuick(btIndex) : 0.0;
+        plusMult.setMultiplicator(-xii);
+        bCol.assign(sq, plusMult);
+      }
+
+      for (int i = 0; i < k; i++) {
+        vRow.setQuick(i, bCol.dot(uHat.viewColumn(i)) / sValues.getQuick(i));
+      }
+      context.write(key, vRowWritable);
+    }
+
+    @Override
+    protected void setup(Context context) throws IOException,
+      InterruptedException {
+      super.setup(context);
+
+      Configuration conf = context.getConfiguration();
+      FileSystem fs = FileSystem.get(conf);
+      Path uHatPath = new Path(conf.get(PROP_UHAT_PATH));
+
+      Path sigmaPath = new Path(conf.get(PROP_SIGMA_PATH));
+
+      uHat =
+        new DenseMatrix(SSVDHelper.loadDistributedRowMatrix(fs, uHatPath, conf));
+      // since uHat is (k+p) x (k+p)
+      kp = uHat.columnSize();
+      k = context.getConfiguration().getInt(PROP_K, kp);
+      vRow = new DenseVector(k);
+      vRowWritable = new VectorWritable(vRow);
+
+      sValues = SSVDHelper.loadVector(sigmaPath, conf);
+      if (conf.get(PROP_V_HALFSIGMA) != null) {
+        sValues.assign(Functions.SQRT);
+      }
+
+      /*
+       * PCA -related corrections (MAHOUT-817)
+       */
+      String xiPathStr = conf.get(PROP_XI_PATH);
+      if (xiPathStr != null) {
+        xi = SSVDHelper.loadAndSumUpVectors(new Path(xiPathStr), conf);
+        sq =
+          SSVDHelper.loadAndSumUpVectors(new Path(conf.get(PROP_SQ_PATH)), conf);
+      }
+
+    }
+
+  }
+
+  /**
+   * 
+   * @param conf
+   * @param inputPathBt
+   * @param xiPath
+   *          PCA row mean (MAHOUT-817, to fix B')
+   * @param sqPath
+   *          sq (MAHOUT-817, to fix B')
+   * @param inputUHatPath
+   * @param inputSigmaPath
+   * @param outputPath
+   * @param k
+   * @param numReduceTasks
+   * @param vHalfSigma
+   * @throws ClassNotFoundException
+   * @throws InterruptedException
+   * @throws IOException
+   */
+  public void run(Configuration conf,
+                  Path inputPathBt,
+                  Path xiPath,
+                  Path sqPath,
+
+                  Path inputUHatPath,
+                  Path inputSigmaPath,
+
+                  Path outputPath,
+                  int k,
+                  int numReduceTasks,
+                  boolean vHalfSigma) throws ClassNotFoundException,
+    InterruptedException, IOException {
 
     job = new Job(conf);
     job.setJobName("V-job");
@@ -64,7 +173,8 @@ public class VJob {
     job.getConfiguration().set("mapreduce.output.basename", OUTPUT_V);
     FileOutputFormat.setCompressOutput(job, true);
     FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class);
-    SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
+    SequenceFileOutputFormat.setOutputCompressionType(job,
+                                                      CompressionType.BLOCK);
 
     job.setMapOutputKeyClass(IntWritable.class);
     job.setMapOutputValueClass(VectorWritable.class);
@@ -81,12 +191,21 @@ public class VJob {
     }
     job.getConfiguration().setInt(PROP_K, k);
     job.setNumReduceTasks(0);
+
+    /*
+     * PCA-related options, MAHOUT-817
+     */
+    if (xiPath != null) {
+      job.getConfiguration().set(PROP_XI_PATH, xiPath.toString());
+      job.getConfiguration().set(PROP_SQ_PATH, sqPath.toString());
+    }
+
     job.submit();
 
   }
 
   public void waitForCompletion() throws IOException, ClassNotFoundException,
-      InterruptedException {
+    InterruptedException {
     job.waitForCompletion(false);
 
     if (!job.isSuccessful()) {
@@ -95,54 +214,4 @@ public class VJob {
 
   }
 
-  public static final class VMapper extends
-      Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> {
-
-    private Matrix uHat;
-    private DenseVector vRow;
-    private DenseVector sValues;
-    private VectorWritable vRowWritable;
-    private int kp;
-    private int k;
-
-    @Override
-    protected void map(IntWritable key, VectorWritable value, Context context)
-      throws IOException, InterruptedException {
-      Vector qRow = value.get();
-      for (int i = 0; i < k; i++) {
-        vRow.setQuick(i,
-                      qRow.dot(uHat.viewColumn(i)) / sValues.getQuick(i));
-      }
-      context.write(key, vRowWritable); // U inherits original A row labels.
-    }
-
-    @Override
-    protected void setup(Context context) throws IOException,
-        InterruptedException {
-      super.setup(context);
-      Path uHatPath = new Path(context.getConfiguration().get(PROP_UHAT_PATH));
-
-      Path sigmaPath = new Path(context.getConfiguration().get(PROP_SIGMA_PATH));
-      FileSystem fs = FileSystem.get(uHatPath.toUri(), context.getConfiguration());
-
-      uHat = new DenseMatrix(SSVDSolver.loadDistributedRowMatrix(fs,
-          uHatPath, context.getConfiguration()));
-      // since uHat is (k+p) x (k+p)
-      kp = uHat.columnSize();
-      k = context.getConfiguration().getInt(PROP_K, kp);
-      vRow = new DenseVector(k);
-      vRowWritable = new VectorWritable(vRow);
-
-      sValues = new DenseVector(SSVDSolver.loadDistributedRowMatrix(fs,
-          sigmaPath, context.getConfiguration())[0], true);
-      if (context.getConfiguration().get(PROP_V_HALFSIGMA) != null) {
-        for (int i = 0; i < k; i++) {
-          sValues.setQuick(i, Math.sqrt(sValues.getQuick(i)));
-        }
-      }
-
-    }
-
-  }
-
 }

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java?rev=1292532&r1=1292531&r2=1292532&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java
 Wed Feb 22 21:57:27 2012
@@ -70,7 +70,7 @@ public class YtYJob {
 
     @Override
     protected void setup(Context context) throws IOException,
-        InterruptedException {
+      InterruptedException {
       int k = context.getConfiguration().getInt(PROP_K, -1);
       int p = context.getConfiguration().getInt(PROP_P, -1);
 
@@ -78,10 +78,10 @@ public class YtYJob {
       Validate.isTrue(p > 0, "invalid p parameter");
 
       kp = k + p;
-      long omegaSeed = Long.parseLong(context.getConfiguration()
-          .get(PROP_OMEGA_SEED));
+      long omegaSeed =
+        Long.parseLong(context.getConfiguration().get(PROP_OMEGA_SEED));
 
-      omega = new Omega(omegaSeed, k, p);
+      omega = new Omega(omegaSeed, k + p);
 
       mYtY = new UpperTriangular(kp);
 
@@ -92,7 +92,7 @@ public class YtYJob {
 
     @Override
     protected void map(Writable key, VectorWritable value, Context context)
-        throws IOException, InterruptedException {
+      throws IOException, InterruptedException {
       omega.computeYRow(value.get(), yRow);
       // compute outer product update for YtY
 
@@ -115,12 +115,10 @@ public class YtYJob {
          * are creating some short-lived references) here is that we obviously
         * do two times more iterations then necessary if y row is pretty dense.
          */
-        for (Iterator<Vector.Element> iterI = yRow.iterateNonZero(); iterI
-            .hasNext();) {
+        for (Iterator<Vector.Element> iterI = yRow.iterateNonZero(); iterI.hasNext();) {
           Vector.Element eli = iterI.next();
           int i = eli.index();
-          for (Iterator<Vector.Element> iterJ = yRow.iterateNonZero(); iterJ
-              .hasNext();) {
+          for (Iterator<Vector.Element> iterJ = yRow.iterateNonZero(); iterJ.hasNext();) {
             Vector.Element elj = iterJ.next();
             int j = elj.index();
             if (j < i) {
@@ -134,9 +132,10 @@ public class YtYJob {
 
     @Override
     protected void cleanup(Context context) throws IOException,
-        InterruptedException {
+      InterruptedException {
       context.write(new IntWritable(context.getTaskAttemptID().getTaskID()
-          .getId()), new VectorWritable(new DenseVector(mYtY.getData())));
+                                           .getId()),
+                    new VectorWritable(new DenseVector(mYtY.getData())));
     }
   }
 
@@ -147,7 +146,7 @@ public class YtYJob {
 
     @Override
     protected void setup(Context context) throws IOException,
-        InterruptedException {
+      InterruptedException {
       int k = context.getConfiguration().getInt(PROP_K, -1);
       int p = context.getConfiguration().getInt(PROP_P, -1);
 
@@ -158,22 +157,28 @@ public class YtYJob {
 
     @Override
     protected void cleanup(Context context) throws IOException,
-        InterruptedException {
+      InterruptedException {
       context.write(new IntWritable(), accum);
     }
 
     @Override
-    protected void reduce(IntWritable key, Iterable<VectorWritable> values,
-        Context arg2) throws IOException, InterruptedException {
+    protected void reduce(IntWritable key,
+                          Iterable<VectorWritable> values,
+                          Context arg2) throws IOException,
+      InterruptedException {
       for (VectorWritable vw : values) {
         acc.addAll(vw.get());
       }
     }
   }
 
-  public static void run(Configuration conf, Path[] inputPaths,
-      Path outputPath, int k, int p, long seed)
-      throws ClassNotFoundException, InterruptedException, IOException {
+  public static void run(Configuration conf,
+                         Path[] inputPaths,
+                         Path outputPath,
+                         int k,
+                         int p,
+                         long seed) throws ClassNotFoundException,
+    InterruptedException, IOException {
 
     Job job = new Job(conf);
     job.setJobName("YtY-job");

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1292532&r1=1292531&r2=1292532&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
 Wed Feb 22 21:57:27 2012
@@ -17,9 +17,10 @@
 
 package org.apache.mahout.math.hadoop;
 
-import com.google.common.base.Function;
-import com.google.common.collect.Iterators;
-import com.google.common.collect.Maps;
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.Map;
+
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -27,6 +28,7 @@ import org.apache.hadoop.fs.Path;
 import org.apache.mahout.clustering.ClusteringTestUtils;
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.iterator.sequencefile.PathFilters;
+import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.MatrixSlice;
 import org.apache.mahout.math.RandomAccessSparseVector;
@@ -34,11 +36,12 @@ import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorIterable;
 import org.apache.mahout.math.VectorWritable;
 import org.apache.mahout.math.decomposer.SolverTest;
+import org.apache.mahout.math.function.Functions;
 import org.junit.Test;
 
-import java.io.IOException;
-import java.util.Iterator;
-import java.util.Map;
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
+import com.google.common.collect.Maps;
 
 public final class TestDistributedRowMatrix extends MahoutTestCase {
   public static final String TEST_PROPERTY_KEY = "test.property.key";
@@ -81,6 +84,38 @@ public final class TestDistributedRowMat
   }
 
   @Test
+  public void testMatrixColumnMeansJob() throws Exception {
+    Matrix m =
+        SolverTest.randomSequentialAccessSparseMatrix(100, 90, 50, 20, 1.0);
+    DistributedRowMatrix dm =
+        randomDistributedMatrix(100, 90, 50, 20, 1.0, false);
+
+    Vector expected = new DenseVector(50);
+    for (int i = 0; i < m.numRows(); i++) {
+      expected.assign(m.viewRow(i), Functions.PLUS);
+    }
+    expected.assign(Functions.DIV, m.numRows());
+    Vector actual = dm.columnMeans("DenseVector");
+    assertEquals(0.0, expected.getDistanceSquared(actual), EPSILON);
+  }
+
+  @Test
+  public void testNullMatrixColumnMeansJob() throws Exception {
+    Matrix m =
+        SolverTest.randomSequentialAccessSparseMatrix(100, 90, 0, 0, 1.0);
+    DistributedRowMatrix dm =
+        randomDistributedMatrix(100, 90, 0, 0, 1.0, false);
+
+    Vector expected = new DenseVector(0);
+    for (int i = 0; i < m.numRows(); i++) {
+      expected.assign(m.viewRow(i), Functions.PLUS);
+    }
+    expected.assign(Functions.DIV, m.numRows());
+    Vector actual = dm.columnMeans();
+    assertEquals(0.0, expected.getDistanceSquared(actual), EPSILON);
+  }
+
+  @Test
   public void testMatrixTimesVector() throws Exception {
     Vector v = new RandomAccessSparseVector(50);
     v.assign(1.0);
@@ -118,33 +153,33 @@ public final class TestDistributedRowMat
   }
 
   @Test
-  public void testMatrixMultiplactionJobConfBuilder() throws Exception {    
+  public void testMatrixMultiplactionJobConfBuilder() throws Exception {
     Configuration initialConf = createInitialConf();
-        
-    Path baseTmpDirPath = getTestTempDirPath("testpaths");    
+
+    Path baseTmpDirPath = getTestTempDirPath("testpaths");
     Path aPath = new Path(baseTmpDirPath, "a");
     Path bPath = new Path(baseTmpDirPath, "b");
     Path outPath = new Path(baseTmpDirPath, "out");
-    
+
     Configuration mmJobConf = 
MatrixMultiplicationJob.createMatrixMultiplyJobConf(aPath, bPath, outPath, 10);
-    Configuration mmCustomJobConf = 
MatrixMultiplicationJob.createMatrixMultiplyJobConf(initialConf, 
-                                                                               
         aPath, 
-                                                                               
         bPath, 
-                                                                               
         outPath, 
+    Configuration mmCustomJobConf = 
MatrixMultiplicationJob.createMatrixMultiplyJobConf(initialConf,
+                                                                               
         aPath,
+                                                                               
         bPath,
+                                                                               
         outPath,
                                                                                
         10);
-    
+
     assertNull(mmJobConf.get(TEST_PROPERTY_KEY));
-    assertEquals(TEST_PROPERTY_VALUE, mmCustomJobConf.get(TEST_PROPERTY_KEY)); 
 
+    assertEquals(TEST_PROPERTY_VALUE, mmCustomJobConf.get(TEST_PROPERTY_KEY));
   }
-  
+
   @Test
   public void testTransposeJobConfBuilder() throws Exception {
     Configuration initialConf = createInitialConf();
-    
-    Path baseTmpDirPath = getTestTempDirPath("testpaths");    
+
+    Path baseTmpDirPath = getTestTempDirPath("testpaths");
     Path inputPath = new Path(baseTmpDirPath, "input");
     Path outputPath = new Path(baseTmpDirPath, "output");
-    
+
     Configuration transposeJobConf = TransposeJob.buildTransposeJobConf(inputPath, outputPath, 10);
     Configuration transposeCustomJobConf = TransposeJob.buildTransposeJobConf(initialConf, inputPath, outputPath, 10);
 
@@ -155,7 +190,7 @@ public final class TestDistributedRowMat
   @Test public void testTimesSquaredJobConfBuilders() throws Exception {
     Configuration initialConf = createInitialConf();
 
-    Path baseTmpDirPath = getTestTempDirPath("testpaths");    
+    Path baseTmpDirPath = getTestTempDirPath("testpaths");
     Path inputPath = new Path(baseTmpDirPath, "input");
     Path outputPath = new Path(baseTmpDirPath, "output");
 
@@ -167,46 +202,46 @@ public final class TestDistributedRowMat
 
     assertNull(timesSquaredJobConf1.get(TEST_PROPERTY_KEY));
     assertEquals(TEST_PROPERTY_VALUE, 
customTimesSquaredJobConf1.get(TEST_PROPERTY_KEY));
-    
+
     Configuration timesJobConf = TimesSquaredJob.createTimesJobConf(v, 50, inputPath, outputPath);
     Configuration customTimesJobConf = TimesSquaredJob.createTimesJobConf(initialConf, v, 50, inputPath, outputPath);
-    
+
     assertNull(timesJobConf.get(TEST_PROPERTY_KEY));
     assertEquals(TEST_PROPERTY_VALUE, 
customTimesJobConf.get(TEST_PROPERTY_KEY));
-    
-    Configuration timesSquaredJobConf2 = 
TimesSquaredJob.createTimesSquaredJobConf(v, 
-                                                                               
    inputPath, 
-                                                                               
    outputPath, 
-                                                                               
    TimesSquaredJob.TimesSquaredMapper.class, 
+
+    Configuration timesSquaredJobConf2 = 
TimesSquaredJob.createTimesSquaredJobConf(v,
+                                                                               
    inputPath,
+                                                                               
    outputPath,
+                                                                               
    TimesSquaredJob.TimesSquaredMapper.class,
                                                                                
    TimesSquaredJob.VectorSummingReducer.class);
     Configuration customTimesSquaredJobConf2 = 
TimesSquaredJob.createTimesSquaredJobConf(initialConf,
-                                                                               
          v, 
-                                                                               
          inputPath, 
-                                                                               
          outputPath, 
-                                                                               
          TimesSquaredJob.TimesSquaredMapper.class, 
+                                                                               
          v,
+                                                                               
          inputPath,
+                                                                               
          outputPath,
+                                                                               
          TimesSquaredJob.TimesSquaredMapper.class,
                                                                                
          TimesSquaredJob.VectorSummingReducer.class);
- 
+
     assertNull(timesSquaredJobConf2.get(TEST_PROPERTY_KEY));
     assertEquals(TEST_PROPERTY_VALUE, 
customTimesSquaredJobConf2.get(TEST_PROPERTY_KEY));
 
     Configuration timesSquaredJobConf3 = 
TimesSquaredJob.createTimesSquaredJobConf(v,
                                                                                
    50,
-                                                                               
    inputPath, 
-                                                                               
    outputPath, 
-                                                                               
    TimesSquaredJob.TimesSquaredMapper.class, 
+                                                                               
    inputPath,
+                                                                               
    outputPath,
+                                                                               
    TimesSquaredJob.TimesSquaredMapper.class,
                                                                                
    TimesSquaredJob.VectorSummingReducer.class);
     Configuration customTimesSquaredJobConf3 = 
TimesSquaredJob.createTimesSquaredJobConf(initialConf,
                                                                                
          v,
                                                                                
          50,
-                                                                               
          inputPath, 
-                                                                               
          outputPath, 
-                                                                               
          TimesSquaredJob.TimesSquaredMapper.class, 
+                                                                               
          inputPath,
+                                                                               
          outputPath,
+                                                                               
          TimesSquaredJob.TimesSquaredMapper.class,
                                                                                
          TimesSquaredJob.VectorSummingReducer.class);
- 
+
     assertNull(timesSquaredJobConf3.get(TEST_PROPERTY_KEY));
     assertEquals(TEST_PROPERTY_VALUE, 
customTimesSquaredJobConf3.get(TEST_PROPERTY_KEY));
   }
-  
+
   @Test
   public void testTimesVectorTempDirDeletion() throws Exception {
     Configuration conf = new Configuration();
@@ -224,13 +259,13 @@ public final class TestDistributedRowMat
     Vector result1 = dm.times(v);
 
     assertEquals(0, fs.listStatus(outputPath).length);
-    
+
     deleteContentsOfPath(conf, outputPath);
     assertEquals(0, fs.listStatus(outputPath).length);
-    
+
     conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
     dm.setConf(conf);
-    
+
     Vector result2 = dm.times(v);
 
     FileStatus[] outputStatuses = fs.listStatus(outputPath);
@@ -261,13 +296,13 @@ public final class TestDistributedRowMat
     Vector result1 = dm.timesSquared(v);
 
     assertEquals(0, fs.listStatus(outputPath).length);
-    
+
     deleteContentsOfPath(conf, outputPath);
     assertEquals(0, fs.listStatus(outputPath).length);
-    
+
     conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true);
     dm.setConf(conf);
-    
+
     Vector result2 = dm.timesSquared(v);
 
     FileStatus[] outputStatuses = fs.listStatus(outputPath);
@@ -277,7 +312,7 @@ public final class TestDistributedRowMat
     Path outputVectorPath = new Path(outputTempPath, 
TimesSquaredJob.OUTPUT_VECTOR_FILENAME);
     assertEquals(1, fs.listStatus(inputVectorPath, 
PathFilters.logsCRCFilter()).length);
     assertEquals(1, fs.listStatus(outputVectorPath, 
PathFilters.logsCRCFilter()).length);
-    
+
     assertEquals(0.0, result1.getDistanceSquared(result2), EPSILON);
   }
 
@@ -289,13 +324,13 @@ public final class TestDistributedRowMat
 
   private static void deleteContentsOfPath(Configuration conf, Path path) 
throws Exception {
     FileSystem fs = path.getFileSystem(conf);
-    
+
     FileStatus[] statuses = fs.listStatus(path);
     for (FileStatus status : statuses) {
       fs.delete(status.getPath(), true);
-    }    
+    }
   }
-    
+
   public DistributedRowMatrix randomDistributedMatrix(int numRows,
                                                       int nonNullRows,
                                                       int numCols,

Copied: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
 (from r1245615, 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java)
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java&r1=1245615&r2=1292532&rev=1292532&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
 Wed Feb 22 21:57:27 2012
@@ -1,226 +1,185 @@
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.mahout.math.hadoop.stochasticsvd;
-
-import java.io.Closeable;
-import java.io.File;
-import java.io.IOException;
-import java.util.Deque;
-import java.util.LinkedList;
-import java.util.Random;
-
-import com.google.common.io.Closeables;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.io.SequenceFile.CompressionType;
-import org.apache.hadoop.io.compress.DefaultCodec;
-import org.apache.mahout.common.MahoutTestCase;
-import org.apache.mahout.common.RandomUtils;
-import org.apache.mahout.math.DenseMatrix;
-import org.apache.mahout.math.SequentialAccessSparseVector;
-import org.apache.mahout.math.SingularValueDecomposition;
-import org.apache.mahout.math.Vector;
-import org.apache.mahout.math.VectorWritable;
-import org.junit.Test;
-
-/**
- * 
- * Tests SSVD solver with a made-up data running hadoop solver in a local mode.
- * It requests full-rank SSVD and then compares singular values to that of
- * Colt's SVD asserting epsilon(precision) 1e-10 or whatever most recent value
- * configured.
- * 
- */
-public class LocalSSVDSolverSparseSequentialTest extends MahoutTestCase {
-
-  private static final double s_epsilon = 1.0E-10d;
-
-  /*
-   * removing from tests to reduce test running time
-   */
-  /* @Test */
-  public void testSSVDSolverSparse() throws IOException { 
-    runSSVDSolver(0);
-  }
-  
-  @Test
-  public void testSSVDSolverPowerIterations1() throws IOException { 
-    runSSVDSolver(1);
-  }
-  
-  public void runSSVDSolver(int q) throws IOException {
-
-    Configuration conf = new Configuration();
-    conf.set("mapred.job.tracker", "local");
-    conf.set("fs.default.name", "file:///");
-
-    // conf.set("mapred.job.tracker","localhost:11011");
-    // conf.set("fs.default.name","hdfs://localhost:11010/");
-
-    Deque<Closeable> closeables = new LinkedList<Closeable>();
-    Random rnd = RandomUtils.getRandom();
-
-    File tmpDir = getTestTempDir("svdtmp");
-    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());
-
-    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");
-
-    // create distributed row matrix-like struct
-    SequenceFile.Writer w =
-      SequenceFile.createWriter(FileSystem.getLocal(conf),
-                                conf,
-                                aLocPath,
-                                IntWritable.class,
-                                VectorWritable.class,
-                                CompressionType.BLOCK,
-                                new DefaultCodec());
-    closeables.addFirst(w);
-
-    int n = 100;
-    int m = 2000;
-    double percent = 5;
-
-    VectorWritable vw = new VectorWritable();
-    IntWritable roww = new IntWritable();
-
-    double muAmplitude = 50.0;
-    for (int i = 0; i < m; i++) {
-      Vector dv = new SequentialAccessSparseVector(n);
-      for (int j = 0; j < n * percent / 100; j++) {
-        dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5));
-      }
-      roww.set(i);
-      vw.set(dv);
-      w.append(roww, vw);
-    }
-    closeables.remove(w);
-    Closeables.close(w, true);
-
-    FileSystem fs = FileSystem.get(aLocPath.toUri(), conf);
-
-    Path tempDirPath = getTestTempDirPath("svd-proc");
-    Path aPath = new Path(tempDirPath, "A/A.seq");
-    fs.copyFromLocalFile(aLocPath, aPath);
-
-    Path svdOutPath = new Path(tempDirPath, "SSVD-out");
-
-    // make sure we wipe out previous test results, just a convenience
-    fs.delete(svdOutPath, true);
-
-    // Solver starts here:
-    System.out.println("Input prepared, starting solver...");
-
-    int ablockRows = 867;
-    int p = 60;
-    int k = 40;
-    SSVDSolver ssvd =
-      new SSVDSolver(conf,
-                     new Path[] { aPath },
-                     svdOutPath,
-                     ablockRows,
-                     k,
-                     p,
-                     3);
-    ssvd.setOuterBlockHeight(500);
-    ssvd.setAbtBlockHeight(251);
-    
-    /*
-     * removing V,U jobs from this test to reduce running time. i will keep 
them
-     * put in the dense test though.
-     */
-    ssvd.setComputeU(false);
-    ssvd.setComputeV(false);
-    
-    ssvd.setOverwrite(true);
-    ssvd.setQ(q);
-    ssvd.setBroadcast(true);
-    ssvd.run();
-
-    double[] stochasticSValues = ssvd.getSingularValues();
-    System.out.println("--SSVD solver singular values:");
-    dumpSv(stochasticSValues);
-    System.out.println("--Colt SVD solver singular values:");
-
-    // try to run the same thing without stochastic algo
-    double[][] a = SSVDSolver.loadDistributedRowMatrix(fs, aPath, conf);
-
-    // SingularValueDecompositionImpl svd=new 
SingularValueDecompositionImpl(new
-    // Array2DRowRealMatrix(a));
-    SingularValueDecomposition svd2 =
-      new SingularValueDecomposition(new DenseMatrix(a));
-
-    double[] svalues2 = svd2.getSingularValues();
-    dumpSv(svalues2);
-
-    for (int i = 0; i < k + p; i++) {
-      assertTrue(Math.abs(svalues2[i] - stochasticSValues[i]) <= s_epsilon);
-    }
-
-    double[][] mQ =
-      SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/"
-          + BtJob.OUTPUT_Q + "-*"), conf);
-
-    SSVDPrototypeTest
-      .assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon);
-
-    /*
-     * removing tests on U and V to keep this test leaner. I will keep U,V
-     * computation and assertions in the dense tests though.
-     */
-
-    /*
-    double[][] u =
-      SSVDSolver.loadDistributedRowMatrix(fs,
-                                          new Path(svdOutPath, "U/[^_]*"),
-                                          conf);
-
-    SSVDPrototypeTest
-      .assertOrthonormality(new DenseMatrix(u), false, s_epsilon);
-    double[][] v =
-      SSVDSolver.loadDistributedRowMatrix(fs,
-                                          new Path(svdOutPath, "V/[^_]*"),
-                                          conf);
-
-    SSVDPrototypeTest
-      .assertOrthonormality(new DenseMatrix(v), false, s_epsilon);
-    */
-  }
-
-  static void dumpSv(double[] s) {
-    System.out.printf("svs: ");
-    for (double value : s) {
-      System.out.printf("%f  ", value);
-    }
-    System.out.println();
-
-  }
-
-  static void dump(double[][] matrix) {
-    for (double[] aMatrix : matrix) {
-      for (double anAMatrix : aMatrix) {
-        System.out.printf("%f  ", anAMatrix);
-      }
-      System.out.println();
-    }
-  }
-
-}
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.hadoop.stochasticsvd;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.util.Deque;
+import java.util.LinkedList;
+import java.util.Random;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.SequentialAccessSparseVector;
+import org.apache.mahout.math.SingularValueDecomposition;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.VectorWritable;
+import org.apache.mahout.math.function.Functions;
+import org.junit.Test;
+
+import com.google.common.io.Closeables;
+
+public class LocalSSVDPCADenseTest extends MahoutTestCase {
+
+  private static final double s_epsilon = 1.0E-10d;
+
+  @Test
+  public void runPCATest1() throws IOException {
+    runSSVDSolver(1);
+  }
+
+  public void runSSVDSolver(int q) throws IOException {
+
+    Configuration conf = new Configuration();
+    conf.set("mapred.job.tracker", "local");
+    conf.set("fs.default.name", "file:///");
+
+    // conf.set("mapred.job.tracker","localhost:11011");
+    // conf.set("fs.default.name","hdfs://localhost:11010/");
+
+    Deque<Closeable> closeables = new LinkedList<Closeable>();
+    Random rnd = RandomUtils.getRandom();
+
+    File tmpDir = getTestTempDir("svdtmp");
+    conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());
+
+    Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");
+
+    // create distributed row matrix-like struct
+    SequenceFile.Writer w =
+      SequenceFile.createWriter(FileSystem.getLocal(conf),
+                                conf,
+                                aLocPath,
+                                IntWritable.class,
+                                VectorWritable.class,
+                                CompressionType.BLOCK,
+                                new DefaultCodec());
+    closeables.addFirst(w);
+
+    int n = 100;
+    int m = 2000;
+    double percent = 5;
+
+    VectorWritable vw = new VectorWritable();
+    IntWritable roww = new IntWritable();
+
+    Vector xi = new DenseVector(n);
+
+    double muAmplitude = 50.0;
+    for (int i = 0; i < m; i++) {
+      Vector dv = new SequentialAccessSparseVector(n);
+      for (int j = 0; j < n * percent / 100; j++) {
+        dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
+      }
+      roww.set(i);
+      vw.set(dv);
+      w.append(roww, vw);
+      xi.assign(dv, Functions.PLUS);
+    }
+    closeables.remove(w);
+    Closeables.close(w, true);
+
+    xi.assign(Functions.mult(1.0 / m));
+
+    FileSystem fs = FileSystem.get(conf);
+
+    Path tempDirPath = getTestTempDirPath("svd-proc");
+    Path aPath = new Path(tempDirPath, "A/A.seq");
+    fs.copyFromLocalFile(aLocPath, aPath);
+    Path xiPath = new Path(tempDirPath, "xi/xi.seq");
+    SSVDHelper.saveVector(xi, xiPath, conf);
+
+    Path svdOutPath = new Path(tempDirPath, "SSVD-out");
+
+    // make sure we wipe out previous test results, just a convenience
+    fs.delete(svdOutPath, true);
+
+    // Solver starts here:
+    System.out.println("Input prepared, starting solver...");
+
+    int ablockRows = 867;
+    int p = 60;
+    int k = 40;
+    SSVDSolver ssvd =
+      new SSVDSolver(conf,
+                     new Path[] { aPath },
+                     svdOutPath,
+                     ablockRows,
+                     k,
+                     p,
+                     3);
+    ssvd.setOuterBlockHeight(500);
+    ssvd.setAbtBlockHeight(251);
+    ssvd.setPcaMeanPath(xiPath);
+
+    /*
+     * removing V,U jobs from this test to reduce running time. i will keep 
them
+     * put in the dense test though.
+     */
+    ssvd.setComputeU(false);
+    ssvd.setComputeV(false);
+
+    ssvd.setOverwrite(true);
+    ssvd.setQ(q);
+    ssvd.setBroadcast(true);
+    ssvd.run();
+
+    Vector stochasticSValues = ssvd.getSingularValues();
+    System.out.println("--SSVD solver singular values:");
+    LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
+    System.out.println("--Colt SVD solver singular values:");
+
+    // try to run the same thing without stochastic algo
+    double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);
+
+    // subtract pseudo pca mean
+    for (int i = 0; i < m; i++)
+      for (int j = 0; j < n; j++)
+        a[i][j] -= xi.getQuick(j);
+
+    SingularValueDecomposition svd2 =
+      new SingularValueDecomposition(new DenseMatrix(a));
+
+    Vector svalues2 = new DenseVector(svd2.getSingularValues());
+    LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);
+
+    for (int i = 0; i < k + p; i++) {
+      assertTrue(Math.abs(svalues2.getQuick(i) - 
stochasticSValues.getQuick(i)) <= s_epsilon);
+    }
+
+    double[][] mQ =
+      SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/"
+          + BtJob.OUTPUT_Q + "-*"), conf);
+
+    SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ),
+                                           false,
+                                           s_epsilon);
+
+  }
+
+}

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java?rev=1292532&r1=1292531&r2=1292532&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
 Wed Feb 22 21:57:27 2012
@@ -41,10 +41,12 @@ public class LocalSSVDSolverDenseTest ex
 
   private static final double s_epsilon = 1.0E-10d;
 
-  // I actually never saw errors more than 3% worst case for this test,
-  // but since it's non-deterministic test, it still may occasionally produce
-  // bad results with a non-zero probability, so i put this pct% for error
-  // margin high enough so it (almost) never fails.
+  /*
+   * I actually never saw errors more than 3% worst case for this particular
+   * test, but since it's non-deterministic test, it still may occasionally
+   * produce bad results with a non-zero probability, so i put this pct% for
+   * error margin high enough so it (almost) never fails.
+   */
   private static final double s_precisionPct = 10;
 
   @Test
@@ -135,7 +137,7 @@ public class LocalSSVDSolverDenseTest ex
     ssvd.setBroadcast(false);
     ssvd.run();
 
-    double[] stochasticSValues = ssvd.getSingularValues();
+    Vector stochasticSValues = ssvd.getSingularValues();
     System.out.println("--SSVD solver singular values:");
     dumpSv(stochasticSValues);
 
@@ -167,36 +169,36 @@ public class LocalSSVDSolverDenseTest ex
     // used to generate surrogate input
 
     for (int i = 0; i < k; i++) {
-      assertTrue(Math.abs((singularValues.getQuick(i) - stochasticSValues[i])
+      assertTrue(Math.abs((singularValues.getQuick(i) - 
stochasticSValues.getQuick(i))
           / singularValues.getQuick(i)) <= s_precisionPct / 100);
     }
 
     double[][] mQ =
-      SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/"
+      SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/"
           + BtJob.OUTPUT_Q + "-*"), conf);
 
-    SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(mQ),
+    SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ),
                                            false,
                                            s_epsilon);
 
     double[][] u =
-      SSVDSolver.loadDistributedRowMatrix(fs,
+      SSVDHelper.loadDistributedRowMatrix(fs,
                                           new Path(svdOutPath, "U/[^_]*"),
                                           conf);
 
-    SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(u), false, 
s_epsilon);
+    SSVDCommonTest.assertOrthonormality(new DenseMatrix(u), false, s_epsilon);
     double[][] v =
-      SSVDSolver.loadDistributedRowMatrix(fs,
+      SSVDHelper.loadDistributedRowMatrix(fs,
                                           new Path(svdOutPath, "V/[^_]*"),
                                           conf);
 
-    SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(v), false, 
s_epsilon);
+    SSVDCommonTest.assertOrthonormality(new DenseMatrix(v), false, s_epsilon);
   }
 
-  static void dumpSv(double[] s) {
+  static void dumpSv(Vector s) {
     System.out.printf("svs: ");
-    for (double value : s) {
-      System.out.printf("%f  ", value);
+    for (Vector.Element el : s) {
+      System.out.printf("%f  ", el.get());
     }
     System.out.println();
 

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1292532&r1=1292531&r2=1292532&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
 Wed Feb 22 21:57:27 2012
@@ -24,8 +24,6 @@ import java.util.Deque;
 import java.util.LinkedList;
 import java.util.Random;
 
-import com.google.common.io.Closeables;
-
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
@@ -36,12 +34,15 @@ import org.apache.hadoop.io.compress.Def
 import org.apache.mahout.common.MahoutTestCase;
 import org.apache.mahout.common.RandomUtils;
 import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
 import org.apache.mahout.math.SequentialAccessSparseVector;
 import org.apache.mahout.math.SingularValueDecomposition;
 import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.VectorWritable;
 import org.junit.Test;
 
+import com.google.common.io.Closeables;
+
 /**
  * 
  * Tests SSVD solver with a made-up data running hadoop solver in a local mode.
@@ -58,15 +59,15 @@ public class LocalSSVDSolverSparseSequen
    * removing from tests to reduce test running time
    */
   /* @Test */
-  public void testSSVDSolverSparse() throws IOException { 
+  public void testSSVDSolverSparse() throws IOException {
     runSSVDSolver(0);
   }
-  
+
   @Test
-  public void testSSVDSolverPowerIterations1() throws IOException { 
+  public void testSSVDSolverPowerIterations1() throws IOException {
     runSSVDSolver(1);
   }
-  
+
   public void runSSVDSolver(int q) throws IOException {
 
     Configuration conf = new Configuration();
@@ -142,45 +143,46 @@ public class LocalSSVDSolverSparseSequen
                      3);
     ssvd.setOuterBlockHeight(500);
     ssvd.setAbtBlockHeight(251);
-    
+
     /*
      * removing V,U jobs from this test to reduce running time. i will keep 
them
      * put in the dense test though.
      */
     ssvd.setComputeU(false);
     ssvd.setComputeV(false);
-    
+
     ssvd.setOverwrite(true);
     ssvd.setQ(q);
     ssvd.setBroadcast(true);
     ssvd.run();
 
-    double[] stochasticSValues = ssvd.getSingularValues();
+    Vector stochasticSValues = ssvd.getSingularValues();
     System.out.println("--SSVD solver singular values:");
     dumpSv(stochasticSValues);
     System.out.println("--Colt SVD solver singular values:");
 
     // try to run the same thing without stochastic algo
-    double[][] a = SSVDSolver.loadDistributedRowMatrix(fs, aPath, conf);
+    double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);
 
     // SingularValueDecompositionImpl svd=new 
SingularValueDecompositionImpl(new
     // Array2DRowRealMatrix(a));
     SingularValueDecomposition svd2 =
       new SingularValueDecomposition(new DenseMatrix(a));
 
-    double[] svalues2 = svd2.getSingularValues();
+    Vector svalues2 = new DenseVector(svd2.getSingularValues());
     dumpSv(svalues2);
 
     for (int i = 0; i < k + p; i++) {
-      assertTrue(Math.abs(svalues2[i] - stochasticSValues[i]) <= s_epsilon);
+      assertTrue(Math.abs(svalues2.getQuick(i) - 
stochasticSValues.getQuick(i)) <= s_epsilon);
     }
 
     double[][] mQ =
-      SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/"
+      SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/"
           + BtJob.OUTPUT_Q + "-*"), conf);
 
-    SSVDPrototypeTest
-      .assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon);
+    SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ),
+                                           false,
+                                           s_epsilon);
 
     /*
      * removing tests on U and V to keep this test leaner. I will keep U,V
@@ -188,27 +190,22 @@ public class LocalSSVDSolverSparseSequen
      */
 
     /*
-    double[][] u =
-      SSVDSolver.loadDistributedRowMatrix(fs,
-                                          new Path(svdOutPath, "U/[^_]*"),
-                                          conf);
-
-    SSVDPrototypeTest
-      .assertOrthonormality(new DenseMatrix(u), false, s_epsilon);
-    double[][] v =
-      SSVDSolver.loadDistributedRowMatrix(fs,
-                                          new Path(svdOutPath, "V/[^_]*"),
-                                          conf);
-
-    SSVDPrototypeTest
-      .assertOrthonormality(new DenseMatrix(v), false, s_epsilon);
-    */
+     * double[][] u = SSVDSolver.loadDistributedRowMatrix(fs, new
+     * Path(svdOutPath, "U/[^_]*"), conf);
+     * 
+     * SSVDPrototypeTest .assertOrthonormality(new DenseMatrix(u), false,
+     * s_epsilon); double[][] v = SSVDSolver.loadDistributedRowMatrix(fs, new
+     * Path(svdOutPath, "V/[^_]*"), conf);
+     * 
+     * SSVDPrototypeTest .assertOrthonormality(new DenseMatrix(v), false,
+     * s_epsilon);
+     */
   }
 
-  static void dumpSv(double[] s) {
+  static void dumpSv(Vector s) {
     System.out.printf("svs: ");
-    for (double value : s) {
-      System.out.printf("%f  ", value);
+    for (Vector.Element el : s) {
+      System.out.printf("%f  ", el.get());
     }
     System.out.println();
 

Copied: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCommonTest.java
 (from r1245615, 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java)
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCommonTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCommonTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java&r1=1245615&r2=1292532&rev=1292532&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCommonTest.java
 Wed Feb 22 21:57:27 2012
@@ -28,27 +28,21 @@ import org.apache.mahout.math.function.D
 import org.apache.mahout.math.hadoop.stochasticsvd.qr.GivensThinSolver;
 import org.junit.Test;
 
-/** 
- * Tests parts of of Stochastic SVD solver code in local mode
- * using "prototype" code (class that simulates processes 
- * actually happenning in the MR jobs).
+/**
+ * Shared ssvd test code
  */
-public class SSVDPrototypeTest extends MahoutTestCase {
+public class SSVDCommonTest extends MahoutTestCase {
 
   private static final double SCALE = 1000;
   private static final double SVD_EPSILON = 1.0e-10;
 
   @Test
-  public void testSSVDPrototype() throws Exception {
-    SSVDPrototype.main(null);
-  }
-
-  @Test
   public void testGivensQR() throws Exception {
     // DenseMatrix m = new DenseMatrix(dims<<2,dims);
     Matrix m = new DenseMatrix(3, 3);
     m.assign(new DoubleFunction() {
       private final Random rnd = RandomUtils.getRandom();
+
       @Override
       public double apply(double arg0) {
         return rnd.nextDouble() * SCALE;
@@ -65,21 +59,25 @@ public class SSVDPrototypeTest extends M
     m.setQuick(2, 1, 8);
     m.setQuick(2, 2, 9);
 
-    GivensThinSolver qrSolver = new GivensThinSolver(m.rowSize(), 
m.columnSize());
+    GivensThinSolver qrSolver =
+      new GivensThinSolver(m.rowSize(), m.columnSize());
     qrSolver.solve(m);
 
     Matrix qtm = new DenseMatrix(qrSolver.getThinQtTilde());
 
     assertOrthonormality(qtm.transpose(), false, SVD_EPSILON);
 
-    Matrix aClone = new DenseMatrix(qrSolver.getThinQtTilde()).transpose()
-        .times(qrSolver.getRTilde());
+    Matrix aClone =
+      new DenseMatrix(qrSolver.getThinQtTilde()).transpose()
+                                                .times(qrSolver.getRTilde());
 
     System.out.println("aclone : " + aClone);
 
   }
 
-  public static void assertOrthonormality(Matrix mtx, boolean 
insufficientRank, double epsilon) {
+  public static void assertOrthonormality(Matrix mtx,
+                                          boolean insufficientRank,
+                                          double epsilon) {
     int n = mtx.columnSize();
     int rank = 0;
     for (int i = 0; i < n; i++) {
@@ -99,7 +97,8 @@ public class SSVDPrototypeTest extends M
         assertTrue(Math.abs((i == j && rank > j ? 1 : 0) - dot) < epsilon);
       }
     }
-    assertTrue((!insufficientRank && rank == n) || (insufficientRank && rank < 
n));
+    assertTrue((!insufficientRank && rank == n)
+        || (insufficientRank && rank < n));
 
   }
 

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java?rev=1292532&r1=1292531&r2=1292532&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java
 Wed Feb 22 21:57:27 2012
@@ -108,7 +108,7 @@ public class SSVDTestsHelper {
       }
     }
     GramSchmidt.orthonormalizeColumns(result);
-    SSVDPrototypeTest.assertOrthonormality(result, false, 1.0e-10);
+    SSVDCommonTest.assertOrthonormality(result, false, 1.0e-10);
     return result;
   }
 


Reply via email to