Author: dlyubimov
Date: Thu Aug  8 06:05:32 2013
New Revision: 1511590

URL: http://svn.apache.org/r1511590
Log:
MAHOUT-1306: fixing PCA tests; -ow flag causing PCA errors

Squashed commit of the following:

commit cefe0d4863955288f304683ea25cc97047805bdd
Author: Dmitriy Lyubimov <[email protected]>
Date:   Wed Aug 7 22:50:48 2013 -0700

    disabling sparse SSVD PCA test with -q=0 (leaving -q=1) in the interest of 
smaller test run time.

commit 412f9c5e06bf2a510948475e7d670f91d18d3386
Author: Dmitriy Lyubimov <[email protected]>
Date:   Wed Aug 7 22:36:17 2013 -0700

    cleaning up certain things; pca tests are asserting names and keys 
propagation

commit 4b002b30ddd4038311c003c77b7ba543eadcda98
Merge: 15b9a05 70777c4
Author: Dmitriy Lyubimov <[email protected]>
Date:   Wed Aug 7 21:27:57 2013 -0700

    Merge branch 'trunk' into PCA_fixes

commit 15b9a056603f703707672e601c1e3c13dbd00b8f
Author: Dmitriy Lyubimov <[email protected]>
Date:   Wed Aug 7 15:27:58 2013 -0700

    reverting test back to q=1

commit 8e91cc6ae5056b159fa6208a26b5a4f0e002bd8c
Author: Dmitriy Lyubimov <[email protected]>
Date:   Wed Aug 7 13:12:36 2013 -0700

    test fixes

Added:
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java
Removed:
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java
Modified:
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java
    
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java?rev=1511590&r1=1511589&r2=1511590&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java
 Thu Aug  8 06:05:32 2013
@@ -100,6 +100,8 @@ public final class QJob {
       String sbPathStr = conf.get(PROP_SB_PATH);
       if (sbPathStr != null) {
         sb = SSVDHelper.loadAndSumUpVectors(new Path(sbPathStr), conf);
+        if (sb == null)
+          throw new IOException(String.format("Unable to load s_omega from 
path %s.", sbPathStr));
       }
 
       outputs = new MultipleOutputs(new JobConf(conf));

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java?rev=1511590&r1=1511589&r2=1511590&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java
 Thu Aug  8 06:05:32 2013
@@ -17,13 +17,15 @@
 
 package org.apache.mahout.math.hadoop.stochasticsvd;
 
+import java.io.Closeable;
 import java.io.IOException;
+import java.util.*;
 import java.util.Arrays;
-import java.util.Comparator;
-import java.util.List;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import com.google.common.base.Function;
+import com.google.common.collect.Iterators;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
@@ -31,11 +33,11 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.io.IntWritable;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.io.Writable;
-import org.apache.mahout.common.iterator.sequencefile.PathFilters;
-import org.apache.mahout.common.iterator.sequencefile.PathType;
-import 
org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator;
-import 
org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable;
+import org.apache.mahout.common.IOUtils;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.iterator.sequencefile.*;
 import org.apache.mahout.math.*;
+import org.apache.mahout.math.Vector;
 import org.apache.mahout.math.function.Functions;
 
 import com.google.common.collect.Lists;
@@ -43,7 +45,6 @@ import com.google.common.io.Closeables;
 
 /**
  * set of small file manipulation helpers.
- *
  */
 public final class SSVDHelper {
 
@@ -149,13 +150,13 @@ public final class SSVDHelper {
         matcher.reset(o1.getPath().getName());
         if (!matcher.matches()) {
           throw new IllegalArgumentException("Unexpected file name, unable to 
deduce partition #:"
-              + o1.getPath());
+                                               + o1.getPath());
         }
         int p1 = Integer.parseInt(matcher.group(3));
         matcher.reset(o2.getPath().getName());
         if (!matcher.matches()) {
           throw new IllegalArgumentException("Unexpected file name, unable to 
deduce partition #:"
-              + o2.getPath());
+                                               + o2.getPath());
         }
 
         int p2 = Integer.parseInt(matcher.group(3));
@@ -164,49 +165,63 @@ public final class SSVDHelper {
 
     };
 
+  /**
+   * Opens a distributed row matrix stored as sequence files and exposes its
+   * rows as (key, vector) pairs. {@code PARTITION_COMPARATOR} is handed to the
+   * underlying directory iterator as the file ordering.
+   *
+   * @param fs         filesystem (not referenced in the method body; the
+   *                   underlying iterator is built from {@code glob} and
+   *                   {@code conf})
+   * @param glob       FS glob selecting the matrix files
+   * @param conf       configuration
+   * @param closeables receives the underlying file iterator (pushed to the
+   *                   front); the caller is expected to close it when done
+   * @return iterator over pairs of row key and unwrapped row vector
+   * @throws IOException if the underlying iterator cannot be created
+   */
+  public static Iterator<Pair<Writable, Vector>> drmIterator(FileSystem fs, Path glob, Configuration conf,
+                                                             Deque<Closeable> closeables)
+    throws IOException {
+    SequenceFileDirIterator<Writable, VectorWritable> ret =
+      new SequenceFileDirIterator<Writable, VectorWritable>(glob,
+                                                            PathType.GLOB,
+                                                            PathFilters.logsCRCFilter(),
+                                                            PARTITION_COMPARATOR,
+                                                            true,
+                                                            conf);
+    closeables.addFirst(ret);
+    return Iterators.transform(ret, new Function<Pair<Writable, VectorWritable>, Pair<Writable, Vector>>() {
+      @Override
+      public Pair<Writable, Vector> apply(Pair<Writable, VectorWritable> p) {
+        // Parameterized rather than raw Pair so the conversion stays type-checked.
+        return new Pair<Writable, Vector>(p.getFirst(), p.getSecond().get());
+      }
+    });
+  }
+
   /**
    * helper capability to load distributed row matrices into dense matrix (to
    * support tests mainly).
    *
-   * @param fs
-   *          filesystem
-   * @param glob
-   *          FS glob
-   * @param conf
-   *          configuration
+   * @param fs   filesystem
+   * @param glob FS glob
+   * @param conf configuration
    * @return Dense matrix array
    */
-  public static double[][] loadDistributedRowMatrix(FileSystem fs, Path glob, 
Configuration conf) throws IOException {
+  public static DenseMatrix drmLoadAsDense(FileSystem fs, Path glob, 
Configuration conf) throws IOException {
 
-    FileStatus[] files = fs.globStatus(glob);
-    if (files == null) {
-      return null;
-    }
-
-    List<double[]> denseData = Lists.newArrayList();
-
-    /*
-     * assume it is partitioned output, so we need to read them up in order of
-     * partitions.
-     */
-    Arrays.sort(files, PARTITION_COMPARATOR);
-
-    for (FileStatus fstat : files) {
-      for (VectorWritable value : new 
SequenceFileValueIterable<VectorWritable>(fstat.getPath(),
-                                                                               
 true,
-                                                                               
 conf)) {
-        Vector v = value.get();
-        int size = v.size();
-        double[] row = new double[size];
-        for (int i = 0; i < size; i++) {
-          row[i] = v.get(i);
+    Deque<Closeable> closeables = new ArrayDeque<Closeable>();
+    try {
+      List<double[]> denseData = new ArrayList<double[]>();
+      for (Iterator<Pair<Writable, Vector>> iter = drmIterator(fs, glob, conf, 
closeables);
+           iter.hasNext(); ) {
+        Pair<Writable, Vector> p = iter.next();
+        Vector v = p.getSecond();
+        double[] dd = new double[v.size()];
+        if (v.isDense()) {
+          for (int i = 0; i < v.size(); i++) {
+            dd[i] = v.getQuick(i);
+          }
+        } else {
+          for (Vector.Element el : v.nonZeroes()) {
+            dd[el.index()] = el.get();
+          }
         }
-        // ignore row label.
-        denseData.add(row);
+        denseData.add(dd);
       }
+      if (denseData.size() == 0) {
+        return null;
+      } else {
+        return new DenseMatrix(denseData.toArray(new 
double[denseData.size()][]));
+      }
+    } finally {
+      IOUtils.close(closeables);
     }
-
-    return denseData.toArray(new double[denseData.size()][]);
   }
 
   /**

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java?rev=1511590&r1=1511589&r2=1511590&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java
 Thu Aug  8 06:05:32 2013
@@ -112,6 +112,9 @@ public final class SSVDSolver {
   private boolean broadcast = true;
   private Path pcaMeanPath;
 
+  // for debugging
+  private long omegaSeed;
+
   /**
    * create new SSVD solver. Required parameters are passed to constructor to
    * ensure they are set. Optional parameters can be set using setters .
@@ -341,6 +344,10 @@ public final class SSVDSolver {
     this.pcaMeanPath = pcaMeanPath;
   }
 
+  /**
+   * Exposes the seed stored in the {@code omegaSeed} field, which is kept for
+   * debugging so that a caller can construct an Omega from the same seed.
+   *
+   * @return the stored Omega seed; the field's default value if nothing has
+   *         assigned it yet
+   */
+  long getOmegaSeed() {
+    return omegaSeed;
+  }
+
   /**
    * run all SSVD jobs.
    *
@@ -367,11 +374,15 @@ public final class SSVDSolver {
 
       Path pcaBasePath = new Path(outputPath, "pca");
 
+      if (overwrite) {
+        fs.delete(outputPath, true);
+      }
+
       if (pcaMeanPath != null) {
         fs.mkdirs(pcaBasePath);
       }
       Random rnd = RandomUtils.getRandom();
-      long seed = rnd.nextLong();
+      omegaSeed = rnd.nextLong();
 
       Path sbPath = null;
       double xisquaredlen = 0.0;
@@ -391,15 +402,10 @@ public final class SSVDSolver {
         }
 
         xisquaredlen = xi.dot(xi);
-        Omega omega = new Omega(seed, k + p);
+        Omega omega = new Omega(omegaSeed, k + p);
         Vector s_b0 = omega.mutlithreadedTRightMultiply(xi);
 
-        SSVDHelper.saveVector(s_b0, sbPath =
-          new Path(pcaBasePath, "somega.seq"), conf);
-      }
-
-      if (overwrite) {
-        fs.delete(outputPath, true);
+        SSVDHelper.saveVector(s_b0, sbPath = new Path(pcaBasePath, 
"somega.seq"), conf);
       }
 
       /*
@@ -415,7 +421,7 @@ public final class SSVDSolver {
                minSplitSize,
                k,
                p,
-               seed,
+               omegaSeed,
                reduceTasks);
 
       /*

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java?rev=1511590&r1=1511589&r2=1511590&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java
 Thu Aug  8 06:05:32 2013
@@ -144,8 +144,7 @@ public class UJob {
       Path sigmaPath = new 
Path(context.getConfiguration().get(PROP_SIGMA_PATH));
       FileSystem fs = FileSystem.get(uHatPath.toUri(), 
context.getConfiguration());
 
-      uHat = new DenseMatrix(SSVDHelper.loadDistributedRowMatrix(fs,
-          uHatPath, context.getConfiguration()));
+      uHat = SSVDHelper.drmLoadAsDense(fs, uHatPath, 
context.getConfiguration());
       // since uHat is (k+p) x (k+p)
       kp = uHat.columnSize();
       k = context.getConfiguration().getInt(PROP_K, kp);

Modified: 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java?rev=1511590&r1=1511589&r2=1511590&view=diff
==============================================================================
--- 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
 (original)
+++ 
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java
 Thu Aug  8 06:05:32 2013
@@ -101,8 +101,7 @@ public class VJob {
 
       Path sigmaPath = new Path(conf.get(PROP_SIGMA_PATH));
 
-      uHat =
-        new DenseMatrix(SSVDHelper.loadDistributedRowMatrix(fs, uHatPath, 
conf));
+      uHat = SSVDHelper.drmLoadAsDense(fs, uHatPath, conf);
       // since uHat is (k+p) x (k+p)
       kp = uHat.columnSize();
       k = context.getConfiguration().getInt(PROP_K, kp);

Added: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java?rev=1511590&view=auto
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java
 (added)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java
 Thu Aug  8 06:05:32 2013
@@ -0,0 +1,297 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.hadoop.stochasticsvd;
+
+import com.google.common.collect.Lists;
+import com.google.common.io.Closeables;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.SequenceFile;
+import org.apache.hadoop.io.SequenceFile.CompressionType;
+import org.apache.hadoop.io.Text;
+import org.apache.hadoop.io.Writable;
+import org.apache.hadoop.io.compress.DefaultCodec;
+import org.apache.mahout.common.IOUtils;
+import org.apache.mahout.common.MahoutTestCase;
+import org.apache.mahout.common.Pair;
+import org.apache.mahout.common.RandomUtils;
+import org.apache.mahout.math.*;
+import org.apache.mahout.math.function.DoubleFunction;
+import org.apache.mahout.math.function.Functions;
+import org.apache.mahout.math.function.VectorFunction;
+import org.junit.Test;
+
+import java.io.Closeable;
+import java.io.File;
+import java.io.IOException;
+import java.util.Deque;
+import java.util.Iterator;
+import java.util.Random;
+
+public class LocalSSVDPCASparseTest extends MahoutTestCase {
+
+  private static final double s_epsilon = 1.0E-10d;
+
+  /**
+   * Compares Omega's multithreaded transposed right-multiply with the same
+   * product computed from an explicitly materialized copy of Omega.
+   */
+  @Test
+  public void testOmegaTRightMultiply() {
+    final Random random = RandomUtils.getRandom();
+    final long omegaSeed = random.nextLong();
+    final int rows = 2000;
+    final int cols = 100;
+
+    final Omega omega = new Omega(omegaSeed, cols);
+
+    // dense copy of Omega, filled element by element
+    final Matrix denseOmega = new DenseMatrix(rows, cols);
+    for (int r = 0; r < rows; r++) {
+      for (int c = 0; c < cols; c++) {
+        denseOmega.setQuick(r, c, omega.getQuick(r, c));
+      }
+    }
+
+    // input vector populated through assign() with nextDouble() * 100
+    Vector xi = new DenseVector(rows);
+    xi.assign(new DoubleFunction() {
+      @Override
+      public double apply(double ignored) {
+        return random.nextDouble() * 100;
+      }
+    });
+
+    Vector actual = omega.mutlithreadedTRightMultiply(xi);
+
+    // control value: transpose(Omega) times xi, with xi wrapped as a one-column matrix
+    Matrix xiColumn = new DenseMatrix(rows, 1);
+    xiColumn.assignColumn(0, xi);
+    Matrix omegaTransposed = denseOmega.transpose();
+    Vector expected = omegaTransposed.times(xiColumn).viewColumn(0);
+
+    // sum of absolute element-wise differences must vanish within tolerance
+    double absDiffSum = actual.minus(expected).aggregate(Functions.PLUS, Functions.ABS);
+    assertEquals(0, absDiffSum, 1e-10);
+
+    System.out.printf("s_omega=\n%s\n", actual);
+    System.out.printf("s_omega_control=\n%s\n", expected);
+  }
+
+  /** Runs the sparse PCA solver scenario in {@link #runSSVDSolver(int)} with q = 1. */
+  @Test
+  public void runPCATest1() throws IOException {
+    runSSVDSolver(1);
+  }
+
+  // Same scenario with q = 0. The @Test annotation below is commented out, so
+  // this method is not annotated as a test; the commit log gives the reason as
+  // keeping test run time down (the q = 1 variant stays enabled).
+//  @Test
+  public void runPCATest0() throws IOException {
+    runSSVDSolver(0);
+  }
+
+
+  /**
+   * End-to-end PCA scenario: writes a random sparse matrix of named rows to a
+   * sequence file, runs {@link SSVDSolver} on it with a PCA mean path, then
+   * compares the solver's singular values with those of an in-memory
+   * {@link SingularValueDecomposition} of the mean-subtracted matrix, checks
+   * the Q output for orthonormality, and checks that the U*Sigma output rows
+   * are {@link NamedVector}s keyed by {@link Text}.
+   *
+   * @param q value handed to {@code SSVDSolver.setQ}
+   * @throws IOException propagated from file system and solver calls
+   */
+  public void runSSVDSolver(int q) throws IOException {
+
+    Configuration conf = new Configuration();
+    conf.set("mapred.job.tracker", "local");
+    conf.set("fs.default.name", "file:///");
+
+    // conf.set("mapred.job.tracker","localhost:11011");
+    // conf.set("fs.default.name","hdfs://localhost:11010/");
+
+    // everything registered here is closed in the finally block
+    Deque<Closeable> closeables = Lists.newLinkedList();
+    try {
+      Random rnd = RandomUtils.getRandom();
+
+      File tmpDir = getTestTempDir("svdtmp");
+      conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath());
+
+      Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq");
+
+      // create distributed row matrix-like struct
+      SequenceFile.Writer w =
+        SequenceFile.createWriter(FileSystem.getLocal(conf),
+                                  conf,
+                                  aLocPath,
+                                  Text.class,
+                                  VectorWritable.class,
+                                  CompressionType.BLOCK,
+                                  new DefaultCodec());
+      closeables.addFirst(w);
+
+      int n = 100;
+      int m = 2000;
+      double percent = 5;
+
+      VectorWritable vw = new VectorWritable();
+      Text rkey = new Text();
+
+      // accumulates the sum of all rows; scaled to the mean after the loop
+      Vector xi = new DenseVector(n);
+
+      double muAmplitude = 50.0;
+      for (int i = 0; i < m; i++) {
+        Vector dv = new SequentialAccessSparseVector(n);
+        String rowname = "row-"+i;
+        NamedVector namedRow = new NamedVector(dv, rowname);
+        // n * percent / 100 writes at random column indices per row
+        for (int j = 0; j < n * percent / 100; j++) {
+          dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25));
+        }
+        // NOTE(review): the key text is "row-i" + i while the vector name is
+        // "row-" + i; the assertions below only check the types, not the values.
+        rkey.set("row-i"+i);
+        vw.set(namedRow);
+        w.append(rkey, vw);
+        xi.assign(dv, Functions.PLUS);
+      }
+      // the writer is closed right here, so take it off the deferred-close list
+      closeables.remove(w);
+      Closeables.close(w, false);
+
+      // row sum -> row mean
+      xi.assign(Functions.mult(1.0 / m));
+
+      FileSystem fs = FileSystem.get(conf);
+
+      Path tempDirPath = getTestTempDirPath("svd-proc");
+      Path aPath = new Path(tempDirPath, "A/A.seq");
+      fs.copyFromLocalFile(aLocPath, aPath);
+      Path xiPath = new Path(tempDirPath, "xi/xi.seq");
+      SSVDHelper.saveVector(xi, xiPath, conf);
+
+      Path svdOutPath = new Path(tempDirPath, "SSVD-out");
+
+      // make sure we wipe out previous test results, just a convenience
+      fs.delete(svdOutPath, true);
+
+      // Solver starts here:
+      System.out.println("Input prepared, starting solver...");
+
+      int ablockRows = 867;
+      int p = 60;
+      int k = 40;
+      SSVDSolver ssvd =
+        new SSVDSolver(conf,
+                       new Path[]{aPath},
+                       svdOutPath,
+                       ablockRows,
+                       k,
+                       p,
+                       3);
+      ssvd.setOuterBlockHeight(500);
+      ssvd.setAbtBlockHeight(251);
+      ssvd.setPcaMeanPath(xiPath);
+
+    /*
+     * The V and U jobs are switched off in this test to reduce running time;
+     * they are kept in the dense test.
+     *
+     * For the PCA test we also request the U*Sigma output and check it for
+     * named vector propagation.
+     */
+      ssvd.setComputeU(false);
+      ssvd.setComputeV(false);
+      ssvd.setcUSigma(true);
+
+      ssvd.setOverwrite(true);
+      ssvd.setQ(q);
+      ssvd.setBroadcast(true);
+      ssvd.run();
+
+      Vector stochasticSValues = ssvd.getSingularValues();
+
+      // try to run the same thing without stochastic algo
+      Matrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);
+
+      // prints intermediates rebuilt from the solver's Omega seed; asserts nothing
+      verifyInternals(svdOutPath, a, new Omega(ssvd.getOmegaSeed(), k + p), k 
+ p, q);
+
+      // subtract pseudo pca mean
+      for (int i = 0; i < m; i++) {
+        a.viewRow(i).assign(xi, Functions.MINUS);
+      }
+
+      SingularValueDecomposition svd2 =
+        new SingularValueDecomposition(a);
+
+      Vector svalues2 = new DenseVector(svd2.getSingularValues());
+
+      System.out.println("--SSVD solver singular values:");
+      LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues);
+      System.out.println("--SVD solver singular values:");
+      LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2);
+
+      // the first k + p singular values must agree within s_epsilon
+      for (int i = 0; i < k + p; i++) {
+        assertTrue(Math.abs(svalues2.getQuick(i) - 
stochasticSValues.getQuick(i)) <= s_epsilon);
+      }
+
+      DenseMatrix mQ =
+        SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/"
+          + BtJob.OUTPUT_Q + "-*"), conf);
+
+      SSVDCommonTest.assertOrthonormality(mQ,
+                                          false,
+                                          s_epsilon);
+
+      // assert name propagation
+      for (Iterator<Pair<Writable, Vector>> iter = SSVDHelper.drmIterator(fs,
+                                                                          new 
Path(ssvd.getuSigmaPath()+"/*"),
+                                                                          conf,
+                                                                          
closeables); iter.hasNext(); ) {
+        Pair<Writable, Vector> pair = iter.next();
+        Writable key = pair.getFirst();
+        Vector v = pair.getSecond();
+
+        assertTrue(v instanceof NamedVector);
+        assertTrue(key instanceof Text);
+      }
+
+    } finally {
+      IOUtils.close(closeables);
+    }
+  }
+
+  /**
+   * Debugging aid: recomputes a few PCA intermediates in memory from the dense
+   * input and prints them (s_omega, s_q, s_b). It makes no assertions.
+   *
+   * @param tempDir solver output path (currently unused)
+   * @param a       input matrix, m rows by n columns
+   * @param omega   Omega instance to multiply with (the caller builds it from
+   *                the solver's seed)
+   * @param kp      number of Omega columns to materialize (k + p at the call site)
+   * @param q       solver's q setting (currently unused)
+   */
+  private void verifyInternals(Path tempDir, Matrix a, Omega omega, int kp, int q) {
+    int m = a.numRows();
+    int n = a.numCols();
+
+    // per-column mean of a
+    Vector xi = a.aggregateColumns(new VectorFunction() {
+      @Override
+      public double apply(Vector v) {
+        return v.zSum() / v.size();
+      }
+    });
+
+    // materialize omega
+    Matrix momega = new DenseMatrix(n, kp);
+    for (int i = 0; i < n; i++) {
+      for (int j = 0; j < kp; j++) {
+        momega.setQuick(i, j, omega.getQuick(i, j));
+      }
+    }
+
+    Vector s_o = omega.mutlithreadedTRightMultiply(xi);
+
+    System.out.printf("s_omega=\n%s\n", s_o);
+
+    // y = a * omega has m rows (one per row of a), so s_omega must be taken
+    // off each of those m rows. Bounding this loop by n, the column count of
+    // a, corrected only the first n rows and would overrun when m < n.
+    Matrix y = a.times(momega);
+    for (int i = 0; i < m; i++) {
+      y.viewRow(i).assign(s_o, Functions.MINUS);
+    }
+
+    QRDecomposition qr = new QRDecomposition(y);
+    Matrix qm = qr.getQ();
+
+    // column sums of Q
+    Vector s_q = qm.aggregateColumns(new VectorFunction() {
+      @Override
+      public double apply(Vector v) {
+        return v.zSum();
+      }
+    });
+
+    System.out.printf("s_q=\n%s\n", s_q);
+
+    Matrix b = qm.transpose().times(a);
+
+    Vector s_b = b.times(xi);
+
+    System.out.printf("s_b=\n%s\n", s_b);
+
+
+  }
+
+}

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java?rev=1511590&r1=1511589&r2=1511590&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java
 Thu Aug  8 06:05:32 2013
@@ -150,7 +150,7 @@ public class LocalSSVDSolverDenseTest ex
      * System.out.println("--Colt SVD solver singular values:"); // try to run
      * 
      * the same thing without stochastic algo double[][] a =
-     * SSVDSolver.loadDistributedRowMatrix(fs, aPath, conf);
+     * SSVDSolver.drmLoadAsDense(fs, aPath, conf);
      * 
      * 
      * 
@@ -173,26 +173,25 @@ public class LocalSSVDSolverDenseTest ex
           / singularValues.getQuick(i)) <= s_precisionPct / 100);
     }
 
-    double[][] mQ =
-      SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/"
-          + BtJob.OUTPUT_Q + "-*"), conf);
-
-    SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ),
-                                           false,
-                                           s_epsilon);
-
-    double[][] u =
-      SSVDHelper.loadDistributedRowMatrix(fs,
-                                          new Path(svdOutPath, "U/[^_]*"),
-                                          conf);
-
-    SSVDCommonTest.assertOrthonormality(new DenseMatrix(u), false, s_epsilon);
-    double[][] v =
-      SSVDHelper.loadDistributedRowMatrix(fs,
-                                          new Path(svdOutPath, "V/[^_]*"),
-                                          conf);
-
-    SSVDCommonTest.assertOrthonormality(new DenseMatrix(v), false, s_epsilon);
+    DenseMatrix mQ =
+      SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/"
+        + BtJob.OUTPUT_Q + "-*"), conf);
+
+    SSVDCommonTest.assertOrthonormality(mQ,
+                                        false,
+                                        s_epsilon);
+
+    DenseMatrix u =
+      SSVDHelper.drmLoadAsDense(fs,
+                                new Path(svdOutPath, "U/*"),
+                                conf);
+    SSVDCommonTest.assertOrthonormality(u, false, s_epsilon);
+
+    DenseMatrix v =
+      SSVDHelper.drmLoadAsDense(fs,
+                                new Path(svdOutPath, "V/*"),
+                                conf);
+    SSVDCommonTest.assertOrthonormality(v, false, s_epsilon);
   }
 
   static void dumpSv(Vector s) {
@@ -204,13 +203,4 @@ public class LocalSSVDSolverDenseTest ex
 
   }
 
-  static void dump(double[][] matrix) {
-    for (double[] aMatrix : matrix) {
-      for (double anAMatrix : aMatrix) {
-        System.out.printf("%f  ", anAMatrix);
-      }
-      System.out.println();
-    }
-  }
-
 }

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1511590&r1=1511589&r2=1511590&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java
 Thu Aug  8 06:05:32 2013
@@ -163,12 +163,12 @@ public class LocalSSVDSolverSparseSequen
     System.out.println("--Colt SVD solver singular values:");
 
     // try to run the same thing without stochastic algo
-    double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf);
+    DenseMatrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf);
 
     // SingularValueDecompositionImpl svd=new 
SingularValueDecompositionImpl(new
     // Array2DRowRealMatrix(a));
     SingularValueDecomposition svd2 =
-      new SingularValueDecomposition(new DenseMatrix(a));
+      new SingularValueDecomposition(a);
 
     Vector svalues2 = new DenseVector(svd2.getSingularValues());
     dumpSv(svalues2);
@@ -177,13 +177,13 @@ public class LocalSSVDSolverSparseSequen
       assertTrue(Math.abs(svalues2.getQuick(i) - 
stochasticSValues.getQuick(i)) <= s_epsilon);
     }
 
-    double[][] mQ =
-      SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/"
-          + BtJob.OUTPUT_Q + "-*"), conf);
-
-    SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ),
-                                           false,
-                                           s_epsilon);
+    DenseMatrix mQ =
+      SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/"
+        + BtJob.OUTPUT_Q + "-*"), conf);
+
+    SSVDCommonTest.assertOrthonormality(mQ,
+                                        false,
+                                        s_epsilon);
 
     IOUtils.close(closeables);
   }


Reply via email to