Author: tdunning
Date: Thu Dec 22 23:51:50 2011
New Revision: 1222516

URL: http://svn.apache.org/viewvc?rev=1222516&view=rev
Log:
MAHOUT-792 - Made tests use odd sizes to detect row/column confusion.  Fixed 
small errors in out-of-core SVD.

Modified:
    
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
    
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java
    
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java

Modified: 
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java?rev=1222516&r1=1222515&r2=1222516&view=diff
==============================================================================
--- 
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
 (original)
+++ 
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
 Thu Dec 22 23:51:50 2011
@@ -7,6 +7,7 @@ import org.apache.mahout.math.Matrix;
 import org.apache.mahout.math.MatrixWritable;
 import org.apache.mahout.math.RandomTrinaryMatrix;
 import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.function.Functions;
 import org.junit.After;
 import org.junit.Assert;
 import org.junit.Before;
@@ -46,7 +47,7 @@ public class SequentialOutOfCoreSvdTest 
 
   @Test
   public void testSingularValues() throws IOException {
-    Matrix A = lowRankMatrix(tmpDir, "A", 200);
+    Matrix A = lowRankMatrix(tmpDir, "A", 200, 970, 1020);
 
     List<File> partsOfA = Arrays.asList(tmpDir.listFiles(new FilenameFilter() {
       @Override
@@ -54,11 +55,12 @@ public class SequentialOutOfCoreSvdTest 
         return s.matches("A-.*");
       }
     }));
-    SequentialOutOfCoreSvd s = new SequentialOutOfCoreSvd(partsOfA, "U", "V", 
tmpDir, 100, 200);
-    SequentialBigSvd svd = new SequentialBigSvd(A, 20);
+    SequentialOutOfCoreSvd s = new SequentialOutOfCoreSvd(partsOfA, "U", "V", 
tmpDir, 100, 210);
+    SequentialBigSvd svd = new SequentialBigSvd(A, 100);
 
     Vector reference = new DenseVector(svd.getSingularValues()).viewPart(0, 6);
-    assertEquals(0, reference.minus(s.getSingularValues().viewPart(0, 
6)).maxValue(), 1e-9);
+    Vector actual = s.getSingularValues().viewPart(0, 6);
+    assertEquals(0, reference.minus(actual).maxValue(), 1e-9);
 
     s.computeU(partsOfA, "U-", tmpDir);
     Matrix u = readBlockMatrix(Arrays.asList(tmpDir.listFiles(new 
FilenameFilter() {
@@ -66,7 +68,7 @@ public class SequentialOutOfCoreSvdTest 
       public boolean accept(File file, String s) {
         return s.matches("U-.*");
       }
-    })), A.rowSize(), 15);
+    })));
 
     s.computeV(tmpDir, "V-", A.columnSize());
     Matrix v = readBlockMatrix(Arrays.asList(tmpDir.listFiles(new 
FilenameFilter() {
@@ -74,14 +76,17 @@ public class SequentialOutOfCoreSvdTest 
       public boolean accept(File file, String s) {
         return s.matches("V-.*");
       }
-    })), A.rowSize(), 15);
+    })));
 
-    assertEquals(A, u.times(new 
DiagonalMatrix(s.getSingularValues())).times(v.transpose()));
+    // The values in A are pretty big so this is a pretty tight relative 
tolerance
+    assertEquals(0, A.minus(u.times(new 
DiagonalMatrix(s.getSingularValues())).times(v.transpose())).aggregate(Functions.PLUS,
 Functions.ABS), 1e-7);
   }
 
-  private Matrix readBlockMatrix(List<File> files, int nrows, int ncols) 
throws IOException {
+  private Matrix readBlockMatrix(List<File> files) throws IOException {
     Collections.sort(files);
-    Matrix r = new DenseMatrix(nrows, ncols);
+    int nrows = -1;
+    int ncols = -1;
+    Matrix r = null;
 
     MatrixWritable m = new MatrixWritable();
 
@@ -90,9 +95,17 @@ public class SequentialOutOfCoreSvdTest 
       DataInputStream in = new DataInputStream(new FileInputStream(file));
       m.readFields(in);
       in.close();
+      if (nrows == -1) {
+        nrows = m.get().rowSize() * files.size();
+        ncols = m.get().columnSize();
+        r = new DenseMatrix(nrows, ncols);
+      }
       r.viewPart(row, m.get().rowSize(), 0, r.columnSize()).assign(m.get());
       row += m.get().rowSize();
     }
+    if (row != nrows && r != null) {
+      r = r.viewPart(0, row, 0, ncols);
+    }
     return r;
   }
 
@@ -129,19 +142,19 @@ public class SequentialOutOfCoreSvdTest 
 //    assertEquals(v1, v2);
 //  }
 
-  private Matrix lowRankMatrix(File tmpDir, String aBase, int rowsPerSlice) 
throws IOException {
+  private Matrix lowRankMatrix(File tmpDir, String aBase, int rowsPerSlice, 
int rows, int columns) throws IOException {
     int rank = 10;
-    Matrix u = new RandomTrinaryMatrix(1, 1000, rank, false);
+    Matrix u = new RandomTrinaryMatrix(1, rows, rank, false);
     Matrix d = new DenseMatrix(rank, rank);
     d.set(0, 0, 5);
     d.set(1, 1, 3);
     d.set(2, 2, 1);
-    d.set(3, 3, 0);
-    Matrix v = new RandomTrinaryMatrix(2, 1000, rank, false);
+    d.set(3, 3, 0.5);
+    Matrix v = new RandomTrinaryMatrix(2, columns, rank, false);
     Matrix a = u.times(d).times(v.transpose());
 
     for (int i = 0; i < a.rowSize(); i += rowsPerSlice) {
-      MatrixWritable m = new MatrixWritable(a.viewPart(i, rowsPerSlice, 0, 
a.columnSize()));
+      MatrixWritable m = new MatrixWritable(a.viewPart(i, Math.min(a.rowSize() 
- i, rowsPerSlice), 0, a.columnSize()));
       DataOutputStream out = new DataOutputStream(new FileOutputStream(new 
File(tmpDir, String.format("%s-%09d", aBase, i))));
       try {
         m.write(out);

Modified: 
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java?rev=1222516&r1=1222515&r2=1222516&view=diff
==============================================================================
--- 
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java
 (original)
+++ 
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java
 Thu Dec 22 23:51:50 2011
@@ -38,7 +38,7 @@ public class SequentialBigSvd {
 
   public SequentialBigSvd(Matrix A, int p) {
     // Y = A * \Omega
-    y = A.times(new RandomTrinaryMatrix(A.rowSize(), p));
+    y = A.times(new RandomTrinaryMatrix(A.columnSize(), p));
 
     // R'R = Y' Y
     cd1 = new CholeskyDecomposition(y.transpose().times(y));

Modified: 
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java
URL: 
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java?rev=1222516&r1=1222515&r2=1222516&view=diff
==============================================================================
--- 
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java
 (original)
+++ 
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java
 Thu Dec 22 23:51:50 2011
@@ -46,13 +46,14 @@ public class SequentialBigSvdTest extend
   public void testLeftVectors() {
     Matrix A = lowRankMatrix();
 
-    SequentialBigSvd s = new SequentialBigSvd(A, 6);
+    SequentialBigSvd s = new SequentialBigSvd(A, 8);
     SingularValueDecomposition svd = new SingularValueDecomposition(A);
 
-    // can only check first few singular vectors
-    Matrix u1 = svd.getU().viewPart(0, 20, 0, 3).assign(Functions.ABS);
-    Matrix u2 = s.getU().viewPart(0, 20, 0, 3).assign(Functions.ABS);
-    assertEquals(u1, u2);
+    // can only check first few singular vectors because once the singular 
values
+    // go to zero, the singular vectors are not uniquely determined
+    Matrix u1 = svd.getU().viewPart(0, 20, 0, 4).assign(Functions.ABS);
+    Matrix u2 = s.getU().viewPart(0, 20, 0, 4).assign(Functions.ABS);
+    assertEquals(0, u1.minus(u2).aggregate(Functions.PLUS, Functions.ABS), 
1e-9);
   }
 
   private void assertEquals(Matrix u1, Matrix u2) {
@@ -77,12 +78,8 @@ public class SequentialBigSvdTest extend
 
   private Matrix lowRankMatrix() {
     Matrix u = new RandomTrinaryMatrix(1, 20, 4, false);
-    Matrix d = new DenseMatrix(4, 4);
-    d.set(0, 0, 5);
-    d.set(1, 1, 3);
-    d.set(2, 2, 1);
-    d.set(3, 3, 0);
-    Matrix v = new RandomTrinaryMatrix(2, 20, 4, false);
+    Matrix d = new DiagonalMatrix(new double[]{5, 3, 1, 0.5});
+    Matrix v = new RandomTrinaryMatrix(2, 23, 4, false);
 
     return u.times(d).times(v.transpose());
   }


Reply via email to