Author: tdunning
Date: Thu Dec 22 23:51:41 2011
New Revision: 1222515
URL: http://svn.apache.org/viewvc?rev=1222515&view=rev
Log:
MAHOUT-792 - New SSVD codes.
MAHOUT-792 - Copyright fixes for Cholesky
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/math/ssvd/
mahout/trunk/core/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java
mahout/trunk/math/src/test/java/org/apache/mahout/math/CholeskyDecompositionTest.java
Added:
mahout/trunk/core/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java?rev=1222515&view=auto
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java
(added)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvd.java
Thu Dec 22 23:51:41 2011
@@ -0,0 +1,207 @@
+package org.apache.mahout.math.ssvd;
+
+import org.apache.mahout.math.CholeskyDecomposition;
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.MatrixWritable;
+import org.apache.mahout.math.RandomTrinaryMatrix;
+import org.apache.mahout.math.SingularValueDecomposition;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.function.Functions;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+
+/**
+ * Sequential block-oriented out of core SVD algorithm.
+ * <p/>
+ * The basic algorithm (in-core version) is that we do a random projects, get
a basis of that and
+ * then re-project the original matrix using that basis. This re-projected
matrix allows us to get
+ * an approximate SVD of the original matrix.
+ * <p/>
+ * The input to this program is a list of files that contain the sub-matrices
A_i. The result is a
+ * vector of singular values and optionally files that contain the left and
right singular vectors.
+ * <p/>
+ * Mathematically, to decompose A, we do this:
+ * <p/>
+ * Y = A * \Omega
+ * <p/>
+ * Q R = Y
+ * <p/>
+ * B = Q" A
+ * <p/>
+ * U D V' = B
+ * <p/>
+ * (Q U) D V' \approx A
+ * <p/>
+ * To do this out of core, we break A into blocks each with the same number of
rows. This gives a
+ * block-wise version of Y. As we are computing Y, we can also accumulate Y'
Y and when done, we
+ * can use a Cholesky decomposition to do the QR decomposition of Y in a
latent form. That gives us
+ * B in block-wise form and we can do the same trick to get an LQ of B. The L
part can be
+ * decomposed in memory. Then we can recombine to get the final decomposition.
+ * <p/>
+ * The details go like this. Start with a block form of A.
+ * <p/>
+ * Y_i = A_i * \Omega
+ * <p/>
+ * Instead of doing a QR decomposition of Y, we do a Cholesky decomposition of
Y' Y. This is a
+ * small in-memory operation. Q is large and dense and won't fit in memory.
+ * <p/>
+ * R' R = \sum_i Y_i' Y_i
+ * <p/>
+ * For reference, R is all we need to compute explicitly. Q will be computed
on the fly when needed.
+ * <p/>
+ * Q = Y R^-1
+ * <p/>
+ * B = Q" A = \sum_i (A \Omega R^-1)' A_i
+ * <p/>
+ * As B is generated, it needs to be segmented in row-wise blocks since it is
wide but not tall.
+ * This storage requires something like a map-reduce to accumulate the partial
sums. In this code,
+ * we do this by re-reading previously computed chunks and augmenting them.
+ * <p/>
+ * While the pieces of B are being computed, we can accumulate B B' in
preparation for a second
+ * Cholesky decomposition
+ * <p/>
+ * L L' = B B' = sum B_j B_j'
+ * <p/>
+ * Again, this is an LQ decomposition of BB', but we don't compute the Q part
explicitly. L will be
+ * small and thus tractable.
+ * <p/>
+ * Finally, we do the actual SVD decomposition.
+ * <p/>
+ * U_0 D V_0' = L
+ * <p/>
+ * D contains the singular values of A. The left and right singular values
can be reconstructed
+ * using Y and B. Note that both of these reconstructions can be done with
single passes through
+ * the blocked forms of Y and B.
+ * <p/>
+ * U = A \Omega R^{-1} U_0
+ * <p/>
+ * V = B' L'^{-1} V_0
+ */
+public class SequentialOutOfCoreSvd {
+
+ private final CholeskyDecomposition l2;
+ private final SingularValueDecomposition svd;
+ private final CholeskyDecomposition r2;
+ private final int columnsPerSlice;
+ private final int seed;
+ private final int dim;
+
+ public SequentialOutOfCoreSvd(Iterable<File> partsOfA, String uBase, String
vBase, File tmpDir, int internalDimension, int columnsPerSlice) throws
IOException {
+ this.columnsPerSlice = columnsPerSlice;
+ this.dim = internalDimension;
+
+ seed = 1;
+ Matrix y2 = null;
+
+ // step 1, compute R as in R'R = Y'Y where Y = A \Omega
+ for (File file : partsOfA) {
+ MatrixWritable m = new MatrixWritable();
+ m.readFields(new DataInputStream(new FileInputStream(file)));
+
+ Matrix aI = m.get();
+ Matrix omega = new RandomTrinaryMatrix(seed, aI.numCols(),
internalDimension, false);
+ Matrix y = aI.times(omega);
+
+ if (y2 == null) {
+ y2 = y.transpose().times(y);
+ } else {
+ y2.assign(y.transpose().times(y), Functions.PLUS);
+ }
+ }
+ r2 = new CholeskyDecomposition(y2);
+
+ // step 2, compute B
+ int ncols = 0;
+ for (File file : partsOfA) {
+ MatrixWritable m = new MatrixWritable();
+ m.readFields(new DataInputStream(new FileInputStream(file)));
+ Matrix aI = m.get();
+ ncols = Math.max(ncols, aI.numCols());
+
+ for (int j = 0; j + columnsPerSlice <= aI.numCols(); j +=
columnsPerSlice) {
+ Matrix omega = new RandomTrinaryMatrix(seed, aI.numCols(),
internalDimension, false);
+ Matrix bIJ =
r2.solveLeft(aI.times(omega).transpose().times(aI.viewPart(0, aI.numRows(), j,
columnsPerSlice)));
+ addToSavedCopy(bFile(tmpDir, j), bIJ);
+ }
+ }
+
+ // step 3, compute BB', L and SVD(L)
+ Matrix b2 = new DenseMatrix(internalDimension, internalDimension);
+ MatrixWritable bTmp = new MatrixWritable();
+ for (int j = 0; j < ncols; j += columnsPerSlice) {
+ if (bFile(tmpDir, j).exists()) {
+ bTmp.readFields(new DataInputStream(new FileInputStream(bFile(tmpDir,
j))));
+ b2.assign(bTmp.get().times(bTmp.get().transpose()), Functions.PLUS);
+ }
+ }
+ l2 = new CholeskyDecomposition(b2);
+ svd = new SingularValueDecomposition(l2.getL());
+ }
+
+ public void computeV(File tmpDir, String vBase, int ncols) throws
IOException {
+ // step 5, compute pieces of V
+ if (vBase != null) {
+ for (int j = 0; j < ncols; j += columnsPerSlice) {
+ if (bFile(tmpDir, j).exists()) {
+ MatrixWritable m = new MatrixWritable();
+ m.readFields(new DataInputStream(new FileInputStream(bFile(tmpDir,
j))));
+ m.set(l2.solveRight(m.get().transpose()).times(svd.getV()));
+ m.write(new DataOutputStream(new FileOutputStream(new File(tmpDir,
vBase + j / columnsPerSlice))));
+ }
+ }
+ }
+ }
+
+ public void computeU(Iterable<File> partsOfA, String uBase, File tmpDir)
throws IOException {
+ // step 4, compute pieces of U
+ if (uBase != null) {
+ int i = 0;
+ for (File file : partsOfA) {
+ MatrixWritable m = new MatrixWritable();
+ m.readFields(new DataInputStream(new FileInputStream(file)));
+ Matrix aI = m.get();
+
+ Matrix y = aI.times(new RandomTrinaryMatrix(seed, aI.numCols(), dim,
false).viewPart(i * columnsPerSlice, columnsPerSlice, 0, aI.numCols()));
+ m.set(r2.solveRight(y).times(svd.getU()));
+ m.write(new DataOutputStream(new FileOutputStream(new File(tmpDir,
uBase + i))));
+ i++;
+ }
+ }
+ }
+
+ private void addToSavedCopy(File file, Matrix matrix) throws IOException {
+ MatrixWritable mw = new MatrixWritable();
+ if (file.exists()) {
+ DataInputStream in = new DataInputStream(new FileInputStream(file));
+ try {
+ mw.readFields(in);
+ } finally {
+ in.close();
+ }
+ mw.get().assign(matrix, Functions.PLUS);
+ } else {
+ mw.set(matrix);
+ }
+ DataOutputStream out = new DataOutputStream(new FileOutputStream(file));
+ try {
+ mw.write(out);
+ } finally {
+ out.close();
+ }
+ }
+
+ private File bFile(File tmpDir, int j) {
+ return new File(tmpDir, String.format("B-%09d", j));
+ }
+
+ public Vector getSingularValues() {
+ return new DenseVector(svd.getSingularValues());
+ }
+}
Added:
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java?rev=1222515&view=auto
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
(added)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/math/ssvd/SequentialOutOfCoreSvdTest.java
Thu Dec 22 23:51:41 2011
@@ -0,0 +1,155 @@
+package org.apache.mahout.math.ssvd;
+
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.DiagonalMatrix;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.MatrixWritable;
+import org.apache.mahout.math.RandomTrinaryMatrix;
+import org.apache.mahout.math.Vector;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.DataInputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+import static org.junit.Assert.assertEquals;
+
+public class SequentialOutOfCoreSvdTest {
+
+ private File tmpDir;
+
+ @Before
+ public void setup() throws IOException {
+ tmpDir = File.createTempFile("matrix", "");
+ Assert.assertTrue(tmpDir.delete());
+ Assert.assertTrue(tmpDir.mkdir());
+ }
+
+ @After
+ public void tearDown() {
+ for (File f : tmpDir.listFiles()) {
+ Assert.assertTrue(f.delete());
+ }
+ Assert.assertTrue(tmpDir.delete());
+ }
+
+ @Test
+ public void testSingularValues() throws IOException {
+ Matrix A = lowRankMatrix(tmpDir, "A", 200);
+
+ List<File> partsOfA = Arrays.asList(tmpDir.listFiles(new FilenameFilter() {
+ @Override
+ public boolean accept(File file, String s) {
+ return s.matches("A-.*");
+ }
+ }));
+ SequentialOutOfCoreSvd s = new SequentialOutOfCoreSvd(partsOfA, "U", "V",
tmpDir, 100, 200);
+ SequentialBigSvd svd = new SequentialBigSvd(A, 20);
+
+ Vector reference = new DenseVector(svd.getSingularValues()).viewPart(0, 6);
+ assertEquals(0, reference.minus(s.getSingularValues().viewPart(0,
6)).maxValue(), 1e-9);
+
+ s.computeU(partsOfA, "U-", tmpDir);
+ Matrix u = readBlockMatrix(Arrays.asList(tmpDir.listFiles(new
FilenameFilter() {
+ @Override
+ public boolean accept(File file, String s) {
+ return s.matches("U-.*");
+ }
+ })), A.rowSize(), 15);
+
+ s.computeV(tmpDir, "V-", A.columnSize());
+ Matrix v = readBlockMatrix(Arrays.asList(tmpDir.listFiles(new
FilenameFilter() {
+ @Override
+ public boolean accept(File file, String s) {
+ return s.matches("V-.*");
+ }
+ })), A.rowSize(), 15);
+
+ assertEquals(A, u.times(new
DiagonalMatrix(s.getSingularValues())).times(v.transpose()));
+ }
+
+ private Matrix readBlockMatrix(List<File> files, int nrows, int ncols)
throws IOException {
+ Collections.sort(files);
+ Matrix r = new DenseMatrix(nrows, ncols);
+
+ MatrixWritable m = new MatrixWritable();
+
+ int row = 0;
+ for (File file : files) {
+ DataInputStream in = new DataInputStream(new FileInputStream(file));
+ m.readFields(in);
+ in.close();
+ r.viewPart(row, m.get().rowSize(), 0, r.columnSize()).assign(m.get());
+ row += m.get().rowSize();
+ }
+ return r;
+ }
+
+// @Test
+// public void testLeftVectors() {
+// Matrix A = lowRankMatrix();
+//
+// SequentialBigSvd s = new SequentialBigSvd(A, 6);
+// SingularValueDecomposition svd = new SingularValueDecomposition(A);
+//
+// // can only check first few singular vectors
+// Matrix u1 = svd.getU().viewPart(0, 20, 0, 3).assign(Functions.ABS);
+// Matrix u2 = s.getU().viewPart(0, 20, 0, 3).assign(Functions.ABS);
+// assertEquals(u1, u2);
+// }
+//
+// private void assertEquals(Matrix u1, Matrix u2) {
+// Assert.assertEquals(0.0, u1.minus(u2).aggregate(Functions.MAX,
Functions.ABS), 1e-10);
+// }
+//
+// private void assertEquals(Vector u1, Vector u2) {
+// Assert.assertEquals(0.0, u1.minus(u2).aggregate(Functions.MAX,
Functions.ABS), 1e-10);
+// }
+//
+// @Test
+// public void testRightVectors() {
+// Matrix A = lowRankMatrix();
+//
+// SequentialBigSvd s = new SequentialBigSvd(A, 6);
+// SingularValueDecomposition svd = new SingularValueDecomposition(A);
+//
+// Matrix v1 = svd.getV().viewPart(0, 20, 0, 3).assign(Functions.ABS);
+// Matrix v2 = s.getV().viewPart(0, 20, 0, 3).assign(Functions.ABS);
+// assertEquals(v1, v2);
+// }
+
+ private Matrix lowRankMatrix(File tmpDir, String aBase, int rowsPerSlice)
throws IOException {
+ int rank = 10;
+ Matrix u = new RandomTrinaryMatrix(1, 1000, rank, false);
+ Matrix d = new DenseMatrix(rank, rank);
+ d.set(0, 0, 5);
+ d.set(1, 1, 3);
+ d.set(2, 2, 1);
+ d.set(3, 3, 0);
+ Matrix v = new RandomTrinaryMatrix(2, 1000, rank, false);
+ Matrix a = u.times(d).times(v.transpose());
+
+ for (int i = 0; i < a.rowSize(); i += rowsPerSlice) {
+ MatrixWritable m = new MatrixWritable(a.viewPart(i, rowsPerSlice, 0,
a.columnSize()));
+ DataOutputStream out = new DataOutputStream(new FileOutputStream(new
File(tmpDir, String.format("%s-%09d", aBase, i))));
+ try {
+ m.write(out);
+ } finally {
+ out.close();
+ }
+ }
+ return a;
+ }
+
+}
Modified:
mahout/trunk/math/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java?rev=1222515&r1=1222514&r2=1222515&view=diff
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java
(original)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java
Thu Dec 22 23:51:41 2011
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.math;
import com.google.common.base.Preconditions;
Added:
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java?rev=1222515&view=auto
==============================================================================
---
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java
(added)
+++
mahout/trunk/math/src/main/java/org/apache/mahout/math/ssvd/SequentialBigSvd.java
Thu Dec 22 23:51:41 2011
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.ssvd;
+
+import org.apache.mahout.math.CholeskyDecomposition;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.RandomTrinaryMatrix;
+import org.apache.mahout.math.SingularValueDecomposition;
+import org.apache.mahout.math.Vector;
+
+/**
+ * Implements an in-memory version of stochastic projection based SVD. See
SequentialOutOfCoreSvd
+ * for algorithm notes.
+ */
+public class SequentialBigSvd {
+ private Matrix y;
+ private CholeskyDecomposition cd1;
+ private CholeskyDecomposition cd2;
+ private SingularValueDecomposition svd;
+ private Matrix b;
+
+
+ public SequentialBigSvd(Matrix A, int p) {
+ // Y = A * \Omega
+ y = A.times(new RandomTrinaryMatrix(A.rowSize(), p));
+
+ // R'R = Y' Y
+ cd1 = new CholeskyDecomposition(y.transpose().times(y));
+
+ // B = Q" A = (Y R^{-1} )' A
+ b = cd1.solveRight(y).transpose().times(A);
+
+ // L L' = B B'
+ cd2 = new CholeskyDecomposition(b.times(b.transpose()));
+
+ // U_0 D V_0' = L
+ svd = new SingularValueDecomposition(cd2.getL());
+ }
+
+ public Vector getSingularValues() {
+ return new DenseVector(svd.getSingularValues());
+ }
+
+ public Matrix getU() {
+ // U = (Y inv(R)) U_0
+ return cd1.solveRight(y).times(svd.getU());
+ }
+
+ public Matrix getV() {
+ // V = (B' inv(L')) V_0
+ return cd2.solveRight(b.transpose()).times(svd.getV());
+ }
+}
Modified:
mahout/trunk/math/src/test/java/org/apache/mahout/math/CholeskyDecompositionTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/CholeskyDecompositionTest.java?rev=1222515&r1=1222514&r2=1222515&view=diff
==============================================================================
---
mahout/trunk/math/src/test/java/org/apache/mahout/math/CholeskyDecompositionTest.java
(original)
+++
mahout/trunk/math/src/test/java/org/apache/mahout/math/CholeskyDecompositionTest.java
Thu Dec 22 23:51:41 2011
@@ -1,3 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
package org.apache.mahout.math;
import org.apache.mahout.common.RandomUtils;
Added:
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java?rev=1222515&view=auto
==============================================================================
---
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java
(added)
+++
mahout/trunk/math/src/test/java/org/apache/mahout/math/ssvd/SequentialBigSvdTest.java
Thu Dec 22 23:51:41 2011
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math.ssvd;
+
+import org.apache.mahout.math.DenseMatrix;
+import org.apache.mahout.math.DenseVector;
+import org.apache.mahout.math.DiagonalMatrix;
+import org.apache.mahout.math.MahoutTestCase;
+import org.apache.mahout.math.Matrix;
+import org.apache.mahout.math.RandomTrinaryMatrix;
+import org.apache.mahout.math.SingularValueDecomposition;
+import org.apache.mahout.math.Vector;
+import org.apache.mahout.math.function.Functions;
+import org.junit.Test;
+
+public class SequentialBigSvdTest extends MahoutTestCase {
+ @Test
+ public void testSingularValues() {
+ Matrix A = lowRankMatrix();
+
+ SequentialBigSvd s = new SequentialBigSvd(A, 8);
+ SingularValueDecomposition svd = new SingularValueDecomposition(A);
+
+ Vector reference = new DenseVector(svd.getSingularValues()).viewPart(0, 8);
+ assertEquals(reference, s.getSingularValues());
+
+ assertEquals(A, s.getU().times(new
DiagonalMatrix(s.getSingularValues())).times(s.getV().transpose()));
+ }
+
+ @Test
+ public void testLeftVectors() {
+ Matrix A = lowRankMatrix();
+
+ SequentialBigSvd s = new SequentialBigSvd(A, 6);
+ SingularValueDecomposition svd = new SingularValueDecomposition(A);
+
+ // can only check first few singular vectors
+ Matrix u1 = svd.getU().viewPart(0, 20, 0, 3).assign(Functions.ABS);
+ Matrix u2 = s.getU().viewPart(0, 20, 0, 3).assign(Functions.ABS);
+ assertEquals(u1, u2);
+ }
+
+ private void assertEquals(Matrix u1, Matrix u2) {
+ assertEquals(0, u1.minus(u2).aggregate(Functions.MAX, Functions.ABS),
1e-10);
+ }
+
+ private void assertEquals(Vector u1, Vector u2) {
+ assertEquals(0, u1.minus(u2).aggregate(Functions.MAX, Functions.ABS),
1e-10);
+ }
+
+ @Test
+ public void testRightVectors() {
+ Matrix A = lowRankMatrix();
+
+ SequentialBigSvd s = new SequentialBigSvd(A, 6);
+ SingularValueDecomposition svd = new SingularValueDecomposition(A);
+
+ Matrix v1 = svd.getV().viewPart(0, 20, 0, 3).assign(Functions.ABS);
+ Matrix v2 = s.getV().viewPart(0, 20, 0, 3).assign(Functions.ABS);
+ assertEquals(v1, v2);
+ }
+
+ private Matrix lowRankMatrix() {
+ Matrix u = new RandomTrinaryMatrix(1, 20, 4, false);
+ Matrix d = new DenseMatrix(4, 4);
+ d.set(0, 0, 5);
+ d.set(1, 1, 3);
+ d.set(2, 2, 1);
+ d.set(3, 3, 0);
+ Matrix v = new RandomTrinaryMatrix(2, 20, 4, false);
+
+ return u.times(d).times(v.transpose());
+ }
+}