Author: dlyubimov Date: Thu Aug 8 06:05:32 2013 New Revision: 1511590 URL: http://svn.apache.org/r1511590 Log: MAHOUT-1306: fixing PCA tests; -ow flag causing PCA errors
Squashed commit of the following: commit cefe0d4863955288f304683ea25cc97047805bdd Author: Dmitriy Lyubimov <[email protected]> Date: Wed Aug 7 22:50:48 2013 -0700 disabling sparse SSVD PCA test with -q=0 (leaving -q=1) in the interest of smaller test run time. commit 412f9c5e06bf2a510948475e7d670f91d18d3386 Author: Dmitriy Lyubimov <[email protected]> Date: Wed Aug 7 22:36:17 2013 -0700 cleaning up certain things; pca tests are asserting names and keys propagation commit 4b002b30ddd4038311c003c77b7ba543eadcda98 Merge: 15b9a05 70777c4 Author: Dmitriy Lyubimov <[email protected]> Date: Wed Aug 7 21:27:57 2013 -0700 Merge branch 'trunk' into PCA_fixes commit 15b9a056603f703707672e601c1e3c13dbd00b8f Author: Dmitriy Lyubimov <[email protected]> Date: Wed Aug 7 15:27:58 2013 -0700 reverting test back to q=1 commit 8e91cc6ae5056b159fa6208a26b5a4f0e002bd8c Author: Dmitriy Lyubimov <[email protected]> Date: Wed Aug 7 13:12:36 2013 -0700 test fixes Added: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java Removed: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java?rev=1511590&r1=1511589&r2=1511590&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/QJob.java Thu Aug 8 06:05:32 2013 @@ -100,6 +100,8 @@ public final class QJob { String sbPathStr = conf.get(PROP_SB_PATH); if (sbPathStr != null) { sb = SSVDHelper.loadAndSumUpVectors(new Path(sbPathStr), conf); + if (sb == null) + throw new IOException(String.format("Unable to load s_omega from path %s.", sbPathStr)); } outputs = new MultipleOutputs(new JobConf(conf)); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java?rev=1511590&r1=1511589&r2=1511590&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDHelper.java Thu Aug 8 06:05:32 2013 @@ -17,13 +17,15 @@ package org.apache.mahout.math.hadoop.stochasticsvd; +import java.io.Closeable; import java.io.IOException; +import java.util.*; import java.util.Arrays; -import java.util.Comparator; -import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; +import com.google.common.base.Function; +import com.google.common.collect.Iterators; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -31,11 +33,11 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.SequenceFile; import org.apache.hadoop.io.Writable; -import org.apache.mahout.common.iterator.sequencefile.PathFilters; -import org.apache.mahout.common.iterator.sequencefile.PathType; -import org.apache.mahout.common.iterator.sequencefile.SequenceFileDirValueIterator; -import org.apache.mahout.common.iterator.sequencefile.SequenceFileValueIterable; +import org.apache.mahout.common.IOUtils; +import org.apache.mahout.common.Pair; +import org.apache.mahout.common.iterator.sequencefile.*; import org.apache.mahout.math.*; +import org.apache.mahout.math.Vector; import org.apache.mahout.math.function.Functions; import com.google.common.collect.Lists; @@ -43,7 +45,6 @@ import com.google.common.io.Closeables; /** * set of small file manipulation helpers. - * */ public final class SSVDHelper { @@ -149,13 +150,13 @@ public final class SSVDHelper { matcher.reset(o1.getPath().getName()); if (!matcher.matches()) { throw new IllegalArgumentException("Unexpected file name, unable to deduce partition #:" - + o1.getPath()); + + o1.getPath()); } int p1 = Integer.parseInt(matcher.group(3)); matcher.reset(o2.getPath().getName()); if (!matcher.matches()) { throw new IllegalArgumentException("Unexpected file name, unable to deduce partition #:" - + o2.getPath()); + + o2.getPath()); } int p2 = Integer.parseInt(matcher.group(3)); @@ -164,49 +165,63 @@ public final class SSVDHelper { }; + public static Iterator<Pair<Writable, Vector>> drmIterator(FileSystem fs, Path glob, Configuration conf, + Deque<Closeable> closeables) + throws IOException { + SequenceFileDirIterator<Writable, VectorWritable> ret = + new SequenceFileDirIterator<Writable, VectorWritable>(glob, + PathType.GLOB, + PathFilters.logsCRCFilter(), + PARTITION_COMPARATOR, + true, + conf); + closeables.addFirst(ret); + return Iterators.transform(ret, new Function<Pair<Writable, VectorWritable>, Pair<Writable, Vector>>() { + @Override + public Pair<Writable, Vector> apply(Pair<Writable, VectorWritable> p) { + return new Pair(p.getFirst(), p.getSecond().get()); + } + }); + } + /** * helper capabiltiy to load distributed row matrices into dense matrix (to * support tests mainly). * - * @param fs - * filesystem - * @param glob - * FS glob - * @param conf - * configuration + * @param fs filesystem + * @param glob FS glob + * @param conf configuration * @return Dense matrix array */ - public static double[][] loadDistributedRowMatrix(FileSystem fs, Path glob, Configuration conf) throws IOException { + public static DenseMatrix drmLoadAsDense(FileSystem fs, Path glob, Configuration conf) throws IOException { - FileStatus[] files = fs.globStatus(glob); - if (files == null) { - return null; - } - - List<double[]> denseData = Lists.newArrayList(); - - /* - * assume it is partitioned output, so we need to read them up in order of - * partitions. - */ - Arrays.sort(files, PARTITION_COMPARATOR); - - for (FileStatus fstat : files) { - for (VectorWritable value : new SequenceFileValueIterable<VectorWritable>(fstat.getPath(), - true, - conf)) { - Vector v = value.get(); - int size = v.size(); - double[] row = new double[size]; - for (int i = 0; i < size; i++) { - row[i] = v.get(i); + Deque<Closeable> closeables = new ArrayDeque<Closeable>(); + try { + List<double[]> denseData = new ArrayList<double[]>(); + for (Iterator<Pair<Writable, Vector>> iter = drmIterator(fs, glob, conf, closeables); + iter.hasNext(); ) { + Pair<Writable, Vector> p = iter.next(); + Vector v = p.getSecond(); + double[] dd = new double[v.size()]; + if (v.isDense()) { + for (int i = 0; i < v.size(); i++) { + dd[i] = v.getQuick(i); + } + } else { + for (Vector.Element el : v.nonZeroes()) { + dd[el.index()] = el.get(); + } } - // ignore row label. - denseData.add(row); + denseData.add(dd); } + if (denseData.size() == 0) { + return null; + } else { + return new DenseMatrix(denseData.toArray(new double[denseData.size()][])); + } + } finally { + IOUtils.close(closeables); } - - return denseData.toArray(new double[denseData.size()][]); } /** Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java?rev=1511590&r1=1511589&r2=1511590&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDSolver.java Thu Aug 8 06:05:32 2013 @@ -112,6 +112,9 @@ public final class SSVDSolver { private boolean broadcast = true; private Path pcaMeanPath; + // for debugging + private long omegaSeed; + /** * create new SSVD solver. Required parameters are passed to constructor to * ensure they are set. Optional parameters can be set using setters . @@ -341,6 +344,10 @@ public final class SSVDSolver { this.pcaMeanPath = pcaMeanPath; } + long getOmegaSeed() { + return omegaSeed; + } + /** * run all SSVD jobs. * @@ -367,11 +374,15 @@ public final class SSVDSolver { Path pcaBasePath = new Path(outputPath, "pca"); + if (overwrite) { + fs.delete(outputPath, true); + } + if (pcaMeanPath != null) { fs.mkdirs(pcaBasePath); } Random rnd = RandomUtils.getRandom(); - long seed = rnd.nextLong(); + omegaSeed = rnd.nextLong(); Path sbPath = null; double xisquaredlen = 0.0; @@ -391,15 +402,10 @@ public final class SSVDSolver { } xisquaredlen = xi.dot(xi); - Omega omega = new Omega(seed, k + p); + Omega omega = new Omega(omegaSeed, k + p); Vector s_b0 = omega.mutlithreadedTRightMultiply(xi); - SSVDHelper.saveVector(s_b0, sbPath = - new Path(pcaBasePath, "somega.seq"), conf); - } - - if (overwrite) { - fs.delete(outputPath, true); + SSVDHelper.saveVector(s_b0, sbPath = new Path(pcaBasePath, "somega.seq"), conf); } /* @@ -415,7 +421,7 @@ public final class SSVDSolver { minSplitSize, k, p, - seed, + omegaSeed, reduceTasks); /* Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java?rev=1511590&r1=1511589&r2=1511590&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/UJob.java Thu Aug 8 06:05:32 2013 @@ -144,8 +144,7 @@ public class UJob { Path sigmaPath = new Path(context.getConfiguration().get(PROP_SIGMA_PATH)); FileSystem fs = FileSystem.get(uHatPath.toUri(), context.getConfiguration()); - uHat = new DenseMatrix(SSVDHelper.loadDistributedRowMatrix(fs, - uHatPath, context.getConfiguration())); + uHat = SSVDHelper.drmLoadAsDense(fs, uHatPath, context.getConfiguration()); // since uHat is (k+p) x (k+p) kp = uHat.columnSize(); k = context.getConfiguration().getInt(PROP_K, kp); Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java?rev=1511590&r1=1511589&r2=1511590&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java Thu Aug 8 06:05:32 2013 @@ -101,8 +101,7 @@ public class VJob { Path sigmaPath = new Path(conf.get(PROP_SIGMA_PATH)); - uHat = - new DenseMatrix(SSVDHelper.loadDistributedRowMatrix(fs, uHatPath, conf)); + uHat = SSVDHelper.drmLoadAsDense(fs, uHatPath, conf); // since uHat is (k+p) x (k+p) kp = uHat.columnSize(); k = context.getConfiguration().getInt(PROP_K, kp); Added: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java?rev=1511590&view=auto ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java (added) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCASparseTest.java Thu Aug 8 06:05:32 2013 @@ -0,0 +1,297 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math.hadoop.stochasticsvd; + +import com.google.common.collect.Lists; +import com.google.common.io.Closeables; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.io.Writable; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.mahout.common.IOUtils; +import org.apache.mahout.common.MahoutTestCase; +import org.apache.mahout.common.Pair; +import org.apache.mahout.common.RandomUtils; +import org.apache.mahout.math.*; +import org.apache.mahout.math.function.DoubleFunction; +import org.apache.mahout.math.function.Functions; +import org.apache.mahout.math.function.VectorFunction; +import org.junit.Test; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.Deque; +import java.util.Iterator; +import java.util.Random; + +public class LocalSSVDPCASparseTest extends MahoutTestCase { + + private static final double s_epsilon = 1.0E-10d; + + @Test + public void testOmegaTRightMultiply() { + final Random rnd = RandomUtils.getRandom(); + final long seed = rnd.nextLong(); + final int n = 2000; + + final int kp = 100; + + final Omega omega = new Omega(seed, kp); + final Matrix materializedOmega = new DenseMatrix(n, kp); + for (int i = 0; i < n; i++) + for (int j = 0; j < kp; j++) + materializedOmega.setQuick(i, j, omega.getQuick(i, j)); + Vector xi = new DenseVector(n); + xi.assign(new DoubleFunction() { + @Override + public double apply(double x) { + return rnd.nextDouble() * 100; + } + }); + + Vector s_o = omega.mutlithreadedTRightMultiply(xi); + + Matrix xiVector = new DenseMatrix(n, 1); + xiVector.assignColumn(0, xi); + + Vector s_o_control = materializedOmega.transpose().times(xiVector).viewColumn(0); + + assertEquals(0, s_o.minus(s_o_control).aggregate(Functions.PLUS, Functions.ABS), 1e-10); + + System.out.printf("s_omega=\n%s\n", s_o); + System.out.printf("s_omega_control=\n%s\n", s_o_control); + } + + @Test + public void runPCATest1() throws IOException { + runSSVDSolver(1); + } + +// @Test + public void runPCATest0() throws IOException { + runSSVDSolver(0); + } + + + public void runSSVDSolver(int q) throws IOException { + + Configuration conf = new Configuration(); + conf.set("mapred.job.tracker", "local"); + conf.set("fs.default.name", "file:///"); + + // conf.set("mapred.job.tracker","localhost:11011"); + // conf.set("fs.default.name","hdfs://localhost:11010/"); + + Deque<Closeable> closeables = Lists.newLinkedList(); + try { + Random rnd = RandomUtils.getRandom(); + + File tmpDir = getTestTempDir("svdtmp"); + conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath()); + + Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq"); + + // create distributed row matrix-like struct + SequenceFile.Writer w = + SequenceFile.createWriter(FileSystem.getLocal(conf), + conf, + aLocPath, + Text.class, + VectorWritable.class, + CompressionType.BLOCK, + new DefaultCodec()); + closeables.addFirst(w); + + int n = 100; + int m = 2000; + double percent = 5; + + VectorWritable vw = new VectorWritable(); + Text rkey = new Text(); + + Vector xi = new DenseVector(n); + + double muAmplitude = 50.0; + for (int i = 0; i < m; i++) { + Vector dv = new SequentialAccessSparseVector(n); + String rowname = "row-"+i; + NamedVector namedRow = new NamedVector(dv, rowname); + for (int j = 0; j < n * percent / 100; j++) { + dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25)); + } + rkey.set("row-i"+i); + vw.set(namedRow); + w.append(rkey, vw); + xi.assign(dv, Functions.PLUS); + } + closeables.remove(w); + Closeables.close(w, false); + + xi.assign(Functions.mult(1.0 / m)); + + FileSystem fs = FileSystem.get(conf); + + Path tempDirPath = getTestTempDirPath("svd-proc"); + Path aPath = new Path(tempDirPath, "A/A.seq"); + fs.copyFromLocalFile(aLocPath, aPath); + Path xiPath = new Path(tempDirPath, "xi/xi.seq"); + SSVDHelper.saveVector(xi, xiPath, conf); + + Path svdOutPath = new Path(tempDirPath, "SSVD-out"); + + // make sure we wipe out previous test results, just a convenience + fs.delete(svdOutPath, true); + + // Solver starts here: + System.out.println("Input prepared, starting solver..."); + + int ablockRows = 867; + int p = 60; + int k = 40; + SSVDSolver ssvd = + new SSVDSolver(conf, + new Path[]{aPath}, + svdOutPath, + ablockRows, + k, + p, + 3); + ssvd.setOuterBlockHeight(500); + ssvd.setAbtBlockHeight(251); + ssvd.setPcaMeanPath(xiPath); + + /* + * Removing V,U jobs from this test to reduce running time. i will keep them + * put in the dense test though. + * + * For PCA test, we also want to request U*Sigma output and check it for named + * vector propagation. + */ + ssvd.setComputeU(false); + ssvd.setComputeV(false); + ssvd.setcUSigma(true); + + ssvd.setOverwrite(true); + ssvd.setQ(q); + ssvd.setBroadcast(true); + ssvd.run(); + + Vector stochasticSValues = ssvd.getSingularValues(); + + // try to run the same thing without stochastic algo + Matrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf); + + verifyInternals(svdOutPath, a, new Omega(ssvd.getOmegaSeed(), k + p), k + p, q); + + // subtract pseudo pca mean + for (int i = 0; i < m; i++) { + a.viewRow(i).assign(xi, Functions.MINUS); + } + + SingularValueDecomposition svd2 = + new SingularValueDecomposition(a); + + Vector svalues2 = new DenseVector(svd2.getSingularValues()); + + System.out.println("--SSVD solver singular values:"); + LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues); + System.out.println("--SVD solver singular values:"); + LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2); + + for (int i = 0; i < k + p; i++) { + assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon); + } + + DenseMatrix mQ = + SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + + BtJob.OUTPUT_Q + "-*"), conf); + + SSVDCommonTest.assertOrthonormality(mQ, + false, + s_epsilon); + + // assert name propagation + for (Iterator<Pair<Writable, Vector>> iter = SSVDHelper.drmIterator(fs, + new Path(ssvd.getuSigmaPath()+"/*"), + conf, + closeables); iter.hasNext(); ) { + Pair<Writable, Vector> pair = iter.next(); + Writable key = pair.getFirst(); + Vector v = pair.getSecond(); + + assertTrue(v instanceof NamedVector); + assertTrue(key instanceof Text); + } + + } finally { + IOUtils.close(closeables); + } + } + + private void verifyInternals(Path tempDir, Matrix a, Omega omega, int kp, int q) { + int m = a.numRows(); + int n = a.numCols(); + + Vector xi = a.aggregateColumns(new VectorFunction() { + @Override + public double apply(Vector v) { + return v.zSum() / v.size(); + } + }); + + // materialize omega + Matrix momega = new DenseMatrix(n, kp); + for (int i = 0; i < n; i++) + for (int j = 0; j < kp; j++) + momega.setQuick(i, j, omega.getQuick(i, j)); + + Vector s_o = omega.mutlithreadedTRightMultiply(xi); + + System.out.printf("s_omega=\n%s\n", s_o); + + Matrix y = a.times(momega); + for (int i = 0; i < n; i++) y.viewRow(i).assign(s_o, Functions.MINUS); + + QRDecomposition qr = new QRDecomposition(y); + Matrix qm = qr.getQ(); + + Vector s_q = qm.aggregateColumns(new VectorFunction() { + @Override + public double apply(Vector v) { + return v.zSum(); + } + }); + + System.out.printf("s_q=\n%s\n", s_q); + + Matrix b = qm.transpose().times(a); + + Vector s_b = b.times(xi); + + System.out.printf("s_b=\n%s\n", s_b); + + + } + +} Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java?rev=1511590&r1=1511589&r2=1511590&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java Thu Aug 8 06:05:32 2013 @@ -150,7 +150,7 @@ public class LocalSSVDSolverDenseTest ex * System.out.println("--Colt SVD solver singular values:"); // try to run * * the same thing without stochastic algo double[][] a = - * SSVDSolver.loadDistributedRowMatrix(fs, aPath, conf); + * SSVDSolver.drmLoadAsDense(fs, aPath, conf); * * * @@ -173,26 +173,25 @@ public class LocalSSVDSolverDenseTest ex / singularValues.getQuick(i)) <= s_precisionPct / 100); } - double[][] mQ = - SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" - + BtJob.OUTPUT_Q + "-*"), conf); - - SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), - false, - s_epsilon); - - double[][] u = - SSVDHelper.loadDistributedRowMatrix(fs, - new Path(svdOutPath, "U/[^_]*"), - conf); - - SSVDCommonTest.assertOrthonormality(new DenseMatrix(u), false, s_epsilon); - double[][] v = - SSVDHelper.loadDistributedRowMatrix(fs, - new Path(svdOutPath, "V/[^_]*"), - conf); - - SSVDCommonTest.assertOrthonormality(new DenseMatrix(v), false, s_epsilon); + DenseMatrix mQ = + SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + + BtJob.OUTPUT_Q + "-*"), conf); + + SSVDCommonTest.assertOrthonormality(mQ, + false, + s_epsilon); + + DenseMatrix u = + SSVDHelper.drmLoadAsDense(fs, + new Path(svdOutPath, "U/*"), + conf); + SSVDCommonTest.assertOrthonormality(u, false, s_epsilon); + + DenseMatrix v = + SSVDHelper.drmLoadAsDense(fs, + new Path(svdOutPath, "V/*"), + conf); + SSVDCommonTest.assertOrthonormality(v, false, s_epsilon); } static void dumpSv(Vector s) { @@ -204,13 +203,4 @@ public class LocalSSVDSolverDenseTest ex } - static void dump(double[][] matrix) { - for (double[] aMatrix : matrix) { - for (double anAMatrix : aMatrix) { - System.out.printf("%f ", anAMatrix); - } - System.out.println(); - } - } - } Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1511590&r1=1511589&r2=1511590&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Thu Aug 8 06:05:32 2013 @@ -163,12 +163,12 @@ public class LocalSSVDSolverSparseSequen System.out.println("--Colt SVD solver singular values:"); // try to run the same thing without stochastic algo - double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf); + DenseMatrix a = SSVDHelper.drmLoadAsDense(fs, aPath, conf); // SingularValueDecompositionImpl svd=new SingularValueDecompositionImpl(new // Array2DRowRealMatrix(a)); SingularValueDecomposition svd2 = - new SingularValueDecomposition(new DenseMatrix(a)); + new SingularValueDecomposition(a); Vector svalues2 = new DenseVector(svd2.getSingularValues()); dumpSv(svalues2); @@ -177,13 +177,13 @@ public class LocalSSVDSolverSparseSequen assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon); } - double[][] mQ = - SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" - + BtJob.OUTPUT_Q + "-*"), conf); - - SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), - false, - s_epsilon); + DenseMatrix mQ = + SSVDHelper.drmLoadAsDense(fs, new Path(svdOutPath, "Bt-job/" + + BtJob.OUTPUT_Q + "-*"), conf); + + SSVDCommonTest.assertOrthonormality(mQ, + false, + s_epsilon); IOUtils.close(closeables); }
