Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java?rev=1292532&r1=1292531&r2=1292532&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/VJob.java Wed Feb 22 21:57:27 2012 @@ -36,6 +36,8 @@ import org.apache.mahout.math.DenseVecto import org.apache.mahout.math.Matrix; import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; +import org.apache.mahout.math.function.Functions; +import org.apache.mahout.math.function.PlusMult; public class VJob { private static final String OUTPUT_V = "v"; @@ -43,13 +45,120 @@ public class VJob { private static final String PROP_SIGMA_PATH = "ssvd.sigma.path"; private static final String PROP_V_HALFSIGMA = "ssvd.v.halfsigma"; private static final String PROP_K = "ssvd.k"; + public static final String PROP_SQ_PATH = "ssvdpca.sq.path"; + public static final String PROP_XI_PATH = "ssvdpca.xi.path"; private Job job; - public void start(Configuration conf, Path inputPathBt, Path inputUHatPath, - Path inputSigmaPath, Path outputPath, int k, int numReduceTasks, - boolean vHalfSigma) throws ClassNotFoundException, InterruptedException, - IOException { + public static final class VMapper extends + Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> { + + private Matrix uHat; + private Vector vRow; + private Vector sValues; + private VectorWritable vRowWritable; + private int kp; + private int k; + /* + * xi and s_q are PCA-related corrections, per MAHOUT-817 + */ + protected Vector xi; + protected Vector sq; + protected PlusMult plusMult = new PlusMult(0); + + @Override + protected void map(IntWritable key, VectorWritable value, 
Context context) + throws IOException, InterruptedException { + Vector bCol = value.get(); + /* + * MAHOUT-817: PCA correction for B': b_{col=i} -= s_q * xi_{i} + */ + if (xi != null) { + /* + * code defensively against shortened xi which may be externally + * supplied + */ + int btIndex = key.get(); + double xii = xi.size() > btIndex ? xi.getQuick(btIndex) : 0.0; + plusMult.setMultiplicator(-xii); + bCol.assign(sq, plusMult); + } + + for (int i = 0; i < k; i++) { + vRow.setQuick(i, bCol.dot(uHat.viewColumn(i)) / sValues.getQuick(i)); + } + context.write(key, vRowWritable); + } + + @Override + protected void setup(Context context) throws IOException, + InterruptedException { + super.setup(context); + + Configuration conf = context.getConfiguration(); + FileSystem fs = FileSystem.get(conf); + Path uHatPath = new Path(conf.get(PROP_UHAT_PATH)); + + Path sigmaPath = new Path(conf.get(PROP_SIGMA_PATH)); + + uHat = + new DenseMatrix(SSVDHelper.loadDistributedRowMatrix(fs, uHatPath, conf)); + // since uHat is (k+p) x (k+p) + kp = uHat.columnSize(); + k = context.getConfiguration().getInt(PROP_K, kp); + vRow = new DenseVector(k); + vRowWritable = new VectorWritable(vRow); + + sValues = SSVDHelper.loadVector(sigmaPath, conf); + if (conf.get(PROP_V_HALFSIGMA) != null) { + sValues.assign(Functions.SQRT); + } + + /* + * PCA -related corrections (MAHOUT-817) + */ + String xiPathStr = conf.get(PROP_XI_PATH); + if (xiPathStr != null) { + xi = SSVDHelper.loadAndSumUpVectors(new Path(xiPathStr), conf); + sq = + SSVDHelper.loadAndSumUpVectors(new Path(conf.get(PROP_SQ_PATH)), conf); + } + + } + + } + + /** + * + * @param conf + * @param inputPathBt + * @param xiPath + * PCA row mean (MAHOUT-817, to fix B') + * @param sqPath + * sq (MAHOUT-817, to fix B') + * @param inputUHatPath + * @param inputSigmaPath + * @param outputPath + * @param k + * @param numReduceTasks + * @param vHalfSigma + * @throws ClassNotFoundException + * @throws InterruptedException + * @throws IOException + */ 
+ public void run(Configuration conf, + Path inputPathBt, + Path xiPath, + Path sqPath, + + Path inputUHatPath, + Path inputSigmaPath, + + Path outputPath, + int k, + int numReduceTasks, + boolean vHalfSigma) throws ClassNotFoundException, + InterruptedException, IOException { job = new Job(conf); job.setJobName("V-job"); @@ -64,7 +173,8 @@ public class VJob { job.getConfiguration().set("mapreduce.output.basename", OUTPUT_V); FileOutputFormat.setCompressOutput(job, true); FileOutputFormat.setOutputCompressorClass(job, DefaultCodec.class); - SequenceFileOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK); + SequenceFileOutputFormat.setOutputCompressionType(job, + CompressionType.BLOCK); job.setMapOutputKeyClass(IntWritable.class); job.setMapOutputValueClass(VectorWritable.class); @@ -81,12 +191,21 @@ public class VJob { } job.getConfiguration().setInt(PROP_K, k); job.setNumReduceTasks(0); + + /* + * PCA-related options, MAHOUT-817 + */ + if (xiPath != null) { + job.getConfiguration().set(PROP_XI_PATH, xiPath.toString()); + job.getConfiguration().set(PROP_SQ_PATH, sqPath.toString()); + } + job.submit(); } public void waitForCompletion() throws IOException, ClassNotFoundException, - InterruptedException { + InterruptedException { job.waitForCompletion(false); if (!job.isSuccessful()) { @@ -95,54 +214,4 @@ public class VJob { } - public static final class VMapper extends - Mapper<IntWritable, VectorWritable, IntWritable, VectorWritable> { - - private Matrix uHat; - private DenseVector vRow; - private DenseVector sValues; - private VectorWritable vRowWritable; - private int kp; - private int k; - - @Override - protected void map(IntWritable key, VectorWritable value, Context context) - throws IOException, InterruptedException { - Vector qRow = value.get(); - for (int i = 0; i < k; i++) { - vRow.setQuick(i, - qRow.dot(uHat.viewColumn(i)) / sValues.getQuick(i)); - } - context.write(key, vRowWritable); // U inherits original A row labels. 
- } - - @Override - protected void setup(Context context) throws IOException, - InterruptedException { - super.setup(context); - Path uHatPath = new Path(context.getConfiguration().get(PROP_UHAT_PATH)); - - Path sigmaPath = new Path(context.getConfiguration().get(PROP_SIGMA_PATH)); - FileSystem fs = FileSystem.get(uHatPath.toUri(), context.getConfiguration()); - - uHat = new DenseMatrix(SSVDSolver.loadDistributedRowMatrix(fs, - uHatPath, context.getConfiguration())); - // since uHat is (k+p) x (k+p) - kp = uHat.columnSize(); - k = context.getConfiguration().getInt(PROP_K, kp); - vRow = new DenseVector(k); - vRowWritable = new VectorWritable(vRow); - - sValues = new DenseVector(SSVDSolver.loadDistributedRowMatrix(fs, - sigmaPath, context.getConfiguration())[0], true); - if (context.getConfiguration().get(PROP_V_HALFSIGMA) != null) { - for (int i = 0; i < k; i++) { - sValues.setQuick(i, Math.sqrt(sValues.getQuick(i))); - } - } - - } - - } - }
Modified: mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java?rev=1292532&r1=1292531&r2=1292532&view=diff ============================================================================== --- mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java (original) +++ mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/stochasticsvd/YtYJob.java Wed Feb 22 21:57:27 2012 @@ -70,7 +70,7 @@ public class YtYJob { @Override protected void setup(Context context) throws IOException, - InterruptedException { + InterruptedException { int k = context.getConfiguration().getInt(PROP_K, -1); int p = context.getConfiguration().getInt(PROP_P, -1); @@ -78,10 +78,10 @@ public class YtYJob { Validate.isTrue(p > 0, "invalid p parameter"); kp = k + p; - long omegaSeed = Long.parseLong(context.getConfiguration() - .get(PROP_OMEGA_SEED)); + long omegaSeed = + Long.parseLong(context.getConfiguration().get(PROP_OMEGA_SEED)); - omega = new Omega(omegaSeed, k, p); + omega = new Omega(omegaSeed, k + p); mYtY = new UpperTriangular(kp); @@ -92,7 +92,7 @@ public class YtYJob { @Override protected void map(Writable key, VectorWritable value, Context context) - throws IOException, InterruptedException { + throws IOException, InterruptedException { omega.computeYRow(value.get(), yRow); // compute outer product update for YtY @@ -115,12 +115,10 @@ public class YtYJob { * are creating some short-lived references) here is that we obviously * do two times more iterations then necessary if y row is pretty dense. 
*/ - for (Iterator<Vector.Element> iterI = yRow.iterateNonZero(); iterI - .hasNext();) { + for (Iterator<Vector.Element> iterI = yRow.iterateNonZero(); iterI.hasNext();) { Vector.Element eli = iterI.next(); int i = eli.index(); - for (Iterator<Vector.Element> iterJ = yRow.iterateNonZero(); iterJ - .hasNext();) { + for (Iterator<Vector.Element> iterJ = yRow.iterateNonZero(); iterJ.hasNext();) { Vector.Element elj = iterJ.next(); int j = elj.index(); if (j < i) { @@ -134,9 +132,10 @@ public class YtYJob { @Override protected void cleanup(Context context) throws IOException, - InterruptedException { + InterruptedException { context.write(new IntWritable(context.getTaskAttemptID().getTaskID() - .getId()), new VectorWritable(new DenseVector(mYtY.getData()))); + .getId()), + new VectorWritable(new DenseVector(mYtY.getData()))); } } @@ -147,7 +146,7 @@ public class YtYJob { @Override protected void setup(Context context) throws IOException, - InterruptedException { + InterruptedException { int k = context.getConfiguration().getInt(PROP_K, -1); int p = context.getConfiguration().getInt(PROP_P, -1); @@ -158,22 +157,28 @@ public class YtYJob { @Override protected void cleanup(Context context) throws IOException, - InterruptedException { + InterruptedException { context.write(new IntWritable(), accum); } @Override - protected void reduce(IntWritable key, Iterable<VectorWritable> values, - Context arg2) throws IOException, InterruptedException { + protected void reduce(IntWritable key, + Iterable<VectorWritable> values, + Context arg2) throws IOException, + InterruptedException { for (VectorWritable vw : values) { acc.addAll(vw.get()); } } } - public static void run(Configuration conf, Path[] inputPaths, - Path outputPath, int k, int p, long seed) - throws ClassNotFoundException, InterruptedException, IOException { + public static void run(Configuration conf, + Path[] inputPaths, + Path outputPath, + int k, + int p, + long seed) throws ClassNotFoundException, + 
InterruptedException, IOException { Job job = new Job(conf); job.setJobName("YtY-job"); Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1292532&r1=1292531&r2=1292532&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java Wed Feb 22 21:57:27 2012 @@ -17,9 +17,10 @@ package org.apache.mahout.math.hadoop; -import com.google.common.base.Function; -import com.google.common.collect.Iterators; -import com.google.common.collect.Maps; +import java.io.IOException; +import java.util.Iterator; +import java.util.Map; + import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; @@ -27,6 +28,7 @@ import org.apache.hadoop.fs.Path; import org.apache.mahout.clustering.ClusteringTestUtils; import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.common.iterator.sequencefile.PathFilters; +import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.Matrix; import org.apache.mahout.math.MatrixSlice; import org.apache.mahout.math.RandomAccessSparseVector; @@ -34,11 +36,12 @@ import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorIterable; import org.apache.mahout.math.VectorWritable; import org.apache.mahout.math.decomposer.SolverTest; +import org.apache.mahout.math.function.Functions; import org.junit.Test; -import java.io.IOException; -import java.util.Iterator; -import java.util.Map; +import com.google.common.base.Function; +import com.google.common.collect.Iterators; +import com.google.common.collect.Maps; public final class TestDistributedRowMatrix extends 
MahoutTestCase { public static final String TEST_PROPERTY_KEY = "test.property.key"; @@ -81,6 +84,38 @@ public final class TestDistributedRowMat } @Test + public void testMatrixColumnMeansJob() throws Exception { + Matrix m = + SolverTest.randomSequentialAccessSparseMatrix(100, 90, 50, 20, 1.0); + DistributedRowMatrix dm = + randomDistributedMatrix(100, 90, 50, 20, 1.0, false); + + Vector expected = new DenseVector(50); + for (int i = 0; i < m.numRows(); i++) { + expected.assign(m.viewRow(i), Functions.PLUS); + } + expected.assign(Functions.DIV, m.numRows()); + Vector actual = dm.columnMeans("DenseVector"); + assertEquals(0.0, expected.getDistanceSquared(actual), EPSILON); + } + + @Test + public void testNullMatrixColumnMeansJob() throws Exception { + Matrix m = + SolverTest.randomSequentialAccessSparseMatrix(100, 90, 0, 0, 1.0); + DistributedRowMatrix dm = + randomDistributedMatrix(100, 90, 0, 0, 1.0, false); + + Vector expected = new DenseVector(0); + for (int i = 0; i < m.numRows(); i++) { + expected.assign(m.viewRow(i), Functions.PLUS); + } + expected.assign(Functions.DIV, m.numRows()); + Vector actual = dm.columnMeans(); + assertEquals(0.0, expected.getDistanceSquared(actual), EPSILON); + } + + @Test public void testMatrixTimesVector() throws Exception { Vector v = new RandomAccessSparseVector(50); v.assign(1.0); @@ -118,33 +153,33 @@ public final class TestDistributedRowMat } @Test - public void testMatrixMultiplactionJobConfBuilder() throws Exception { + public void testMatrixMultiplactionJobConfBuilder() throws Exception { Configuration initialConf = createInitialConf(); - - Path baseTmpDirPath = getTestTempDirPath("testpaths"); + + Path baseTmpDirPath = getTestTempDirPath("testpaths"); Path aPath = new Path(baseTmpDirPath, "a"); Path bPath = new Path(baseTmpDirPath, "b"); Path outPath = new Path(baseTmpDirPath, "out"); - + Configuration mmJobConf = MatrixMultiplicationJob.createMatrixMultiplyJobConf(aPath, bPath, outPath, 10); - Configuration 
mmCustomJobConf = MatrixMultiplicationJob.createMatrixMultiplyJobConf(initialConf, - aPath, - bPath, - outPath, + Configuration mmCustomJobConf = MatrixMultiplicationJob.createMatrixMultiplyJobConf(initialConf, + aPath, + bPath, + outPath, 10); - + assertNull(mmJobConf.get(TEST_PROPERTY_KEY)); - assertEquals(TEST_PROPERTY_VALUE, mmCustomJobConf.get(TEST_PROPERTY_KEY)); + assertEquals(TEST_PROPERTY_VALUE, mmCustomJobConf.get(TEST_PROPERTY_KEY)); } - + @Test public void testTransposeJobConfBuilder() throws Exception { Configuration initialConf = createInitialConf(); - - Path baseTmpDirPath = getTestTempDirPath("testpaths"); + + Path baseTmpDirPath = getTestTempDirPath("testpaths"); Path inputPath = new Path(baseTmpDirPath, "input"); Path outputPath = new Path(baseTmpDirPath, "output"); - + Configuration transposeJobConf = TransposeJob.buildTransposeJobConf(inputPath, outputPath, 10); Configuration transposeCustomJobConf = TransposeJob.buildTransposeJobConf(initialConf, inputPath, outputPath, 10); @@ -155,7 +190,7 @@ public final class TestDistributedRowMat @Test public void testTimesSquaredJobConfBuilders() throws Exception { Configuration initialConf = createInitialConf(); - Path baseTmpDirPath = getTestTempDirPath("testpaths"); + Path baseTmpDirPath = getTestTempDirPath("testpaths"); Path inputPath = new Path(baseTmpDirPath, "input"); Path outputPath = new Path(baseTmpDirPath, "output"); @@ -167,46 +202,46 @@ public final class TestDistributedRowMat assertNull(timesSquaredJobConf1.get(TEST_PROPERTY_KEY)); assertEquals(TEST_PROPERTY_VALUE, customTimesSquaredJobConf1.get(TEST_PROPERTY_KEY)); - + Configuration timesJobConf = TimesSquaredJob.createTimesJobConf(v, 50, inputPath, outputPath); Configuration customTimesJobConf = TimesSquaredJob.createTimesJobConf(initialConf, v, 50, inputPath, outputPath); - + assertNull(timesJobConf.get(TEST_PROPERTY_KEY)); assertEquals(TEST_PROPERTY_VALUE, customTimesJobConf.get(TEST_PROPERTY_KEY)); - - Configuration timesSquaredJobConf2 
= TimesSquaredJob.createTimesSquaredJobConf(v, - inputPath, - outputPath, - TimesSquaredJob.TimesSquaredMapper.class, + + Configuration timesSquaredJobConf2 = TimesSquaredJob.createTimesSquaredJobConf(v, + inputPath, + outputPath, + TimesSquaredJob.TimesSquaredMapper.class, TimesSquaredJob.VectorSummingReducer.class); Configuration customTimesSquaredJobConf2 = TimesSquaredJob.createTimesSquaredJobConf(initialConf, - v, - inputPath, - outputPath, - TimesSquaredJob.TimesSquaredMapper.class, + v, + inputPath, + outputPath, + TimesSquaredJob.TimesSquaredMapper.class, TimesSquaredJob.VectorSummingReducer.class); - + assertNull(timesSquaredJobConf2.get(TEST_PROPERTY_KEY)); assertEquals(TEST_PROPERTY_VALUE, customTimesSquaredJobConf2.get(TEST_PROPERTY_KEY)); Configuration timesSquaredJobConf3 = TimesSquaredJob.createTimesSquaredJobConf(v, 50, - inputPath, - outputPath, - TimesSquaredJob.TimesSquaredMapper.class, + inputPath, + outputPath, + TimesSquaredJob.TimesSquaredMapper.class, TimesSquaredJob.VectorSummingReducer.class); Configuration customTimesSquaredJobConf3 = TimesSquaredJob.createTimesSquaredJobConf(initialConf, v, 50, - inputPath, - outputPath, - TimesSquaredJob.TimesSquaredMapper.class, + inputPath, + outputPath, + TimesSquaredJob.TimesSquaredMapper.class, TimesSquaredJob.VectorSummingReducer.class); - + assertNull(timesSquaredJobConf3.get(TEST_PROPERTY_KEY)); assertEquals(TEST_PROPERTY_VALUE, customTimesSquaredJobConf3.get(TEST_PROPERTY_KEY)); } - + @Test public void testTimesVectorTempDirDeletion() throws Exception { Configuration conf = new Configuration(); @@ -224,13 +259,13 @@ public final class TestDistributedRowMat Vector result1 = dm.times(v); assertEquals(0, fs.listStatus(outputPath).length); - + deleteContentsOfPath(conf, outputPath); assertEquals(0, fs.listStatus(outputPath).length); - + conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true); dm.setConf(conf); - + Vector result2 = dm.times(v); FileStatus[] outputStatuses = 
fs.listStatus(outputPath); @@ -261,13 +296,13 @@ public final class TestDistributedRowMat Vector result1 = dm.timesSquared(v); assertEquals(0, fs.listStatus(outputPath).length); - + deleteContentsOfPath(conf, outputPath); assertEquals(0, fs.listStatus(outputPath).length); - + conf.setBoolean(DistributedRowMatrix.KEEP_TEMP_FILES, true); dm.setConf(conf); - + Vector result2 = dm.timesSquared(v); FileStatus[] outputStatuses = fs.listStatus(outputPath); @@ -277,7 +312,7 @@ public final class TestDistributedRowMat Path outputVectorPath = new Path(outputTempPath, TimesSquaredJob.OUTPUT_VECTOR_FILENAME); assertEquals(1, fs.listStatus(inputVectorPath, PathFilters.logsCRCFilter()).length); assertEquals(1, fs.listStatus(outputVectorPath, PathFilters.logsCRCFilter()).length); - + assertEquals(0.0, result1.getDistanceSquared(result2), EPSILON); } @@ -289,13 +324,13 @@ public final class TestDistributedRowMat private static void deleteContentsOfPath(Configuration conf, Path path) throws Exception { FileSystem fs = path.getFileSystem(conf); - + FileStatus[] statuses = fs.listStatus(path); for (FileStatus status : statuses) { fs.delete(status.getPath(), true); - } + } } - + public DistributedRowMatrix randomDistributedMatrix(int numRows, int nonNullRows, int numCols, Copied: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java (from r1245615, mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java) URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java&r1=1245615&r2=1292532&rev=1292532&view=diff 
============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDPCADenseTest.java Wed Feb 22 21:57:27 2012 @@ -1,226 +1,185 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.mahout.math.hadoop.stochasticsvd; - -import java.io.Closeable; -import java.io.File; -import java.io.IOException; -import java.util.Deque; -import java.util.LinkedList; -import java.util.Random; - -import com.google.common.io.Closeables; - -import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; -import org.apache.hadoop.io.IntWritable; -import org.apache.hadoop.io.SequenceFile; -import org.apache.hadoop.io.SequenceFile.CompressionType; -import org.apache.hadoop.io.compress.DefaultCodec; -import org.apache.mahout.common.MahoutTestCase; -import org.apache.mahout.common.RandomUtils; -import org.apache.mahout.math.DenseMatrix; -import org.apache.mahout.math.SequentialAccessSparseVector; -import org.apache.mahout.math.SingularValueDecomposition; -import org.apache.mahout.math.Vector; -import org.apache.mahout.math.VectorWritable; -import org.junit.Test; - -/** - * - * Tests SSVD solver with a made-up data running hadoop solver in a local mode. - * It requests full-rank SSVD and then compares singular values to that of - * Colt's SVD asserting epsilon(precision) 1e-10 or whatever most recent value - * configured. 
- * - */ -public class LocalSSVDSolverSparseSequentialTest extends MahoutTestCase { - - private static final double s_epsilon = 1.0E-10d; - - /* - * removing from tests to reduce test running time - */ - /* @Test */ - public void testSSVDSolverSparse() throws IOException { - runSSVDSolver(0); - } - - @Test - public void testSSVDSolverPowerIterations1() throws IOException { - runSSVDSolver(1); - } - - public void runSSVDSolver(int q) throws IOException { - - Configuration conf = new Configuration(); - conf.set("mapred.job.tracker", "local"); - conf.set("fs.default.name", "file:///"); - - // conf.set("mapred.job.tracker","localhost:11011"); - // conf.set("fs.default.name","hdfs://localhost:11010/"); - - Deque<Closeable> closeables = new LinkedList<Closeable>(); - Random rnd = RandomUtils.getRandom(); - - File tmpDir = getTestTempDir("svdtmp"); - conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath()); - - Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq"); - - // create distributed row matrix-like struct - SequenceFile.Writer w = - SequenceFile.createWriter(FileSystem.getLocal(conf), - conf, - aLocPath, - IntWritable.class, - VectorWritable.class, - CompressionType.BLOCK, - new DefaultCodec()); - closeables.addFirst(w); - - int n = 100; - int m = 2000; - double percent = 5; - - VectorWritable vw = new VectorWritable(); - IntWritable roww = new IntWritable(); - - double muAmplitude = 50.0; - for (int i = 0; i < m; i++) { - Vector dv = new SequentialAccessSparseVector(n); - for (int j = 0; j < n * percent / 100; j++) { - dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.5)); - } - roww.set(i); - vw.set(dv); - w.append(roww, vw); - } - closeables.remove(w); - Closeables.close(w, true); - - FileSystem fs = FileSystem.get(aLocPath.toUri(), conf); - - Path tempDirPath = getTestTempDirPath("svd-proc"); - Path aPath = new Path(tempDirPath, "A/A.seq"); - fs.copyFromLocalFile(aLocPath, aPath); - - Path svdOutPath = new Path(tempDirPath, "SSVD-out"); - 
- // make sure we wipe out previous test results, just a convenience - fs.delete(svdOutPath, true); - - // Solver starts here: - System.out.println("Input prepared, starting solver..."); - - int ablockRows = 867; - int p = 60; - int k = 40; - SSVDSolver ssvd = - new SSVDSolver(conf, - new Path[] { aPath }, - svdOutPath, - ablockRows, - k, - p, - 3); - ssvd.setOuterBlockHeight(500); - ssvd.setAbtBlockHeight(251); - - /* - * removing V,U jobs from this test to reduce running time. i will keep them - * put in the dense test though. - */ - ssvd.setComputeU(false); - ssvd.setComputeV(false); - - ssvd.setOverwrite(true); - ssvd.setQ(q); - ssvd.setBroadcast(true); - ssvd.run(); - - double[] stochasticSValues = ssvd.getSingularValues(); - System.out.println("--SSVD solver singular values:"); - dumpSv(stochasticSValues); - System.out.println("--Colt SVD solver singular values:"); - - // try to run the same thing without stochastic algo - double[][] a = SSVDSolver.loadDistributedRowMatrix(fs, aPath, conf); - - // SingularValueDecompositionImpl svd=new SingularValueDecompositionImpl(new - // Array2DRowRealMatrix(a)); - SingularValueDecomposition svd2 = - new SingularValueDecomposition(new DenseMatrix(a)); - - double[] svalues2 = svd2.getSingularValues(); - dumpSv(svalues2); - - for (int i = 0; i < k + p; i++) { - assertTrue(Math.abs(svalues2[i] - stochasticSValues[i]) <= s_epsilon); - } - - double[][] mQ = - SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" - + BtJob.OUTPUT_Q + "-*"), conf); - - SSVDPrototypeTest - .assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon); - - /* - * removing tests on U and V to keep this test leaner. I will keep U,V - * computation and assertions in the dense tests though. 
- */ - - /* - double[][] u = - SSVDSolver.loadDistributedRowMatrix(fs, - new Path(svdOutPath, "U/[^_]*"), - conf); - - SSVDPrototypeTest - .assertOrthonormality(new DenseMatrix(u), false, s_epsilon); - double[][] v = - SSVDSolver.loadDistributedRowMatrix(fs, - new Path(svdOutPath, "V/[^_]*"), - conf); - - SSVDPrototypeTest - .assertOrthonormality(new DenseMatrix(v), false, s_epsilon); - */ - } - - static void dumpSv(double[] s) { - System.out.printf("svs: "); - for (double value : s) { - System.out.printf("%f ", value); - } - System.out.println(); - - } - - static void dump(double[][] matrix) { - for (double[] aMatrix : matrix) { - for (double anAMatrix : aMatrix) { - System.out.printf("%f ", anAMatrix); - } - System.out.println(); - } - } - -} +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.mahout.math.hadoop.stochasticsvd; + +import java.io.Closeable; +import java.io.File; +import java.io.IOException; +import java.util.Deque; +import java.util.LinkedList; +import java.util.Random; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.SequenceFile; +import org.apache.hadoop.io.SequenceFile.CompressionType; +import org.apache.hadoop.io.compress.DefaultCodec; +import org.apache.mahout.common.MahoutTestCase; +import org.apache.mahout.common.RandomUtils; +import org.apache.mahout.math.DenseMatrix; +import org.apache.mahout.math.DenseVector; +import org.apache.mahout.math.SequentialAccessSparseVector; +import org.apache.mahout.math.SingularValueDecomposition; +import org.apache.mahout.math.Vector; +import org.apache.mahout.math.VectorWritable; +import org.apache.mahout.math.function.Functions; +import org.junit.Test; + +import com.google.common.io.Closeables; + +public class LocalSSVDPCADenseTest extends MahoutTestCase { + + private static final double s_epsilon = 1.0E-10d; + + @Test + public void runPCATest1() throws IOException { + runSSVDSolver(1); + } + + public void runSSVDSolver(int q) throws IOException { + + Configuration conf = new Configuration(); + conf.set("mapred.job.tracker", "local"); + conf.set("fs.default.name", "file:///"); + + // conf.set("mapred.job.tracker","localhost:11011"); + // conf.set("fs.default.name","hdfs://localhost:11010/"); + + Deque<Closeable> closeables = new LinkedList<Closeable>(); + Random rnd = RandomUtils.getRandom(); + + File tmpDir = getTestTempDir("svdtmp"); + conf.set("hadoop.tmp.dir", tmpDir.getAbsolutePath()); + + Path aLocPath = new Path(getTestTempDirPath("svdtmp/A"), "A.seq"); + + // create distributed row matrix-like struct + SequenceFile.Writer w = + SequenceFile.createWriter(FileSystem.getLocal(conf), + conf, + aLocPath, + 
IntWritable.class, + VectorWritable.class, + CompressionType.BLOCK, + new DefaultCodec()); + closeables.addFirst(w); + + int n = 100; + int m = 2000; + double percent = 5; + + VectorWritable vw = new VectorWritable(); + IntWritable roww = new IntWritable(); + + Vector xi = new DenseVector(n); + + double muAmplitude = 50.0; + for (int i = 0; i < m; i++) { + Vector dv = new SequentialAccessSparseVector(n); + for (int j = 0; j < n * percent / 100; j++) { + dv.setQuick(rnd.nextInt(n), muAmplitude * (rnd.nextDouble() - 0.25)); + } + roww.set(i); + vw.set(dv); + w.append(roww, vw); + xi.assign(dv, Functions.PLUS); + } + closeables.remove(w); + Closeables.close(w, true); + + xi.assign(Functions.mult(1.0 / m)); + + FileSystem fs = FileSystem.get(conf); + + Path tempDirPath = getTestTempDirPath("svd-proc"); + Path aPath = new Path(tempDirPath, "A/A.seq"); + fs.copyFromLocalFile(aLocPath, aPath); + Path xiPath = new Path(tempDirPath, "xi/xi.seq"); + SSVDHelper.saveVector(xi, xiPath, conf); + + Path svdOutPath = new Path(tempDirPath, "SSVD-out"); + + // make sure we wipe out previous test results, just a convenience + fs.delete(svdOutPath, true); + + // Solver starts here: + System.out.println("Input prepared, starting solver..."); + + int ablockRows = 867; + int p = 60; + int k = 40; + SSVDSolver ssvd = + new SSVDSolver(conf, + new Path[] { aPath }, + svdOutPath, + ablockRows, + k, + p, + 3); + ssvd.setOuterBlockHeight(500); + ssvd.setAbtBlockHeight(251); + ssvd.setPcaMeanPath(xiPath); + + /* + * removing V,U jobs from this test to reduce running time. i will keep them + * put in the dense test though. 
+ */ + ssvd.setComputeU(false); + ssvd.setComputeV(false); + + ssvd.setOverwrite(true); + ssvd.setQ(q); + ssvd.setBroadcast(true); + ssvd.run(); + + Vector stochasticSValues = ssvd.getSingularValues(); + System.out.println("--SSVD solver singular values:"); + LocalSSVDSolverSparseSequentialTest.dumpSv(stochasticSValues); + System.out.println("--Colt SVD solver singular values:"); + + // try to run the same thing without stochastic algo + double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf); + + // subtract pseudo pca mean + for (int i = 0; i < m; i++) + for (int j = 0; j < n; j++) + a[i][j] -= xi.getQuick(j); + + SingularValueDecomposition svd2 = + new SingularValueDecomposition(new DenseMatrix(a)); + + Vector svalues2 = new DenseVector(svd2.getSingularValues()); + LocalSSVDSolverSparseSequentialTest.dumpSv(svalues2); + + for (int i = 0; i < k + p; i++) { + assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon); + } + + double[][] mQ = + SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" + + BtJob.OUTPUT_Q + "-*"), conf); + + SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), + false, + s_epsilon); + + } + +} Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java?rev=1292532&r1=1292531&r2=1292532&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverDenseTest.java Wed Feb 22 21:57:27 2012 @@ -41,10 +41,12 @@ public class LocalSSVDSolverDenseTest ex private static final double s_epsilon = 1.0E-10d; - // I actually never saw errors more than 3% worst 
case for this test, - // but since it's non-deterministic test, it still may occasionally produce - // bad results with a non-zero probability, so i put this pct% for error - // margin high enough so it (almost) never fails. + /* + * I actually never saw errors more than 3% worst case for this particular + * test, but since it's non-deterministic test, it still may occasionally + * produce bad results with a non-zero probability, so i put this pct% for + * error margin high enough so it (almost) never fails. + */ private static final double s_precisionPct = 10; @Test @@ -135,7 +137,7 @@ public class LocalSSVDSolverDenseTest ex ssvd.setBroadcast(false); ssvd.run(); - double[] stochasticSValues = ssvd.getSingularValues(); + Vector stochasticSValues = ssvd.getSingularValues(); System.out.println("--SSVD solver singular values:"); dumpSv(stochasticSValues); @@ -167,36 +169,36 @@ public class LocalSSVDSolverDenseTest ex // used to generate surrogate input for (int i = 0; i < k; i++) { - assertTrue(Math.abs((singularValues.getQuick(i) - stochasticSValues[i]) + assertTrue(Math.abs((singularValues.getQuick(i) - stochasticSValues.getQuick(i)) / singularValues.getQuick(i)) <= s_precisionPct / 100); } double[][] mQ = - SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" + SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf); - SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(mQ), + SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon); double[][] u = - SSVDSolver.loadDistributedRowMatrix(fs, + SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "U/[^_]*"), conf); - SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(u), false, s_epsilon); + SSVDCommonTest.assertOrthonormality(new DenseMatrix(u), false, s_epsilon); double[][] v = - SSVDSolver.loadDistributedRowMatrix(fs, + SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "V/[^_]*"), conf); - 
SSVDPrototypeTest.assertOrthonormality(new DenseMatrix(v), false, s_epsilon); + SSVDCommonTest.assertOrthonormality(new DenseMatrix(v), false, s_epsilon); } - static void dumpSv(double[] s) { + static void dumpSv(Vector s) { System.out.printf("svs: "); - for (double value : s) { - System.out.printf("%f ", value); + for (Vector.Element el : s) { + System.out.printf("%f ", el.get()); } System.out.println(); Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java?rev=1292532&r1=1292531&r2=1292532&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/LocalSSVDSolverSparseSequentialTest.java Wed Feb 22 21:57:27 2012 @@ -24,8 +24,6 @@ import java.util.Deque; import java.util.LinkedList; import java.util.Random; -import com.google.common.io.Closeables; - import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -36,12 +34,15 @@ import org.apache.hadoop.io.compress.Def import org.apache.mahout.common.MahoutTestCase; import org.apache.mahout.common.RandomUtils; import org.apache.mahout.math.DenseMatrix; +import org.apache.mahout.math.DenseVector; import org.apache.mahout.math.SequentialAccessSparseVector; import org.apache.mahout.math.SingularValueDecomposition; import org.apache.mahout.math.Vector; import org.apache.mahout.math.VectorWritable; import org.junit.Test; +import com.google.common.io.Closeables; + /** * * Tests SSVD solver with a made-up data running hadoop solver in a local mode. 
@@ -58,15 +59,15 @@ public class LocalSSVDSolverSparseSequen * removing from tests to reduce test running time */ /* @Test */ - public void testSSVDSolverSparse() throws IOException { + public void testSSVDSolverSparse() throws IOException { runSSVDSolver(0); } - + @Test - public void testSSVDSolverPowerIterations1() throws IOException { + public void testSSVDSolverPowerIterations1() throws IOException { runSSVDSolver(1); } - + public void runSSVDSolver(int q) throws IOException { Configuration conf = new Configuration(); @@ -142,45 +143,46 @@ public class LocalSSVDSolverSparseSequen 3); ssvd.setOuterBlockHeight(500); ssvd.setAbtBlockHeight(251); - + /* * removing V,U jobs from this test to reduce running time. i will keep them * put in the dense test though. */ ssvd.setComputeU(false); ssvd.setComputeV(false); - + ssvd.setOverwrite(true); ssvd.setQ(q); ssvd.setBroadcast(true); ssvd.run(); - double[] stochasticSValues = ssvd.getSingularValues(); + Vector stochasticSValues = ssvd.getSingularValues(); System.out.println("--SSVD solver singular values:"); dumpSv(stochasticSValues); System.out.println("--Colt SVD solver singular values:"); // try to run the same thing without stochastic algo - double[][] a = SSVDSolver.loadDistributedRowMatrix(fs, aPath, conf); + double[][] a = SSVDHelper.loadDistributedRowMatrix(fs, aPath, conf); // SingularValueDecompositionImpl svd=new SingularValueDecompositionImpl(new // Array2DRowRealMatrix(a)); SingularValueDecomposition svd2 = new SingularValueDecomposition(new DenseMatrix(a)); - double[] svalues2 = svd2.getSingularValues(); + Vector svalues2 = new DenseVector(svd2.getSingularValues()); dumpSv(svalues2); for (int i = 0; i < k + p; i++) { - assertTrue(Math.abs(svalues2[i] - stochasticSValues[i]) <= s_epsilon); + assertTrue(Math.abs(svalues2.getQuick(i) - stochasticSValues.getQuick(i)) <= s_epsilon); } double[][] mQ = - SSVDSolver.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" + 
SSVDHelper.loadDistributedRowMatrix(fs, new Path(svdOutPath, "Bt-job/" + BtJob.OUTPUT_Q + "-*"), conf); - SSVDPrototypeTest - .assertOrthonormality(new DenseMatrix(mQ), false, s_epsilon); + SSVDCommonTest.assertOrthonormality(new DenseMatrix(mQ), + false, + s_epsilon); /* * removing tests on U and V to keep this test leaner. I will keep U,V @@ -188,27 +190,22 @@ public class LocalSSVDSolverSparseSequen */ /* - double[][] u = - SSVDSolver.loadDistributedRowMatrix(fs, - new Path(svdOutPath, "U/[^_]*"), - conf); - - SSVDPrototypeTest - .assertOrthonormality(new DenseMatrix(u), false, s_epsilon); - double[][] v = - SSVDSolver.loadDistributedRowMatrix(fs, - new Path(svdOutPath, "V/[^_]*"), - conf); - - SSVDPrototypeTest - .assertOrthonormality(new DenseMatrix(v), false, s_epsilon); - */ + * double[][] u = SSVDSolver.loadDistributedRowMatrix(fs, new + * Path(svdOutPath, "U/[^_]*"), conf); + * + * SSVDPrototypeTest .assertOrthonormality(new DenseMatrix(u), false, + * s_epsilon); double[][] v = SSVDSolver.loadDistributedRowMatrix(fs, new + * Path(svdOutPath, "V/[^_]*"), conf); + * + * SSVDPrototypeTest .assertOrthonormality(new DenseMatrix(v), false, + * s_epsilon); + */ } - static void dumpSv(double[] s) { + static void dumpSv(Vector s) { System.out.printf("svs: "); - for (double value : s) { - System.out.printf("%f ", value); + for (Vector.Element el : s) { + System.out.printf("%f ", el.get()); } System.out.println(); Copied: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCommonTest.java (from r1245615, mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java) URL: 
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCommonTest.java?p2=mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCommonTest.java&p1=mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java&r1=1245615&r2=1292532&rev=1292532&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDPrototypeTest.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDCommonTest.java Wed Feb 22 21:57:27 2012 @@ -28,27 +28,21 @@ import org.apache.mahout.math.function.D import org.apache.mahout.math.hadoop.stochasticsvd.qr.GivensThinSolver; import org.junit.Test; -/** - * Tests parts of of Stochastic SVD solver code in local mode - * using "prototype" code (class that simulates processes - * actually happenning in the MR jobs). 
+/** + * Shared ssvd test code */ -public class SSVDPrototypeTest extends MahoutTestCase { +public class SSVDCommonTest extends MahoutTestCase { private static final double SCALE = 1000; private static final double SVD_EPSILON = 1.0e-10; @Test - public void testSSVDPrototype() throws Exception { - SSVDPrototype.main(null); - } - - @Test public void testGivensQR() throws Exception { // DenseMatrix m = new DenseMatrix(dims<<2,dims); Matrix m = new DenseMatrix(3, 3); m.assign(new DoubleFunction() { private final Random rnd = RandomUtils.getRandom(); + @Override public double apply(double arg0) { return rnd.nextDouble() * SCALE; @@ -65,21 +59,25 @@ public class SSVDPrototypeTest extends M m.setQuick(2, 1, 8); m.setQuick(2, 2, 9); - GivensThinSolver qrSolver = new GivensThinSolver(m.rowSize(), m.columnSize()); + GivensThinSolver qrSolver = + new GivensThinSolver(m.rowSize(), m.columnSize()); qrSolver.solve(m); Matrix qtm = new DenseMatrix(qrSolver.getThinQtTilde()); assertOrthonormality(qtm.transpose(), false, SVD_EPSILON); - Matrix aClone = new DenseMatrix(qrSolver.getThinQtTilde()).transpose() - .times(qrSolver.getRTilde()); + Matrix aClone = + new DenseMatrix(qrSolver.getThinQtTilde()).transpose() + .times(qrSolver.getRTilde()); System.out.println("aclone : " + aClone); } - public static void assertOrthonormality(Matrix mtx, boolean insufficientRank, double epsilon) { + public static void assertOrthonormality(Matrix mtx, + boolean insufficientRank, + double epsilon) { int n = mtx.columnSize(); int rank = 0; for (int i = 0; i < n; i++) { @@ -99,7 +97,8 @@ public class SSVDPrototypeTest extends M assertTrue(Math.abs((i == j && rank > j ? 
1 : 0) - dot) < epsilon); } } - assertTrue((!insufficientRank && rank == n) || (insufficientRank && rank < n)); + assertTrue((!insufficientRank && rank == n) + || (insufficientRank && rank < n)); } Modified: mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java URL: http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java?rev=1292532&r1=1292531&r2=1292532&view=diff ============================================================================== --- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java (original) +++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/stochasticsvd/SSVDTestsHelper.java Wed Feb 22 21:57:27 2012 @@ -108,7 +108,7 @@ public class SSVDTestsHelper { } } GramSchmidt.orthonormalizeColumns(result); - SSVDPrototypeTest.assertOrthonormality(result, false, 1.0e-10); + SSVDCommonTest.assertOrthonormality(result, false, 1.0e-10); return result; }
