Author: jmannix
Date: Sat Apr 30 05:23:58 2011
New Revision: 1098044
URL: http://svn.apache.org/viewvc?rev=1098044&view=rev
Log:
Remove the unnecessary vector copy in TimesSquaredJob, remove the unused call
to computePairwiseInnerProducts(), and properly add the option for the number
of eigens to keep for cleansvd
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java?rev=1098044&r1=1098043&r2=1098044&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TimesSquaredJob.java
Sat Apr 30 05:23:58 2011
@@ -165,7 +165,7 @@ public final class TimesSquaredJob {
DistributedCache.setCacheFiles(new URI[] {ivpURI}, conf);
conf.set(INPUT_VECTOR, ivpURI.toString());
- conf.setBoolean(IS_SPARSE_OUTPUT, !(v instanceof DenseVector));
+ conf.setBoolean(IS_SPARSE_OUTPUT, !(v.isDense()));
conf.setInt(OUTPUT_VECTOR_DIMENSION, outputVectorDim);
FileInputFormat.addInputPath(conf, matrixInputPath);
conf.setInputFormat(SequenceFileInputFormat.class);
@@ -211,9 +211,6 @@ public final class TimesSquaredJob {
inputVector = iterator.next().get();
iterator.close();
- if (!(inputVector instanceof SequentialAccessSparseVector || inputVector instanceof DenseVector)) {
- inputVector = new SequentialAccessSparseVector(inputVector);
- }
int outDim = conf.getInt(OUTPUT_VECTOR_DIMENSION, Integer.MAX_VALUE);
outputVector = conf.getBoolean(IS_SPARSE_OUTPUT, false)
? new RandomAccessSparseVector(outDim, 10)
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1098044&r1=1098043&r2=1098044&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
Sat Apr 30 05:23:58 2011
@@ -111,7 +111,8 @@ public class EigenVerificationJob extend
getOutputPath(),
argMap.get("--inMemory") != null,
Double.parseDouble(argMap.get("--maxError")),
- Double.parseDouble(argMap.get("--minEigenvalue")), Integer.parseInt(argMap.get("--maxEigens")));
+ Double.parseDouble(argMap.get("--minEigenvalue")),
+ Integer.parseInt(argMap.get("--maxEigens")));
return 0;
}
@@ -151,8 +152,8 @@ public class EigenVerificationJob extend
eigenVerifier = new SimpleEigenVerifier();
- //VectorIterable pairwiseInnerProducts = computePairwiseInnerProducts();
- computePairwiseInnerProducts();
+ // we don't currently verify orthonormality here.
+ // VectorIterable pairwiseInnerProducts = computePairwiseInnerProducts();
Map<MatrixSlice, EigenStatus> eigenMetaData = verifyEigens();
@@ -174,6 +175,7 @@ public class EigenVerificationJob extend
addOption("inMemory", "mem", "Buffer eigen matrix into memory (if you have enough!)", "false");
addOption("maxError", "err", "Maximum acceptable error", "0.05");
addOption("minEigenvalue", "mev", "Minimum eigenvalue to keep the vector for", "0.0");
+ addOption("maxEigens", "max", "Maximum number of eigenvectors to keep (0 means all)", "0");
return parseArguments(args);
}