Author: srowen
Date: Wed Dec 22 11:06:55 2010
New Revision: 1051836
URL: http://svn.apache.org/viewvc?rev=1051836&view=rev
Log:
First pass at replacing JobConf with Configuration to make the code more
Hadoop 0.20.x-friendly
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/DoubleParameter.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/FileParameter.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/IntegerParameter.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/StringParameter.java
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/canopy/CanopyClusterer.java
Wed Dec 22 11:06:55 2010
@@ -62,9 +62,6 @@ public class CanopyClusterer {
/**
* Configure the Canopy and its distance measure
- *
- * @param configuration
- * the JobConf for this job
*/
public void configure(Configuration configuration) {
try {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansClusterer.java
Wed Dec 22 11:06:55 2010
@@ -128,9 +128,6 @@ public class FuzzyKMeansClusterer {
/**
* Configure the distance measure from the job
- *
- * @param job
- * the JobConf for the job
*/
private void configure(Configuration job) {
try {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/fuzzykmeans/FuzzyKMeansDriver.java
Wed Dec 22 11:06:55 2010
@@ -541,10 +541,6 @@ public class FuzzyKMeansDriver extends A
*
* @param filePath
* the file path to the single file containing the clusters
- * @param conf
- * the JobConf
- * @param fs
- * the FileSystem
* @return true if all Clusters are converged
* @throws IOException
* if there was an IO error
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansConfigKeys.java
Wed Dec 22 11:06:55 2010
@@ -18,7 +18,7 @@
package org.apache.mahout.clustering.kmeans;
/**
- * This class holds all config keys that are relevant to be used in the KMeans
MapReduce JobConf.
+ * This class holds all config keys that are relevant to be used in the KMeans
MapReduce configuration.
* */
public interface KMeansConfigKeys {
/** Configuration key for distance measure to use. */
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/kmeans/KMeansDriver.java
Wed Dec 22 11:06:55 2010
@@ -369,10 +369,6 @@ public class KMeansDriver extends Abstra
*
* @param filePath
* the file path to the single file containing the clusters
- * @param conf
- * the JobConf
- * @param fs
- * the FileSystem
* @return true if all Clusters are converged
* @throws IOException
* if there was an IO error
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/common/AffinityMatrixInputJob.java
Wed Dec 22 11:06:55 2010
@@ -22,7 +22,6 @@ import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@@ -75,7 +74,7 @@ public final class AffinityMatrixInputJo
DistributedRowMatrix A = new DistributedRowMatrix(seqFiles,
new Path(seqFiles, "seqtmp-" + (System.nanoTime() & 0xFF)),
dimensions, dimensions);
- A.configure(new JobConf());
+ A.setConf(new Configuration());
return A;
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/eigencuts/EigencutsDriver.java
Wed Dec 22 11:06:55 2010
@@ -25,7 +25,6 @@ import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.spectral.common.AffinityMatrixInputJob;
import org.apache.mahout.clustering.spectral.common.MatrixDiagonalizeJob;
@@ -123,14 +122,14 @@ public class EigencutsDriver extends Abs
DistributedRowMatrix L =
VectorMatrixMultiplicationJob.runJob(A.getRowPath(), D,
new Path(outputCalc, "laplacian-" + (System.nanoTime() & 0xFF)));
- L.configure(new JobConf(conf));
+ L.setConf(new Configuration(conf));
// eigendecomposition (step 3)
int overshoot = (int) ((double) dimensions * OVERSHOOT_MULTIPLIER);
List<Double> eigenValues = new ArrayList<Double>(overshoot);
Matrix eigenVectors = new DenseMatrix(overshoot, dimensions);
DistributedRowMatrix U = performEigenDecomposition(conf, L, dimensions,
overshoot, eigenValues, eigenVectors, outputCalc);
- U.configure(new JobConf(conf));
+ U.setConf(new Configuration(conf));
eigenValues = eigenValues.subList(0, dimensions);
// here's where things get interesting: steps 4, 5, and 6 are unique
@@ -152,7 +151,7 @@ public class EigencutsDriver extends Abs
if (numCuts > 0) {
// recalculate A
A = new DistributedRowMatrix(input, new Path(outputTmp,
Long.toString(System.nanoTime())), dimensions, dimensions);
- A.configure(new JobConf());
+ A.setConf(new Configuration());
}
} while (numCuts > 0);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/clustering/spectral/kmeans/SpectralKMeansDriver.java
Wed Dec 22 11:06:55 2010
@@ -27,7 +27,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.clustering.Cluster;
import org.apache.mahout.clustering.WeightedVectorWritable;
@@ -136,8 +135,8 @@ public class SpectralKMeansDriver extend
new Path(outputTmp,
"afftmp-" + (System.nanoTime() & 0xFF)),
numDims,
numDims);
- JobConf depConf = new JobConf(conf);
- A.configure(depConf);
+ Configuration depConf = new Configuration(conf);
+ A.setConf(depConf);
// Next step: construct the diagonal matrix D (represented as a vector)
// and calculate the normalized Laplacian of the form:
@@ -146,7 +145,7 @@ public class SpectralKMeansDriver extend
DistributedRowMatrix L =
VectorMatrixMultiplicationJob.runJob(affSeqFiles, D,
new Path(outputCalc, "laplacian-" + (System.nanoTime() & 0xFF)));
- L.configure(depConf);
+ L.setConf(depConf);
// Next step: perform eigen-decomposition using LanczosSolver
// since some of the eigen-output is spurious and will be eliminated
@@ -174,7 +173,7 @@ public class SpectralKMeansDriver extend
verifier.runJob(conf, lanczosSeqFiles, L.getRowPath(), verifiedEigensPath,
true, 1.0, 0.0, clusters);
Path cleanedEigens = verifier.getCleanedEigensPath();
DistributedRowMatrix W = new DistributedRowMatrix(cleanedEigens, new
Path(cleanedEigens, "tmp"), clusters, numDims);
- W.configure(depConf);
+ W.setConf(depConf);
DistributedRowMatrix Wtrans = W.transpose();
// DistributedRowMatrix Wt = W.transpose();
@@ -182,7 +181,7 @@ public class SpectralKMeansDriver extend
Path unitVectors = new Path(outputCalc, "unitvectors-" +
(System.nanoTime() & 0xFF));
UnitVectorizerJob.runJob(Wtrans.getRowPath(), unitVectors);
DistributedRowMatrix Wt = new DistributedRowMatrix(unitVectors, new
Path(unitVectors, "tmp"), clusters, numDims);
- Wt.configure(depConf);
+ Wt.setConf(depConf);
// Finally, perform k-means clustering on the rows of L (or W)
// generate random initial clusters
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/HadoopUtil.java
Wed Dec 22 11:06:55 2010
@@ -22,8 +22,6 @@ import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.mahout.clustering.kmeans.KMeansDriver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -34,8 +32,7 @@ public final class HadoopUtil {
private HadoopUtil() { }
public static void overwriteOutput(Path output) throws IOException {
- Configuration conf = new JobConf(KMeansDriver.class);
- FileSystem fs = FileSystem.get(output.toUri(), conf);
+ FileSystem fs = FileSystem.get(output.toUri(), new Configuration());
//boolean wasFile = fs.isFile(output);
if (fs.exists(output)) {
log.info("Deleting {}", output);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/DoubleParameter.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/DoubleParameter.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/DoubleParameter.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/DoubleParameter.java
Wed Dec 22 11:06:55 2010
@@ -17,12 +17,12 @@
package org.apache.mahout.common.parameters;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
public class DoubleParameter extends AbstractParameter<Double> {
- public DoubleParameter(String prefix, String name, JobConf jobConf, double
defaultValue, String description) {
- super(Double.class, prefix, name, jobConf, defaultValue, description);
+ public DoubleParameter(String prefix, String name, Configuration conf,
double defaultValue, String description) {
+ super(Double.class, prefix, name, conf, defaultValue, description);
}
@Override
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/FileParameter.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/FileParameter.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/FileParameter.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/FileParameter.java
Wed Dec 22 11:06:55 2010
@@ -19,12 +19,12 @@ package org.apache.mahout.common.paramet
import java.io.File;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
public class FileParameter extends AbstractParameter<File> {
- public FileParameter(String prefix, String name, JobConf jobConf, File
defaultValue, String description) {
- super(File.class, prefix, name, jobConf, defaultValue, description);
+ public FileParameter(String prefix, String name, Configuration conf, File
defaultValue, String description) {
+ super(File.class, prefix, name, conf, defaultValue, description);
}
@Override
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/IntegerParameter.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/IntegerParameter.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/IntegerParameter.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/IntegerParameter.java
Wed Dec 22 11:06:55 2010
@@ -17,12 +17,12 @@
package org.apache.mahout.common.parameters;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
public class IntegerParameter extends AbstractParameter<Integer> {
- public IntegerParameter(String prefix, String name, JobConf jobConf, int
defaultValue, String description) {
- super(Integer.class, prefix, name, jobConf, defaultValue, description);
+ public IntegerParameter(String prefix, String name, Configuration conf, int
defaultValue, String description) {
+ super(Integer.class, prefix, name, conf, defaultValue, description);
}
@Override
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/StringParameter.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/StringParameter.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/StringParameter.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/common/parameters/StringParameter.java
Wed Dec 22 11:06:55 2010
@@ -17,12 +17,12 @@
package org.apache.mahout.common.parameters;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
public class StringParameter extends AbstractParameter<String> {
- public StringParameter(String prefix, String name, JobConf jobConf, String
defaultValue, String description) {
- super(String.class, prefix, name, jobConf, defaultValue, description);
+ public StringParameter(String prefix, String name, Configuration conf,
String defaultValue, String description) {
+ super(String.class, prefix, name, conf, defaultValue, description);
}
@Override
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/df/mapreduce/partial/Step2Mapper.java
Wed Dec 22 11:06:55 2010
@@ -111,7 +111,7 @@ public class Step2Mapper extends Mapper<
}
/**
- * Useful for testing. Configures the mapper without using a JobConf<br>
+ * Useful for testing. Configures the mapper without using a
Configuration<br>
* TODO we don't need the keys partitions, the tree ids should suffice
*
* @param partition
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/DistributedRowMatrix.java
Wed Dec 22 11:06:55 2010
@@ -17,6 +17,8 @@
package org.apache.mahout.math.hadoop;
+import org.apache.hadoop.conf.Configurable;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -50,7 +52,7 @@ import java.util.NoSuchElementException;
* <pre>
* // the path must already contain an already created SequenceFile!
* DistributedRowMatrix m = new
DistributedRowMatrix("path/to/vector/sequenceFile", "tmp/path", 10000000,
250000);
- * m.configure(new JobConf());
+ * m.setConf(new Configuration());
* // now if we want to multiply a vector by this matrix, it's dimension
must equal the row dimension of this
* // matrix. If we want to timesSquared() a vector by this matrix, its
dimension must equal the column dimension
* // of the matrix.
@@ -60,13 +62,13 @@ import java.util.NoSuchElementException;
* </pre>
*
*/
-public class DistributedRowMatrix implements VectorIterable, JobConfigurable {
+public class DistributedRowMatrix implements VectorIterable, Configurable {
private static final Logger log =
LoggerFactory.getLogger(DistributedRowMatrix.class);
private final Path inputPath;
private final Path outputTmpPath;
- private JobConf conf;
+ private Configuration conf;
private Path rowPath;
private Path outputTmpBasePath;
private final int numRows;
@@ -83,7 +85,12 @@ public class DistributedRowMatrix implem
}
@Override
- public void configure(JobConf conf) {
+ public Configuration getConf() {
+ return conf;
+ }
+
+ @Override
+ public void setConf(Configuration conf) {
this.conf = conf;
try {
rowPath = FileSystem.get(conf).makeQualified(inputPath);
@@ -149,7 +156,7 @@ public class DistributedRowMatrix implem
JobConf conf =
MatrixMultiplicationJob.createMatrixMultiplyJobConf(rowPath, other.rowPath,
outPath, other.numCols);
JobClient.runJob(conf);
DistributedRowMatrix out = new DistributedRowMatrix(outPath,
outputTmpPath, numCols, other.numCols());
- out.configure(conf);
+ out.setConf(conf);
return out;
}
@@ -158,7 +165,7 @@ public class DistributedRowMatrix implem
JobConf conf = TransposeJob.buildTransposeJobConf(rowPath, outputPath,
numRows);
JobClient.runJob(conf);
DistributedRowMatrix m = new DistributedRowMatrix(outputPath,
outputTmpPath, numCols, numRows);
- m.configure(this.conf);
+ m.setConf(this.conf);
return m;
}
@@ -203,11 +210,11 @@ public class DistributedRowMatrix implem
private boolean hasNext;
private int statusIndex;
private final FileSystem fs;
- private final JobConf conf;
+ private final Configuration conf;
private final IntWritable i = new IntWritable();
private final VectorWritable v = new VectorWritable();
- public DistributedMatrixIterator(FileSystem fs, Path rowPath, JobConf
conf) throws IOException {
+ public DistributedMatrixIterator(FileSystem fs, Path rowPath,
Configuration conf) throws IOException {
this.fs = fs;
this.conf = conf;
statuses = fs.globStatus(new Path(rowPath, "*"));
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/MatrixMultiplicationJob.java
Wed Dec 22 11:06:55 2010
@@ -17,6 +17,7 @@
package org.apache.mahout.math.hadoop;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapred.FileOutputFormat;
@@ -91,8 +92,8 @@ public class MatrixMultiplicationJob ext
Integer.parseInt(argMap.get("--numRowsB")),
Integer.parseInt(argMap.get("--numColsB")));
- a.configure(new JobConf(getConf()));
- b.configure(new JobConf(getConf()));
+ a.setConf(new Configuration(getConf()));
+ b.setConf(new Configuration(getConf()));
//DistributedRowMatrix c = a.times(b);
a.times(b);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/TransposeJob.java
Wed Dec 22 11:06:55 2010
@@ -17,6 +17,7 @@
package org.apache.mahout.math.hadoop;
+import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
@@ -68,7 +69,7 @@ public class TransposeJob extends Abstra
int numCols = Integer.parseInt(parsedArgs.get("--numCols"));
DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath,
outputTmpPath, numRows, numCols);
- matrix.configure(new JobConf(getConf()));
+ matrix.setConf(new Configuration(getConf()));
matrix.transpose();
return 0;
@@ -122,12 +123,10 @@ public class TransposeJob extends Abstra
public static class TransposeReducer extends MapReduceBase
implements
Reducer<IntWritable,DistributedRowMatrix.MatrixEntryWritable,IntWritable,VectorWritable>
{
- //private JobConf conf;
private int newNumCols;
@Override
public void configure(JobConf conf) {
- //this.conf = conf;
newNumCols = conf.getInt(NUM_ROWS_KEY, Integer.MAX_VALUE);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/DistributedLanczosSolver.java
Wed Dec 22 11:06:55 2010
@@ -28,7 +28,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
@@ -75,7 +74,7 @@ public class DistributedLanczosSolver ex
DistributedRowMatrix matrix = new DistributedRowMatrix(
inputPath, outputTmpPath,
numRows, numCols);
- matrix.configure(new JobConf(originalConfig));
+ matrix.setConf(new Configuration(originalConfig));
setConf(originalConfig);
solve(matrix, desiredRank, eigenVectors, eigenValues, isSymmetric);
serializeOutput(eigenVectors, eigenValues, new
Path(outputEigenVectorPathString));
@@ -147,7 +146,7 @@ public class DistributedLanczosSolver ex
maxError,
minEigenvalue,
inMemory,
- getConf() != null ? new
JobConf(getConf()) : new JobConf());
+ getConf() != null ? new
Configuration(getConf()) : new Configuration());
}
/**
@@ -173,7 +172,7 @@ public class DistributedLanczosSolver ex
List<Double> eigenValues = new ArrayList<Double>();
DistributedRowMatrix matrix = new DistributedRowMatrix(inputPath,
outputTmpPath, numRows, numCols);
- matrix.configure(new JobConf(getConf() != null ? getConf() : new
Configuration()));
+ matrix.setConf(new Configuration(getConf() != null ? getConf() : new
Configuration()));
solve(matrix, desiredRank, eigenVectors, eigenValues, isSymmetric);
Path outputEigenVectorPath = new Path(outputPath, RAW_EIGENVECTORS);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/math/hadoop/decomposer/EigenVerificationJob.java
Wed Dec 22 11:06:55 2010
@@ -32,7 +32,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.commandline.DefaultOptionCreator;
@@ -126,7 +125,8 @@ public class EigenVerificationJob extend
* @param maxError a double representing the maximum error
* @param minEigenValue a double representing the minimum eigenvalue
* @param inMemory a boolean requesting in-memory preparation
- * @param config the JobConf to use, or null if a default is ok (saves
referencing JobConf in calling classes unless needed)
+ * @param conf the Configuration to use, or null if a default is ok
+ * (saves referencing Configuration in calling classes unless needed)
*/
public int run(Path corpusInput,
Path eigenInput,
@@ -135,17 +135,17 @@ public class EigenVerificationJob extend
double maxError,
double minEigenValue,
boolean inMemory,
- JobConf config) throws IOException {
+ Configuration conf) throws IOException {
this.outPath = output;
this.tmpOut = tempOut;
this.maxError = maxError;
this.minEigenValue = minEigenValue;
if (eigenInput != null && eigensToVerify == null) {
- prepareEigens(config, eigenInput, inMemory);
+ prepareEigens(conf, eigenInput, inMemory);
}
DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tempOut, 1,
1);
- c.configure(config);
+ c.setConf(conf);
corpus = c;
// set up eigenverifier and orthoverifier TODO: allow multithreaded
execution
@@ -243,9 +243,9 @@ public class EigenVerificationJob extend
return eigenMetaData;
}
- private void prepareEigens(JobConf conf, Path eigenInput, boolean inMemory) {
+ private void prepareEigens(Configuration conf, Path eigenInput, boolean
inMemory) {
DistributedRowMatrix eigens = new DistributedRowMatrix(eigenInput, tmpOut,
1, 1);
- eigens.configure(conf);
+ eigens.setConf(conf);
if (inMemory) {
List<Vector> eigenVectors = new ArrayList<Vector>();
for (MatrixSlice slice : eigens) {
@@ -286,11 +286,11 @@ public class EigenVerificationJob extend
maxEigensToKeep = maxEigens;
this.maxError = maxError;
if (eigenInput != null && eigensToVerify == null) {
- prepareEigens(new JobConf(conf), eigenInput, inMemory);
+ prepareEigens(new Configuration(conf), eigenInput, inMemory);
}
DistributedRowMatrix c = new DistributedRowMatrix(corpusInput, tmpOut, 1,
1);
- c.configure(new JobConf(conf));
+ c.setConf(new Configuration(conf));
corpus = c;
eigenVerifier = new SimpleEigenVerifier();
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/TestDistributedRowMatrix.java
Wed Dec 22 11:06:55 2010
@@ -20,7 +20,6 @@ package org.apache.mahout.math.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.clustering.ClusteringTestUtils;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.Matrix;
@@ -140,14 +139,18 @@ public final class TestDistributedRowMat
final Iterator<MatrixSlice> it = m.iterator();
return new Iterator<VectorWritable>() {
@Override
- public boolean hasNext() { return it.hasNext(); }
+ public boolean hasNext() {
+ return it.hasNext();
+ }
@Override
public VectorWritable next() {
MatrixSlice slice = it.next();
return new VectorWritable(slice.vector());
}
@Override
- public void remove() { it.remove(); }
+ public void remove() {
+ it.remove();
+ }
};
}
}, true, new Path(baseTmpDirPath, "distMatrix/part-00000"), fs, conf);
@@ -156,7 +159,7 @@ public final class TestDistributedRowMat
new
Path(baseTmpDirPath, "tmpOut"),
m.numRows(),
m.numCols());
- distMatrix.configure(new JobConf(conf));
+ distMatrix.setConf(new Configuration(conf));
return distMatrix;
}
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolver.java Wed Dec 22 11:06:55 2010
@@ -17,7 +17,7 @@
package org.apache.mahout.math.hadoop.decomposer;
-import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.conf.Configuration;
import org.apache.mahout.math.DenseMatrix;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.decomposer.SolverTest;
@@ -36,7 +36,7 @@ public final class TestDistributedLanczo
File testData = getTestTempDir("testdata");
DistributedRowMatrix corpus = new
TestDistributedRowMatrix().randomDistributedMatrix(500,
450, 400, 10, 10.0, symmetric, testData.getAbsolutePath());
- corpus.configure(new JobConf());
+ corpus.setConf(new Configuration());
DistributedLanczosSolver solver = new DistributedLanczosSolver();
int desiredRank = 30;
Matrix eigenVectors = new DenseMatrix(desiredRank, corpus.numCols());
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
--- mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java (original)
+++ mahout/trunk/core/src/test/java/org/apache/mahout/math/hadoop/decomposer/TestDistributedLanczosSolverCLI.java Wed Dec 22 11:06:55 2010
@@ -22,7 +22,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.mahout.clustering.AbstractCluster;
import org.apache.mahout.common.MahoutTestCase;
import org.apache.mahout.math.DenseMatrix;
@@ -40,7 +39,7 @@ public final class TestDistributedLanczo
Path testData = getTestTempDirPath("testdata");
DistributedRowMatrix corpus =
new TestDistributedRowMatrix().randomDistributedMatrix(500, 450, 500,
10, 10.0, true, testData.toString());
- corpus.configure(new JobConf());
+ corpus.setConf(new Configuration());
Path output = getTestTempDirPath("output");
Path tmp = getTestTempDirPath("tmp");
String[] args = {
@@ -81,7 +80,7 @@ public final class TestDistributedLanczo
Path testData = getTestTempDirPath("testdata");
DistributedRowMatrix corpus =
new TestDistributedRowMatrix().randomDistributedMatrix(500, 450, 500,
10, 10.0, true, testData.toString());
- corpus.configure(new JobConf());
+ corpus.setConf(new Configuration());
Path output = getTestTempDirPath("output");
Path tmp = getTestTempDirPath("tmp");
String[] args = {
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/cdbw/CDbwEvaluator.java Wed Dec 22 11:06:55 2010
@@ -82,7 +82,7 @@ public class CDbwEvaluator {
* Initialize a new instance from job information
*
* @param conf
- * a JobConf with appropriate parameters
+ * a Configuration with appropriate parameters
* @param clustersIn
* a String path to the input clusters directory
*/
Modified:
mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
--- mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java (original)
+++ mahout/trunk/utils/src/main/java/org/apache/mahout/clustering/evaluation/ClusterEvaluator.java Wed Dec 22 11:06:55 2010
@@ -68,7 +68,7 @@ public class ClusterEvaluator {
* Initialize a new instance from job information
*
* @param conf
- * a JobConf with appropriate parameters
+ * a Configuration with appropriate parameters
* @param clustersIn
* a String path to the input clusters directory
*/
Modified:
mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java?rev=1051836&r1=1051835&r2=1051836&view=diff
==============================================================================
--- mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java (original)
+++ mahout/trunk/utils/src/test/java/org/apache/mahout/clustering/TestClusterDumper.java Wed Dec 22 11:06:55 2010
@@ -28,7 +28,6 @@ import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.JobConf;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -343,12 +342,12 @@ public final class TestClusterDumper ext
// now multiply the testdata matrix and the eigenvector matrix
DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors,
tmp, desiredRank - 1, sampleDimension);
- JobConf conf = new JobConf(config);
- svdT.configure(conf);
+ Configuration conf = new Configuration(config);
+ svdT.setConf(conf);
DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp,
sampleData.size(), sampleDimension);
- a.configure(conf);
+ a.setConf(conf);
DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
- sData.configure(conf);
+ sData.setConf(conf);
// now run the Canopy job to prime kMeans canopies
CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false,
false);
@@ -373,17 +372,17 @@ public final class TestClusterDumper ext
int desiredRank = 13;
solver.run(testData, output, tmp, sampleData.size(), sampleDimension,
false, desiredRank);
Path rawEigenvectors = new Path(output,
DistributedLanczosSolver.RAW_EIGENVECTORS);
- JobConf conf = new JobConf(config);
+ Configuration conf = new Configuration(config);
new EigenVerificationJob().run(testData, rawEigenvectors, output, tmp,
0.5, 0.0, true, conf);
Path cleanEigenvectors = new Path(output,
EigenVerificationJob.CLEAN_EIGENVECTORS);
// now multiply the testdata matrix and the eigenvector matrix
DistributedRowMatrix svdT = new DistributedRowMatrix(cleanEigenvectors,
tmp, desiredRank - 1, sampleDimension);
- svdT.configure(conf);
+ svdT.setConf(conf);
DistributedRowMatrix a = new DistributedRowMatrix(testData, tmp,
sampleData.size(), sampleDimension);
- a.configure(conf);
+ a.setConf(conf);
DistributedRowMatrix sData = a.transpose().times(svdT.transpose());
- sData.configure(conf);
+ sData.setConf(conf);
// now run the Canopy job to prime kMeans canopies
CanopyDriver.run(conf, sData.getRowPath(), output, measure, 8, 4, false,
false);