Author: srowen
Date: Tue Jan 11 20:24:05 2011
New Revision: 1057842
URL: http://svn.apache.org/viewvc?rev=1057842&view=rev
Log:
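MAHOUT-580: Add typed accessors to BayesParameters, replace Parameters.fromString() with a Parameters(String) constructor, and have the Bayes datastores take BayesParameters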
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/classifier/Classify.java
Tue Jan 11 20:24:05 2011
@@ -110,7 +110,8 @@ public final class Classify {
BayesParameters params = new BayesParameters();
params.setGramSize(gramSize);
String modelBasePath = (String) cmdLine.getValue(pathOpt);
-
+ params.setBasePath(modelBasePath);
+
log.info("Loading model from: {}", params.print());
Algorithm algorithm;
@@ -136,11 +137,11 @@ public final class Classify {
if ("bayes".equalsIgnoreCase(classifierType)) {
log.info("Using Bayes Classifier");
algorithm = new BayesAlgorithm();
- datastore = new HBaseBayesDatastore(modelBasePath, params);
+ datastore = new HBaseBayesDatastore(params);
} else if ("cbayes".equalsIgnoreCase(classifierType)) {
log.info("Using Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
- datastore = new HBaseBayesDatastore(modelBasePath, params);
+ datastore = new HBaseBayesDatastore(params);
} else {
throw new IllegalArgumentException("Unrecognized classifier type: " +
classifierType);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/TestClassifier.java
Tue Jan 11 20:24:05 2011
@@ -174,7 +174,7 @@ public final class TestClassifier {
params.setGramSize(gramSize);
params.set("verbose", Boolean.toString(verbose));
- params.set("basePath", modelBasePath);
+ params.setBasePath(modelBasePath);
params.set("classifierType", classifierType);
params.set("dataSource", dataSource);
params.set("defaultCat", defaultCat);
@@ -223,11 +223,11 @@ public final class TestClassifier {
if (params.get("classifierType").equalsIgnoreCase("bayes")) {
log.info("Testing Bayes Classifier");
algorithm = new BayesAlgorithm();
- datastore = new HBaseBayesDatastore(params.get("basePath"), params);
+ datastore = new HBaseBayesDatastore(params);
} else if (params.get("classifierType").equalsIgnoreCase("cbayes")) {
log.info("Testing Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
- datastore = new HBaseBayesDatastore(params.get("basePath"), params);
+ datastore = new HBaseBayesDatastore(params);
} else {
throw new IllegalArgumentException("Unrecognized classifier type: " +
params.get("classifierType"));
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/common/BayesParameters.java
Tue Jan 11 20:24:05 2011
@@ -17,26 +17,64 @@
package org.apache.mahout.classifier.bayes.common;
+import java.io.IOException;
+
import org.apache.mahout.common.Parameters;
/**
* BayesParameter used for passing parameters to the Map/Reduce Jobs
parameters include gramSize,
*/
-public class BayesParameters extends Parameters {
+public final class BayesParameters extends Parameters {
+
+ private static final String DEFAULT_MIN_SUPPORT = "-1";
+ private static final String DEFAULT_MIN_DF = "-1";
+
+ public BayesParameters() {
+
+ }
+
+ public BayesParameters(String serializedString) throws IOException {
+ super(parseParams(serializedString));
+ }
+
+ public int getGramSize() {
+ return Integer.parseInt(get("gramSize"));
+ }
public void setGramSize(int gramSize) {
set("gramSize", Integer.toString(gramSize));
}
+
+ public int getMinSupport() {
+ return Integer.parseInt(get("minSupport", DEFAULT_MIN_SUPPORT));
+ }
public void setMinSupport(int minSupport) {
set("minSupport", Integer.toString(minSupport));
}
+
+ public int getMinDF() {
+ return Integer.parseInt(get("minDf", DEFAULT_MIN_DF));
+ }
public void setMinDF(int minDf) {
set("minDf", Integer.toString(minDf));
}
+
+ public boolean isSkipCleanup() {
+ return Boolean.parseBoolean(get("skipCleanup", "false"));
+ }
public void setSkipCleanup(boolean b) {
set("skipCleanup", Boolean.toString(b));
}
+
+ public String getBasePath() {
+ return get("basePath");
+ }
+
+ public void setBasePath(String basePath) {
+ set("basePath", basePath);
+ }
+
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/HBaseBayesDatastore.java
Tue Jan 11 20:24:05 2011
@@ -30,6 +30,7 @@ import org.apache.hadoop.hbase.client.Ge
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.mahout.classifier.bayes.common.BayesParameters;
import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
import org.apache.mahout.classifier.bayes.interfaces.Datastore;
import org.apache.mahout.classifier.bayes.mapreduce.common.BayesConstants;
@@ -67,8 +68,8 @@ public class HBaseBayesDatastore impleme
private double sigmaJSigmaK = -1.0;
- public HBaseBayesDatastore(String hbaseTable, Parameters params) {
- this.hbaseTable = hbaseTable;
+ public HBaseBayesDatastore(BayesParameters params) {
+ this.hbaseTable = params.getBasePath();
this.parameters = params;
this.tableCache = new HybridCache<String,Result>(50000, 100000);
alphaI = Double.valueOf(parameters.get("alpha_i", "1.0"));
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/datastore/InMemoryBayesDatastore.java
Tue Jan 11 20:24:05 2011
@@ -23,10 +23,10 @@ import java.util.Collection;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.mahout.classifier.bayes.common.BayesParameters;
import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
import org.apache.mahout.classifier.bayes.interfaces.Datastore;
import org.apache.mahout.classifier.bayes.io.SequenceFileModelReader;
-import org.apache.mahout.common.Parameters;
import org.apache.mahout.math.Matrix;
import org.apache.mahout.math.SparseMatrix;
import org.apache.mahout.math.map.OpenIntDoubleHashMap;
@@ -54,7 +54,7 @@ public class InMemoryBayesDatastore impl
private final Matrix weightMatrix = new SparseMatrix(new int[] {1, 0});
- private final Parameters params;
+ private final BayesParameters params;
private double thetaNormalizer = 1.0;
@@ -62,8 +62,8 @@ public class InMemoryBayesDatastore impl
private double sigmaJsigmaK = 1.0;
- public InMemoryBayesDatastore(Parameters params) {
- String basePath = params.get("basePath");
+ public InMemoryBayesDatastore(BayesParameters params) {
+ String basePath = params.getBasePath();
this.params = params;
params.set("sigma_j", basePath + "/trainer-weights/Sigma_j/part-*");
params.set("sigma_k", basePath + "/trainer-weights/Sigma_k/part-*");
@@ -76,7 +76,7 @@ public class InMemoryBayesDatastore impl
@Override
public void initialize() throws InvalidDatastoreException {
Configuration conf = new Configuration();
- String basePath = params.get("basePath");
+ String basePath = params.getBasePath();
try {
SequenceFileModelReader.loadModel(this, FileSystem.get(new
Path(basePath).toUri(), conf), params, conf);
} catch (IOException e) {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesClassifierMapper.java
Tue Jan 11 20:24:05 2011
@@ -30,6 +30,7 @@ import org.apache.hadoop.mapred.Reporter
import org.apache.mahout.classifier.ClassifierResult;
import org.apache.mahout.classifier.bayes.algorithm.BayesAlgorithm;
import org.apache.mahout.classifier.bayes.algorithm.CBayesAlgorithm;
+import org.apache.mahout.classifier.bayes.common.BayesParameters;
import org.apache.mahout.classifier.bayes.datastore.HBaseBayesDatastore;
import org.apache.mahout.classifier.bayes.datastore.InMemoryBayesDatastore;
import org.apache.mahout.classifier.bayes.exceptions.InvalidDatastoreException;
@@ -37,7 +38,6 @@ import org.apache.mahout.classifier.baye
import org.apache.mahout.classifier.bayes.interfaces.Datastore;
import org.apache.mahout.classifier.bayes.mapreduce.common.BayesConstants;
import org.apache.mahout.classifier.bayes.model.ClassifierContext;
-import org.apache.mahout.common.Parameters;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.nlp.NGrams;
import org.slf4j.Logger;
@@ -96,7 +96,7 @@ public class BayesClassifierMapper exten
@Override
public void configure(JobConf job) {
try {
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ BayesParameters params = new BayesParameters(job.get("bayes.parameters", ""));
log.info("Bayes Parameter {}", params.print());
log.info("{}", params.print());
Algorithm algorithm;
@@ -119,11 +119,11 @@ public class BayesClassifierMapper exten
if (params.get("classifierType").equalsIgnoreCase("bayes")) {
log.info("Testing Bayes Classifier");
algorithm = new BayesAlgorithm();
- datastore = new HBaseBayesDatastore(params.get("basePath"), params);
+ datastore = new HBaseBayesDatastore(params);
} else if (params.get("classifierType").equalsIgnoreCase("cbayes")) {
log.info("Testing Complementary Bayes Classifier");
algorithm = new CBayesAlgorithm();
- datastore = new HBaseBayesDatastore(params.get("basePath"), params);
+ datastore = new HBaseBayesDatastore(params);
} else {
throw new IllegalArgumentException("Unrecognized classifier type: "
+ params.get("classifierType"));
}
@@ -135,7 +135,7 @@ public class BayesClassifierMapper exten
classifier.initialize();
defaultCategory = params.get("defaultCat");
- gramSize = Integer.valueOf(params.get("gramSize"));
+ gramSize = params.getGramSize();
} catch (IOException ex) {
log.warn(ex.toString(), ex);
} catch (InvalidDatastoreException e) {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesDriver.java
Tue Jan 11 20:24:05 2011
@@ -59,7 +59,7 @@ public class BayesDriver implements Baye
BayesThetaNormalizerDriver normalizer = new BayesThetaNormalizerDriver();
normalizer.runJob(input, output, params);
- if (Boolean.parseBoolean(params.get("skipCleanup"))) {
+ if (params.isSkipCleanup()) {
return;
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerMapper.java
Tue Jan 11 20:24:05 2011
@@ -97,7 +97,7 @@ public class BayesThetaNormalizerMapper
vocabCountString = job.get("cnaivebayes.vocabCount", vocabCountString);
vocabCount = stringifier.fromString(vocabCountString);
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ Parameters params = new Parameters(job.get("bayes.parameters", ""));
alphaI = Double.valueOf(params.get("alpha_i", "1.0"));
} catch (IOException ex) {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/bayes/BayesThetaNormalizerReducer.java
Tue Jan 11 20:24:05 2011
@@ -78,7 +78,7 @@ public class BayesThetaNormalizerReducer
@Override
public void configure(JobConf job) {
try {
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ Parameters params = new Parameters(job.get("bayes.parameters", ""));
if (params.get("dataSource").equals("hbase")) {
useHbase = true;
} else {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesDriver.java
Tue Jan 11 20:24:05 2011
@@ -59,7 +59,7 @@ public class CBayesDriver implements Bay
CBayesThetaNormalizerDriver normalizer = new CBayesThetaNormalizerDriver();
normalizer.runJob(input, output, params);
- if (Boolean.parseBoolean(params.get("skipCleanup"))) {
+ if (params.isSkipCleanup()) {
return;
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerMapper.java
Tue Jan 11 20:24:05 2011
@@ -126,7 +126,7 @@ public class CBayesThetaNormalizerMapper
String vocabCountString = job.get("cnaivebayes.vocabCount",
stringifier.toString(vocabCount));
vocabCount = stringifier.fromString(vocabCountString);
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ Parameters params = new Parameters(job.get("bayes.parameters", ""));
alphaI = Double.valueOf(params.get("alpha_i", "1.0"));
} catch (IOException ex) {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/cbayes/CBayesThetaNormalizerReducer.java
Tue Jan 11 20:24:05 2011
@@ -78,7 +78,7 @@ public class CBayesThetaNormalizerReduce
@Override
public void configure(JobConf job) {
try {
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ Parameters params = new Parameters(job.get("bayes.parameters", ""));
if (params.get("dataSource").equals("hbase")) {
useHbase = true;
} else {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureMapper.java
Tue Jan 11 20:24:05 2011
@@ -32,7 +32,7 @@ import org.apache.hadoop.mapred.Reporter
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.shingle.ShingleFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-import org.apache.mahout.common.Parameters;
+import org.apache.mahout.classifier.bayes.common.BayesParameters;
import org.apache.mahout.common.StringTuple;
import org.apache.mahout.common.iterator.ArrayIterator;
import org.apache.mahout.math.function.ObjectIntProcedure;
@@ -170,9 +170,9 @@ public class BayesFeatureMapper extends
@Override
public void configure(JobConf job) {
try {
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ BayesParameters params = new BayesParameters(job.get("bayes.parameters", ""));
log.info("Bayes Parameter {}", params.print());
- gramSize = Integer.valueOf(params.get("gramSize"));
+ gramSize = params.getGramSize();
} catch (IOException ex) {
log.warn(ex.toString(), ex);
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureReducer.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesFeatureReducer.java
Tue Jan 11 20:24:05 2011
@@ -29,7 +29,7 @@ import org.apache.hadoop.mapred.MapReduc
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
-import org.apache.mahout.common.Parameters;
+import org.apache.mahout.classifier.bayes.common.BayesParameters;
import org.apache.mahout.common.StringTuple;
import com.google.common.base.Preconditions;
@@ -40,9 +40,6 @@ public class BayesFeatureReducer extends
private static final Logger log =
LoggerFactory.getLogger(BayesFeatureReducer.class);
- private static final String DEFAULT_MIN_SUPPORT = "-1";
- private static final String DEFAULT_MIN_DF = "-1";
-
private double minSupport = -1;
private double minDf = -1;
@@ -112,10 +109,10 @@ public class BayesFeatureReducer extends
@Override
public void configure(JobConf job) {
try {
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ BayesParameters params = new BayesParameters(job.get("bayes.parameters", ""));
log.info("Bayes Parameter {}", params.print());
- minSupport = Integer.valueOf(params.get("minSupport", DEFAULT_MIN_SUPPORT));
- minDf = Integer.valueOf(params.get("minDf", DEFAULT_MIN_DF));
+ minSupport = params.getMinSupport();
+ minDf = params.getMinDF();
} catch (IOException ex) {
log.warn(ex.toString(), ex);
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesTfIdfReducer.java
Tue Jan 11 20:24:05 2011
@@ -99,7 +99,7 @@ public class BayesTfIdfReducer extends M
@Override
public void configure(JobConf job) {
try {
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ Parameters params = new Parameters(job.get("bayes.parameters", ""));
if (params.get("dataSource").equals("hbase")) {
useHbase = true;
} else {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/classifier/bayes/mapreduce/common/BayesWeightSummerReducer.java
Tue Jan 11 20:24:05 2011
@@ -92,7 +92,7 @@ public class BayesWeightSummerReducer ex
@Override
public void configure(JobConf job) {
try {
- Parameters params = Parameters.fromString(job.get("bayes.parameters", ""));
+ Parameters params = new Parameters(job.get("bayes.parameters", ""));
if (params.get("dataSource").equals("hbase")) {
useHbase = true;
} else {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
--- mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
(original)
+++ mahout/trunk/core/src/main/java/org/apache/mahout/common/Parameters.java
Tue Jan 11 20:24:05 2011
@@ -32,17 +32,19 @@ public class Parameters {
private static final Logger log = LoggerFactory.getLogger(Parameters.class);
private Map<String,String> params = new HashMap<String,String>();
-
- // private Configuration conf = new Configuration();
-
+
public Parameters() {
}
-
- private Parameters(Map<String,String> params) {
+
+ public Parameters(String serializedString) throws IOException {
+ this(parseParams(serializedString));
+ }
+
+ protected Parameters(Map<String,String> params) {
this.params = params;
}
-
+
public String get(String key) {
return params.get(key);
}
@@ -59,9 +61,9 @@ public class Parameters {
@Override
public String toString() {
Configuration conf = new Configuration();
- conf
- .set("io.serializations",
- "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
+ conf.set("io.serializations",
+ "org.apache.hadoop.io.serializer.JavaSerialization,"
+ + "org.apache.hadoop.io.serializer.WritableSerialization");
DefaultStringifier<Map<String,String>> mapStringifier = new
DefaultStringifier<Map<String,String>>(conf,
GenericsUtil.getClass(params));
try {
@@ -76,16 +78,16 @@ public class Parameters {
public String print() {
return params.toString();
}
-
- public static Parameters fromString(String serializedString) throws IOException {
+
+ public static Map<String,String> parseParams(String serializedString) throws IOException {
Configuration conf = new Configuration();
- conf
- .set("io.serializations",
- "org.apache.hadoop.io.serializer.JavaSerialization,org.apache.hadoop.io.serializer.WritableSerialization");
+ conf.set("io.serializations",
+ "org.apache.hadoop.io.serializer.JavaSerialization,"
+ + "org.apache.hadoop.io.serializer.WritableSerialization");
Map<String,String> params = new HashMap<String,String>();
DefaultStringifier<Map<String,String>> mapStringifier = new
DefaultStringifier<Map<String,String>>(conf,
GenericsUtil.getClass(params));
- params = mapStringifier.fromString(serializedString);
- return new Parameters(params);
+ return mapStringifier.fromString(serializedString);
}
+
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/AggregatorReducer.java
Tue Jan 11 20:24:05 2011
@@ -49,7 +49,7 @@ public class AggregatorReducer extends R
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
super.setup(context);
- Parameters params = Parameters.fromString(context.getConfiguration().get("pfp.parameters", ""));
+ Parameters params = new Parameters(context.getConfiguration().get("pfp.parameters", ""));
maxHeapSize = Integer.valueOf(params.get("maxHeapSize", "50"));
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelCountingMapper.java
Tue Jan 11 20:24:05 2011
@@ -54,7 +54,7 @@ public class ParallelCountingMapper exte
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
super.setup(context);
- Parameters params = Parameters.fromString(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
+ Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
splitter = Pattern.compile(params.get(PFPGrowth.SPLIT_PATTERN,
PFPGrowth.SPLITTER.toString()));
}
}
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthMapper.java
Tue Jan 11 20:24:05 2011
@@ -71,7 +71,7 @@ public class ParallelFPGrowthMapper exte
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
super.setup(context);
- Parameters params = Parameters.fromString(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
+ Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
OpenObjectIntHashMap<String> fMap = new OpenObjectIntHashMap<String>();
int i = 0;
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/ParallelFPGrowthReducer.java
Tue Jan 11 20:24:05 2011
@@ -109,7 +109,7 @@ public class ParallelFPGrowthReducer ext
protected void setup(Context context) throws IOException,
InterruptedException {
super.setup(context);
- Parameters params = Parameters.fromString(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
+ Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
int i = 0;
for (Pair<String,Long> e : PFPGrowth.deserializeList(params,
PFPGrowth.F_LIST, context.getConfiguration())) {
Modified:
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java
(original)
+++
mahout/trunk/core/src/main/java/org/apache/mahout/fpm/pfpgrowth/TransactionSortingMapper.java
Tue Jan 11 20:24:05 2011
@@ -70,7 +70,7 @@ public class TransactionSortingMapper ex
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
super.setup(context);
- Parameters params = Parameters.fromString(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
+ Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
int i = 0;
for (Pair<String,Long> e : PFPGrowth.deserializeList(params,
PFPGrowth.F_LIST, context.getConfiguration())) {
Modified:
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java
(original)
+++
mahout/trunk/core/src/test/java/org/apache/mahout/classifier/bayes/BayesClassifierSelfTest.java
Tue Jan 11 20:24:05 2011
@@ -75,7 +75,7 @@ public final class BayesClassifierSelfTe
TrainClassifier.trainNaiveBayes(bayesInputPath, bayesModelPath, params);
params.set("verbose", "true");
- params.set("basePath", bayesModelPath.toString());
+ params.setBasePath(bayesModelPath.toString());
params.set("classifierType", "bayes");
params.set("dataSource", "hdfs");
params.set("defaultCat", "unknown");
@@ -89,8 +89,7 @@ public final class BayesClassifierSelfTe
ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(),
params.get("defaultCat"));
for (String[] entry : ClassifierData.DATA) {
- List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
- .generateNGramsWithoutLabel();
+ List<String> document = new NGrams(entry[1], params.getGramSize()).generateNGramsWithoutLabel();
assertEquals(3, classifier.classifyDocument(document.toArray(new
String[document.size()]),
params.get("defaultCat"), 100).length);
ClassifierResult result =
classifier.classifyDocument(document.toArray(new String[document.size()]),
params
@@ -128,7 +127,7 @@ public final class BayesClassifierSelfTe
TrainClassifier.trainCNaiveBayes(bayesInputPath, bayesModelPath, params);
params.set("verbose", "true");
- params.set("basePath", bayesModelPath.toString());
+ params.setBasePath(bayesModelPath.toString());
params.set("classifierType", "cbayes");
params.set("dataSource", "hdfs");
params.set("defaultCat", "unknown");
@@ -141,8 +140,7 @@ public final class BayesClassifierSelfTe
classifier.initialize();
ResultAnalyzer resultAnalyzer = new ResultAnalyzer(classifier.getLabels(),
params.get("defaultCat"));
for (String[] entry : ClassifierData.DATA) {
- List<String> document = new NGrams(entry[1], Integer.parseInt(params.get("gramSize")))
- .generateNGramsWithoutLabel();
+ List<String> document = new NGrams(entry[1], params.getGramSize()).generateNGramsWithoutLabel();
assertEquals(3, classifier.classifyDocument(document.toArray(new
String[document.size()]),
params.get("defaultCat"), 100).length);
ClassifierResult result =
classifier.classifyDocument(document.toArray(new String[document.size()]),
params
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleMapper.java
Tue Jan 11 20:24:05 2011
@@ -71,7 +71,7 @@ public class KeyBasedStringTupleMapper e
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
super.setup(context);
- Parameters params = Parameters.fromString(context.getConfiguration().get("job.parameters", ""));
+ Parameters params = new Parameters(context.getConfiguration().get("job.parameters", ""));
splitter = Pattern.compile(params.get("splitPattern", "[ \t]*\t[ \t]*"));
int selectedFieldCount = Integer.valueOf(params.get("selectedFieldCount",
"0"));
Modified:
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java
URL:
http://svn.apache.org/viewvc/mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java?rev=1057842&r1=1057841&r2=1057842&view=diff
==============================================================================
---
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java
(original)
+++
mahout/trunk/examples/src/main/java/org/apache/mahout/fpm/pfpgrowth/dataset/KeyBasedStringTupleReducer.java
Tue Jan 11 20:24:05 2011
@@ -31,8 +31,8 @@ public class KeyBasedStringTupleReducer
private int maxTransactionLength = 100;
@Override
- protected void reduce(Text key, Iterable<StringTuple> values, Context context) throws IOException,
- InterruptedException {
+ protected void reduce(Text key, Iterable<StringTuple> values, Context context)
+ throws IOException, InterruptedException {
Collection<String> items = new HashSet<String>();
for (StringTuple value : values) {
@@ -68,7 +68,7 @@ public class KeyBasedStringTupleReducer
@Override
protected void setup(Context context) throws IOException,
InterruptedException {
super.setup(context);
- Parameters params = Parameters.fromString(context.getConfiguration().get("job.parameters", ""));
+ Parameters params = new Parameters(context.getConfiguration().get("job.parameters", ""));
maxTransactionLength = Integer.valueOf(params.get("maxTransactionLength",
"100"));
}
}