Author: colen
Date: Fri Apr 11 03:42:31 2014
New Revision: 1586550
URL: http://svn.apache.org/r1586550
Log:
OPENNLP-672 Added feature generators parameters to CLI
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java?rev=1586550&r1=1586549&r2=1586550&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
Fri Apr 11 03:42:31 2014
@@ -32,7 +32,6 @@ import opennlp.tools.cmdline.CmdLineUtil
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.doccat.DoccatCrossValidatorTool.CVToolParams;
import opennlp.tools.cmdline.params.CVParams;
-import opennlp.tools.doccat.BagOfWordsFeatureGenerator;
import opennlp.tools.doccat.DoccatCrossValidator;
import opennlp.tools.doccat.DoccatEvaluationMonitor;
import opennlp.tools.doccat.DocumentSample;
@@ -86,8 +85,8 @@ public final class DoccatCrossValidatorT
}
}
- FeatureGenerator bagOfWordsFG = new BagOfWordsFeatureGenerator();
- FeatureGenerator[] featureGenerators = new FeatureGenerator[] { bagOfWordsFG };
+ FeatureGenerator[] featureGenerators = DoccatTrainerTool
+ .createFeatureGenerators(params.getFeatureGenerators());
DoccatEvaluationMonitor[] listenersArr = listeners
.toArray(new DoccatEvaluationMonitor[listeners.size()]);
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1586550&r1=1586549&r2=1586550&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
Fri Apr 11 03:42:31 2014
@@ -25,9 +25,12 @@ import opennlp.tools.cmdline.CmdLineUtil
import opennlp.tools.cmdline.TerminateToolException;
import opennlp.tools.cmdline.doccat.DoccatTrainerTool.TrainerToolParams;
import opennlp.tools.cmdline.params.TrainingToolParams;
+import opennlp.tools.doccat.BagOfWordsFeatureGenerator;
import opennlp.tools.doccat.DoccatModel;
import opennlp.tools.doccat.DocumentCategorizerME;
import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.doccat.FeatureGenerator;
+import opennlp.tools.util.ext.ExtensionLoader;
import opennlp.tools.util.model.ModelUtil;
public class DoccatTrainerTool
@@ -58,9 +61,13 @@ public class DoccatTrainerTool
CmdLineUtil.checkOutputFile("document categorizer model", modelOutFile);
+ FeatureGenerator[] featureGenerators = createFeatureGenerators(params
+ .getFeatureGenerators());
+
DoccatModel model;
try {
- model = DocumentCategorizerME.train(params.getLang(), sampleStream, mlParams);
+ model = DocumentCategorizerME.train(params.getLang(), sampleStream,
+ mlParams, featureGenerators);
} catch (IOException e) {
throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " +
e.getMessage(), e);
@@ -75,4 +82,18 @@ public class DoccatTrainerTool
CmdLineUtil.writeModel("document categorizer", modelOutFile, model);
}
+
+ static FeatureGenerator[] createFeatureGenerators(String featureGeneratorsNames) {
+ if(featureGeneratorsNames == null) {
+ FeatureGenerator[] def = {new BagOfWordsFeatureGenerator()};
+ return def;
+ }
+ String[] classes = featureGeneratorsNames.split(",");
+ FeatureGenerator[] featureGenerators = new FeatureGenerator[classes.length];
+ for (int i = 0; i < featureGenerators.length; i++) {
+ featureGenerators[i] = ExtensionLoader.instantiateExtension(
+ FeatureGenerator.class, classes[i]);
+ }
+ return featureGenerators;
+ }
}
Modified:
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java
URL:
http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java?rev=1586550&r1=1586549&r2=1586550&view=diff
==============================================================================
---
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java
(original)
+++
opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java
Fri Apr 11 03:42:31 2014
@@ -17,13 +17,19 @@
package opennlp.tools.cmdline.doccat;
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.params.BasicTrainingParams;
/**
* TrainingParams for DocCat.
- *
+ *
* Note: Do not use this class, internal use only!
*/
interface TrainingParams extends BasicTrainingParams {
-
+
+ @ParameterDescription(valueName = "fg", description = "Comma separated feature generator classes. Bag of words is used if not specified.")
+ @OptionalParameter
+ String getFeatureGenerators();
+
}