Author: colen
Date: Fri Apr 11 03:42:31 2014
New Revision: 1586550

URL: http://svn.apache.org/r1586550
Log:
OPENNLP-672 Added feature generators parameters to CLI

Modified:
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
    opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java?rev=1586550&r1=1586549&r2=1586550&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatCrossValidatorTool.java Fri Apr 11 03:42:31 2014
@@ -32,7 +32,6 @@ import opennlp.tools.cmdline.CmdLineUtil
 import opennlp.tools.cmdline.TerminateToolException;
 import opennlp.tools.cmdline.doccat.DoccatCrossValidatorTool.CVToolParams;
 import opennlp.tools.cmdline.params.CVParams;
-import opennlp.tools.doccat.BagOfWordsFeatureGenerator;
 import opennlp.tools.doccat.DoccatCrossValidator;
 import opennlp.tools.doccat.DoccatEvaluationMonitor;
 import opennlp.tools.doccat.DocumentSample;
@@ -86,8 +85,8 @@ public final class DoccatCrossValidatorT
       }
     }
 
-    FeatureGenerator bagOfWordsFG = new BagOfWordsFeatureGenerator();
-    FeatureGenerator[] featureGenerators = new FeatureGenerator[] { bagOfWordsFG };
+    FeatureGenerator[] featureGenerators = DoccatTrainerTool
+        .createFeatureGenerators(params.getFeatureGenerators());
 
     DoccatEvaluationMonitor[] listenersArr = listeners
         .toArray(new DoccatEvaluationMonitor[listeners.size()]);

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java?rev=1586550&r1=1586549&r2=1586550&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/DoccatTrainerTool.java Fri Apr 11 03:42:31 2014
@@ -25,9 +25,12 @@ import opennlp.tools.cmdline.CmdLineUtil
 import opennlp.tools.cmdline.TerminateToolException;
 import opennlp.tools.cmdline.doccat.DoccatTrainerTool.TrainerToolParams;
 import opennlp.tools.cmdline.params.TrainingToolParams;
+import opennlp.tools.doccat.BagOfWordsFeatureGenerator;
 import opennlp.tools.doccat.DoccatModel;
 import opennlp.tools.doccat.DocumentCategorizerME;
 import opennlp.tools.doccat.DocumentSample;
+import opennlp.tools.doccat.FeatureGenerator;
+import opennlp.tools.util.ext.ExtensionLoader;
 import opennlp.tools.util.model.ModelUtil;
 
 public class DoccatTrainerTool
@@ -58,9 +61,13 @@ public class DoccatTrainerTool
 
     CmdLineUtil.checkOutputFile("document categorizer model", modelOutFile);
 
+    FeatureGenerator[] featureGenerators = createFeatureGenerators(params
+        .getFeatureGenerators());
+
     DoccatModel model;
     try {
-      model = DocumentCategorizerME.train(params.getLang(), sampleStream, mlParams);
+      model = DocumentCategorizerME.train(params.getLang(), sampleStream,
+          mlParams, featureGenerators);
     } catch (IOException e) {
       throw new TerminateToolException(-1, "IO error while reading training data or indexing data: " +
           e.getMessage(), e);
@@ -75,4 +82,18 @@ public class DoccatTrainerTool
     
     CmdLineUtil.writeModel("document categorizer", modelOutFile, model);
   }
+
+  static FeatureGenerator[] createFeatureGenerators(String featureGeneratorsNames) {
+    if(featureGeneratorsNames == null) {
+      FeatureGenerator[] def = {new BagOfWordsFeatureGenerator()};
+      return def;
+    }
+    String[] classes = featureGeneratorsNames.split(",");
+    FeatureGenerator[] featureGenerators = new FeatureGenerator[classes.length];
+    for (int i = 0; i < featureGenerators.length; i++) {
+      featureGenerators[i] = ExtensionLoader.instantiateExtension(
+          FeatureGenerator.class, classes[i]);
+    }
+    return featureGenerators;
+  }
 }

Modified: opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java
URL: http://svn.apache.org/viewvc/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java?rev=1586550&r1=1586549&r2=1586550&view=diff
==============================================================================
--- opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java (original)
+++ opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/doccat/TrainingParams.java Fri Apr 11 03:42:31 2014
@@ -17,13 +17,19 @@
 
 package opennlp.tools.cmdline.doccat;
 
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
 import opennlp.tools.cmdline.params.BasicTrainingParams;
 
 /**
  * TrainingParams for DocCat.
- * 
+ *
  * Note: Do not use this class, internal use only!
  */
 interface TrainingParams extends BasicTrainingParams {
-  
+
+  @ParameterDescription(valueName = "fg", description = "Comma separated feature generator classes. Bag of words is used if not specified.")
+  @OptionalParameter
+  String getFeatureGenerators();
+
 }


Reply via email to