Author: colen
Date: Tue Jul 12 04:10:51 2011
New Revision: 1145449
URL: http://svn.apache.org/viewvc?rev=1145449&view=rev
Log:
OPENNLP-221 Refactored the evaluator and cross validator CLI tools of
the SentenceDetector to use the Parameters interface. Please review.
If it is OK, I will do the same with the other tools.
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
URL:
http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java?rev=1145449&r1=1145448&r2=1145449&view=diff
==============================================================================
---
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
(original)
+++
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorCrossValidatorTool.java
Tue Jul 12 04:10:51 2011
@@ -19,7 +19,11 @@ package opennlp.tools.cmdline.sentdetect
import java.io.File;
import java.io.IOException;
+import java.nio.charset.Charset;
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
+import opennlp.tools.cmdline.BasicTrainingParametersI;
import opennlp.tools.cmdline.CLI;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
@@ -30,6 +34,16 @@ import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.eval.FMeasure;
public final class SentenceDetectorCrossValidatorTool implements
CmdLineTool {
+
+ /**
+ * Create a list of expected parameters.
+ */
+ interface Parameters extends BasicTrainingParametersI {
+
+ @ParameterDescription(valueName = "data")
+ String getData();
+
+ }
public String getName() {
return "SentenceDetectorCrossValidator";
@@ -40,40 +54,37 @@ public final class SentenceDetectorCross
}
public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " " +
TrainingParameters.getParameterUsage() +
- " -data trainData\n" +
- TrainingParameters.getDescription();
+ return "Usage: " + CLI.CMD + " " + getName() + " " +
ArgumentParser.createUsage(Parameters.class);
}
public void run(String[] args) {
- if (args.length< 5) {
- System.out.println(getHelp());
+
+ if (!ArgumentParser.validateArguments(args, Parameters.class)) {
+ System.err.println(getHelp());
throw new TerminateToolException(1);
}
- TrainingParameters parameters = new TrainingParameters(args);
+ Parameters params = ArgumentParser.parse(args, Parameters.class);
- if(!parameters.isValid()) {
- System.out.println(getHelp());
- throw new TerminateToolException(1);
- }
opennlp.tools.util.TrainingParameters mlParams =
-
CmdLineUtil.loadTrainingParameters(CmdLineUtil.getParameter("-params", args),
false);
+ CmdLineUtil.loadTrainingParameters(params.getParams(), false);
- File trainingDataInFile = new
File(CmdLineUtil.getParameter("-data", args));
+ File trainingDataInFile = new File(params.getData());
CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
+ Charset encoding = Charset.forName(params.getEncoding());
+
ObjectStream<SentenceSample> sampleStream =
SentenceDetectorTrainerTool.openSampleData("Training Data",
- trainingDataInFile, parameters.getEncoding());
+ trainingDataInFile, encoding);
SDCrossValidator validator;
if (mlParams == null) {
- validator = new SDCrossValidator(parameters.getLanguage(),
parameters.getCutoff(), parameters.getNumberOfIterations());
+ validator = new SDCrossValidator(params.getLang(),
params.getCutoff(), params.getIterations());
}
else {
- validator = new SDCrossValidator(parameters.getLanguage(),
mlParams);
+ validator = new SDCrossValidator(params.getLang(), mlParams);
}
try {
Modified:
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java
URL:
http://svn.apache.org/viewvc/incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java?rev=1145449&r1=1145448&r2=1145449&view=diff
==============================================================================
---
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java
(original)
+++
incubator/opennlp/trunk/opennlp-tools/src/main/java/opennlp/tools/cmdline/sentdetect/SentenceDetectorEvaluatorTool.java
Tue Jul 12 04:10:51 2011
@@ -21,6 +21,9 @@ import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
+import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
+import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.CLI;
import opennlp.tools.cmdline.CmdLineTool;
import opennlp.tools.cmdline.CmdLineUtil;
@@ -31,6 +34,22 @@ import opennlp.tools.sentdetect.Sentence
import opennlp.tools.util.ObjectStream;
public final class SentenceDetectorEvaluatorTool implements
CmdLineTool {
+
+ /**
+ * Create a list of expected parameters.
+ */
+ interface Parameters {
+
+ @ParameterDescription(valueName = "charsetName", description =
"specifies the encoding which should be used for reading and writing
text")
+ @OptionalParameter(defaultValue="UTF-8")
+ String getEncoding();
+
+ @ParameterDescription(valueName = "model")
+ String getModel();
+
+ @ParameterDescription(valueName = "data")
+ String getData();
+ }
public String getName() {
return "SentenceDetectorEvaluator";
@@ -41,25 +60,28 @@ public final class SentenceDetectorEvalu
}
public String getHelp() {
- return "Usage: " + CLI.CMD + " " + getName() + " -encoding
charset -model model -data testData";
+ return "Usage: " + CLI.CMD + " " + getName() + " " +
ArgumentParser.createUsage(Parameters.class);
}
public void run(String[] args) {
- if (args.length != 6) {
- System.out.println(getHelp());
+
+ if (!ArgumentParser.validateArguments(args, Parameters.class)) {
+ System.err.println(getHelp());
throw new TerminateToolException(1);
}
- Charset encoding = CmdLineUtil.getEncodingParameter(args);
+ Parameters params = ArgumentParser.parse(args, Parameters.class);
+
+ Charset encoding = Charset.forName(params.getEncoding());
if (encoding == null) {
System.out.println(getHelp());
throw new TerminateToolException(1);
}
- SentenceModel model = new SentenceModelLoader().load(new
File(CmdLineUtil.getParameter("-model", args)));
+ SentenceModel model = new SentenceModelLoader().load(new
File(params.getModel()));
- File trainingDataInFile = new
File(CmdLineUtil.getParameter("-data", args));
+ File trainingDataInFile = new File(params.getData());
CmdLineUtil.checkInputFile("Training Data", trainingDataInFile);
opennlp.tools.sentdetect.SentenceDetectorEvaluator evaluator =