OPENNLP-978: Set name finder defaults to perceptron and cutoff zero
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/2079931f Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/2079931f Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/2079931f Branch: refs/heads/parser_regression Commit: 2079931f074f5907bf7fd523ef757a9972bb9a74 Parents: a1bb54b Author: Jörn Kottmann <jo...@apache.org> Authored: Tue Feb 7 23:58:43 2017 +0100 Committer: Jörn Kottmann <jo...@apache.org> Committed: Thu Apr 20 12:40:19 2017 +0200 ---------------------------------------------------------------------- .../namefind/TokenNameFinderCrossValidatorTool.java | 4 ++-- .../cmdline/namefind/TokenNameFinderTrainerTool.java | 3 ++- .../main/java/opennlp/tools/namefind/NameFinderME.java | 6 ++++++ .../java/opennlp/tools/util/TrainingParameters.java | 13 +++++++++++++ .../java/opennlp/tools/namefind/NameFinderMETest.java | 1 + 5 files changed, 24 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java index 333abd9..153d6f7 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderCrossValidatorTool.java @@ -42,8 +42,8 @@ import opennlp.tools.namefind.TokenNameFinderEvaluationMonitor; import opennlp.tools.namefind.TokenNameFinderFactory; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.SequenceCodec; +import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.eval.EvaluationMonitor; -import opennlp.tools.util.model.ModelUtil; public final class TokenNameFinderCrossValidatorTool extends AbstractCrossValidatorTool<NameSample, CVToolParams> { @@ -65,7 +65,7 @@ public final class TokenNameFinderCrossValidatorTool mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); if (mlParams == null) { - mlParams = ModelUtil.createDefaultTrainingParameters(); + mlParams = new TrainingParameters(); } byte featureGeneratorBytes[] = http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java index a8d4417..fb73506 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/TokenNameFinderTrainerTool.java @@ -40,6 +40,7 @@ import opennlp.tools.namefind.TokenNameFinderFactory; import opennlp.tools.namefind.TokenNameFinderModel; import opennlp.tools.util.InvalidFormatException; import opennlp.tools.util.SequenceCodec; +import opennlp.tools.util.TrainingParameters; import opennlp.tools.util.featuregen.GeneratorFactory; import opennlp.tools.util.model.ArtifactSerializer; import opennlp.tools.util.model.ModelUtil; @@ -166,7 +167,7 @@ public final class TokenNameFinderTrainerTool mlParams = CmdLineUtil.loadTrainingParameters(params.getParams(), true); if (mlParams == null) { - mlParams = ModelUtil.createDefaultTrainingParameters(); + mlParams = new TrainingParameters(); } File modelOutFile = params.getModel(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java index 6ce0b83..5a16f34 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/NameFinderME.java @@ -37,6 +37,7 @@ import opennlp.tools.ml.TrainerFactory.TrainerType; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; import opennlp.tools.ml.model.SequenceClassificationModel; +import opennlp.tools.ml.perceptron.PerceptronTrainer; import opennlp.tools.util.ObjectStream; import opennlp.tools.util.Sequence; import opennlp.tools.util.SequenceCodec; @@ -219,6 +220,11 @@ public class NameFinderME implements TokenNameFinder { public static TokenNameFinderModel train(String languageCode, String type, ObjectStream<NameSample> samples, TrainingParameters trainParams, TokenNameFinderFactory factory) throws IOException { + + trainParams.putIfAbsent(TrainingParameters.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE); + trainParams.putIfAbsent(TrainingParameters.CUTOFF_PARAM, "0"); + trainParams.putIfAbsent(TrainingParameters.ITERATIONS_PARAM, "300"); + String beamSizeString = trainParams.getSettings().get(BeamSearch.BEAM_SIZE_PARAMETER); int beamSize = NameFinderME.DEFAULT_BEAM_SIZE; http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java b/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java index 188446c..3f21623 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java +++ b/opennlp-tools/src/main/java/opennlp/tools/util/TrainingParameters.java @@ -130,6 +130,19 @@ public class TrainingParameters { return params; } + public void putIfAbsent(String namespace, String key, String value) { + if (namespace == null) { + parameters.putIfAbsent(key, value); + } + else { + parameters.putIfAbsent(namespace + "." + key, value); + } + } + + public void putIfAbsent(String key, String value) { + putIfAbsent(null, key, value); + } + public void put(String namespace, String key, String value) { if (namespace == null) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/2079931f/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java index eded5c5..494af62 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java @@ -285,6 +285,7 @@ public class NameFinderMETest { new PlainTextByLineStream(new MockInputStreamFactory(in), "UTF-8")); TrainingParameters params = new TrainingParameters(); + params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT"); params.put(TrainingParameters.ITERATIONS_PARAM, Integer.toString(70)); params.put(TrainingParameters.CUTOFF_PARAM, Integer.toString(1));