OPENNLP-1011: Fix POS tagger eval tests
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/e788ba4a Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/e788ba4a Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/e788ba4a Branch: refs/heads/parser_regression Commit: e788ba4a66900e39166f2264eec0bd14c8c609a5 Parents: 817cb03 Author: Jörn Kottmann <jo...@apache.org> Authored: Tue Mar 21 23:29:16 2017 +0100 Committer: Jörn Kottmann <jo...@apache.org> Committed: Thu Apr 20 12:40:24 2017 +0200 ---------------------------------------------------------------------- .../opennlp/tools/postag/POSTaggerFactory.java | 20 ++++++++++++++------ .../opennlp/tools/eval/ConllXPosTaggerEval.java | 16 ++++++++-------- .../tools/eval/OntoNotes4PosTaggerEval.java | 2 +- 3 files changed, 23 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/e788ba4a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java index 37143c9..c4164f4 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java @@ -27,6 +27,7 @@ import java.io.OutputStream; import java.util.Collections; import java.util.HashSet; import java.util.Map; +import java.util.Properties; import java.util.Set; import opennlp.tools.dictionary.Dictionary; @@ -186,10 +187,9 @@ public class POSTaggerFactory extends BaseToolFactory { public Map<String, ArtifactSerializer> createArtifactSerializersMap() { Map<String, ArtifactSerializer> serializers = super.createArtifactSerializersMap(); + // NOTE: This is only needed for old models and this if can be removed if 
support is dropped - if (Version.currentVersion().getMinor() < 8) { - POSDictionarySerializer.register(serializers); - } + POSDictionarySerializer.register(serializers); return serializers; } @@ -269,11 +269,19 @@ public class POSTaggerFactory extends BaseToolFactory { } public POSContextGenerator getPOSContextGenerator(int cacheSize) { - if (Version.currentVersion().getMinor() >= 8) { - return new ConfigurablePOSContextGenerator(cacheSize, createFeatureGenerators()); + + if (artifactProvider != null) { + Properties manifest = (Properties) artifactProvider.getArtifact("manifest.properties"); + + String version = manifest.getProperty("OpenNLP-Version"); + + if (Version.parse(version).getMinor() < 8) { + return new DefaultPOSContextGenerator(cacheSize, getDictionary()); + } } + + return new ConfigurablePOSContextGenerator(cacheSize, createFeatureGenerators()); - return new DefaultPOSContextGenerator(cacheSize, getDictionary()); } public SequenceValidator<String> getSequenceValidator() { http://git-wip-us.apache.org/repos/asf/opennlp/blob/e788ba4a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java index 6245961..600e599 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java @@ -84,7 +84,7 @@ public class ConllXPosTaggerEval { "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "da", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9512987012987013d); + "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9504442925495558d); } @Test @@ -95,7 +95,7 @@ public class ConllXPosTaggerEval { 
"conllx/data/danish/ddt/train/danish_ddt_train.conll"), "da", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9456596035543404d); + "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9564251537935748d); } @Test @@ -106,7 +106,7 @@ public class ConllXPosTaggerEval { "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nl", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9174574753804834d); + "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9213965980304387d); } @Test @@ -117,7 +117,7 @@ public class ConllXPosTaggerEval { "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nl", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9025962399283796d); + "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9282005371530886d); } @Test @@ -128,7 +128,7 @@ public class ConllXPosTaggerEval { "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "pt", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9659110277825124d); + "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9671041418101244d); } @Test @@ -139,7 +139,7 @@ public class ConllXPosTaggerEval { "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "pt", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9676154763933867d); + "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9662519175046872d); } @Test @@ -150,7 +150,7 @@ public class ConllXPosTaggerEval { "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "se", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), - 
"conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9275106082036775d); + "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9248585572842999d); } @Test @@ -161,6 +161,6 @@ public class ConllXPosTaggerEval { "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "se", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9245049504950495d); + "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9322842998585573d); } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/e788ba4a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java index 5ce1fba..31b42d1 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java @@ -62,6 +62,6 @@ public class OntoNotes4PosTaggerEval { @Test public void evalEnglishMaxentTagger() throws IOException { - crossEval(ModelUtil.createDefaultTrainingParameters(), 0.9707977252663043d); + crossEval(ModelUtil.createDefaultTrainingParameters(), 0.9699561275750962d); } }