Repository: opennlp Updated Branches: refs/heads/master b2a2d2d2d -> aae0f2997
OPENNLP-1065: Use ISO-639-3 in test code Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/aae0f299 Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/aae0f299 Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/aae0f299 Branch: refs/heads/master Commit: aae0f29972d6323ad51ef18cdbc84d643a4739d8 Parents: b2a2d2d Author: Jörn Kottmann <[email protected]> Authored: Wed May 17 11:24:23 2017 +0200 Committer: Jörn Kottmann <[email protected]> Committed: Thu Jun 8 12:15:05 2017 +0200 ---------------------------------------------------------------------- .../tools/formats/Conll02NameSampleStream.java | 8 +- .../formats/Conll02NameSampleStreamFactory.java | 10 +- .../opennlp/tools/tokenize/lang/Factory.java | 2 +- .../tools/chunker/ChunkerFactoryTest.java | 2 +- .../opennlp/tools/chunker/ChunkerMETest.java | 4 +- .../tools/cmdline/TokenNameFinderToolTest.java | 2 +- .../opennlp/tools/eval/ArvoresDeitadasEval.java | 11 +- .../opennlp/tools/eval/Conll00ChunkerEval.java | 2 +- .../tools/eval/Conll02NameFinderEval.java | 180 +++++++++---------- .../opennlp/tools/eval/ConllXPosTaggerEval.java | 16 +- .../tools/eval/OntoNotes4NameFinderEval.java | 4 +- .../tools/eval/OntoNotes4ParserEval.java | 2 +- .../tools/eval/OntoNotes4PosTaggerEval.java | 2 +- .../tools/eval/SourceForgeModelEval.java | 6 +- .../formats/Conll02NameSampleStreamTest.java | 7 +- .../formats/ad/ADTokenSampleStreamTest.java | 2 +- .../tools/lemmatizer/LemmatizerMETest.java | 4 +- .../tools/namefind/NameFinderMETest.java | 14 +- .../TokenNameFinderCrossValidatorTest.java | 6 +- .../tools/parser/chunking/ParserTest.java | 2 +- .../tools/parser/treeinsert/ParserTest.java | 2 +- .../tools/postag/POSTaggerFactoryTest.java | 2 +- .../opennlp/tools/postag/POSTaggerMETest.java | 4 +- .../tools/sentdetect/SDEventStreamTest.java | 4 +- .../sentdetect/SentenceDetectorFactoryTest.java | 12 +- .../sentdetect/SentenceDetectorMETest.java | 10
+- .../tools/tokenize/TokenizerFactoryTest.java | 10 +- .../opennlp/tools/tokenize/TokenizerMETest.java | 2 +- .../tools/tokenize/TokenizerTestUtil.java | 4 +- 29 files changed, 166 insertions(+), 170 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java index f3c2a81..8c71f50 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStream.java @@ -50,8 +50,8 @@ import opennlp.tools.util.StringUtil; public class Conll02NameSampleStream implements ObjectStream<NameSample> { public enum LANGUAGE { - NL, - ES + NLD, + SPA } public static final int GENERATE_PERSON_ENTITIES = 0x01; @@ -120,7 +120,7 @@ public class Conll02NameSampleStream implements ObjectStream<NameSample> { String line; while ((line = lineStream.read()) != null && !StringUtil.isEmpty(line)) { - if (LANGUAGE.NL.equals(lang) && line.startsWith(DOCSTART)) { + if (LANGUAGE.NLD.equals(lang) && line.startsWith(DOCSTART)) { isClearAdaptiveData = true; continue; } @@ -138,7 +138,7 @@ public class Conll02NameSampleStream implements ObjectStream<NameSample> { } // Always clear adaptive data for spanish - if (LANGUAGE.ES.equals(lang)) + if (LANGUAGE.SPA.equals(lang)) isClearAdaptiveData = true; if (sentence.size() > 0) { http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java ---------------------------------------------------------------------- diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java index bfb3170..f19328a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll02NameSampleStreamFactory.java @@ -35,7 +35,7 @@ import opennlp.tools.util.ObjectStream; public class Conll02NameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> { interface Parameters extends BasicFormatParams { - @ParameterDescription(valueName = "es|nl") + @ParameterDescription(valueName = "spa|nld") String getLang(); @ParameterDescription(valueName = "per,loc,org,misc") @@ -56,12 +56,12 @@ public class Conll02NameSampleStreamFactory extends LanguageSampleStreamFactory< Parameters params = ArgumentParser.parse(args, Parameters.class); LANGUAGE lang; - if ("nl".equals(params.getLang())) { - lang = LANGUAGE.NL; + if ("nl".equals(params.getLang()) || "nld".equals(params.getLang())) { + lang = LANGUAGE.NLD; language = params.getLang(); } - else if ("es".equals(params.getLang())) { - lang = LANGUAGE.ES; + else if ("es".equals(params.getLang()) || "spa".equals(params.getLang())) { + lang = LANGUAGE.SPA; language = params.getLang(); } else { http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java index ef2a9f8..fb15317 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/lang/Factory.java @@ -37,7 +37,7 @@ public class Factory { * @return the alpha numeric pattern for the language or the default pattern. 
*/ public Pattern getAlphanumeric(String languageCode) { - if ("pt".equals(languageCode)) { + if ("pt".equals(languageCode) || "por".equals(languageCode)) { return Pattern.compile("^[0-9a-záãâàéêíóõôúüçA-ZÁÃÂÀÉÊÍÓÕÔÚÜÇ]+$"); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java index c75030a..4205197 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerFactoryTest.java @@ -47,7 +47,7 @@ public class ChunkerFactoryTest { private static ChunkerModel trainModel(ModelType type, ChunkerFactory factory) throws IOException { - return ChunkerME.train("en", createSampleStream(), + return ChunkerME.train("eng", createSampleStream(), TrainingParameters.defaultParams(), factory); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java index 3c04894..b7654fb 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/chunker/ChunkerMETest.java @@ -78,7 +78,7 @@ public class ChunkerMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); - ChunkerModel chunkerModel = ChunkerME.train("en", sampleStream, params, new ChunkerFactory()); + ChunkerModel chunkerModel = ChunkerME.train("eng", sampleStream, params, new ChunkerFactory()); this.chunker = new
ChunkerME(chunkerModel); } @@ -143,7 +143,7 @@ public class ChunkerMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); - ChunkerME.train("en", sampleStream, params, new ChunkerFactory()); + ChunkerME.train("eng", sampleStream, params, new ChunkerFactory()); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java index a163b0c..e4a7fc6 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/cmdline/TokenNameFinderToolTest.java @@ -110,7 +110,7 @@ public class TokenNameFinderToolTest { TokenNameFinderFactory nameFinderFactory = new TokenNameFinderFactory(); try (ObjectStream<NameSample> sampleStream = new NameSampleDataStream(lineStream)) { - model = NameFinderME.train("en", null, sampleStream, params, + model = NameFinderME.train("eng", null, sampleStream, params, nameFinderFactory); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java index dd54480..6ee3eb0 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/ArvoresDeitadasEval.java @@ -18,8 +18,8 @@ package opennlp.tools.eval; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; +import java.nio.charset.StandardCharsets; import 
org.junit.Assert; import org.junit.Test; @@ -66,14 +66,12 @@ public class ArvoresDeitadasEval { private static final String BOSQUE = "ad/Bosque_CF_8.0.ad.txt"; private static final String FLORESTA_VIRGEM = "ad/FlorestaVirgem_CF_3.0_ad.txt"; - private static final String ENCODING = "ISO-8859-1"; - - private static final String LANG = "pt"; + private static final String LANG = "por"; private static ObjectStream<String> getLineSample(String corpus) throws IOException { return new PlainTextByLineStream(new MarkableFileInputStreamFactory( - new File(EvalUtil.getOpennlpDataDir(), corpus)), ENCODING); + new File(EvalUtil.getOpennlpDataDir(), corpus)), StandardCharsets.ISO_8859_1); } private static void sentenceCrossEval(TrainingParameters params, @@ -99,8 +97,7 @@ public class ArvoresDeitadasEval { getLineSample(FLORESTA_VIRGEM), true); DictionaryDetokenizer detokenizer = new DictionaryDetokenizer( - new DetokenizationDictionary(new FileInputStream(new File( - "lang/pt/tokenizer/pt-detokenizer.xml")))); + new DetokenizationDictionary(new File("lang/pt/tokenizer/pt-detokenizer.xml"))); ObjectStream<TokenSample> samples = new NameToTokenSampleStream( detokenizer, nameSamples); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java index 62d4a46..fd3e054 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll00ChunkerEval.java @@ -52,7 +52,7 @@ public class Conll00ChunkerEval { new PlainTextByLineStream( new MarkableFileInputStreamFactory(trainFile), StandardCharsets.UTF_8)); - return ChunkerME.train("en", samples, params, new ChunkerFactory()); + return ChunkerME.train("eng", samples, 
params, new ChunkerFactory()); } private static void eval(ChunkerModel model, File testData, http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java index c064e3f..c233686 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/Conll02NameFinderEval.java @@ -94,13 +94,13 @@ public class Conll02NameFinderEval { public void evalDutchPersonPerceptron() throws IOException { TrainingParameters params = EvalUtil.createPerceptronParams(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.6238361266294227d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.744312026002167d); } @@ -108,13 +108,13 @@ public class Conll02NameFinderEval { public void evalDutchPersonMaxentGis() throws IOException { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.5696539485359361d); - eval(maxentModel, 
dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7127771911298839d); } @@ -122,13 +122,13 @@ public class Conll02NameFinderEval { public void evalDutchPersonMaxentQn() throws IOException { TrainingParameters params = EvalUtil.createMaxentQnParams(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.6363636363636364d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7482403898213319d); } @@ -136,13 +136,13 @@ public class Conll02NameFinderEval { public void evalDutchOrganizationPerceptron() throws IOException { TrainingParameters params = EvalUtil.createPerceptronParams(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6081871345029239d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6502808988764045d); } @@ -150,13 +150,13 @@ public class Conll02NameFinderEval { public void evalDutchOrganizationMaxentGis() throws IOException { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = 
train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5197969543147207d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5753228120516498d); } @@ -164,13 +164,13 @@ public class Conll02NameFinderEval { public void evalDutchOrganizationMaxentQn() throws IOException { TrainingParameters params = EvalUtil.createMaxentQnParams(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5412748171368861d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.5764966740576497d); } @@ -178,13 +178,13 @@ public class Conll02NameFinderEval { public void evalDutchLocationPerceptron() throws IOException { TrainingParameters params = EvalUtil.createPerceptronParams(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7978609625668449d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7880434782608695d); } @@ -192,13 +192,13 @@ 
public class Conll02NameFinderEval { public void evalDutchLocationMaxentGis() throws IOException { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.5451977401129944d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.680952380952381d); } @@ -206,13 +206,13 @@ public class Conll02NameFinderEval { public void evalDutchLocationMaxentQn() throws IOException { TrainingParameters params = EvalUtil.createMaxentQnParams(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6737683089214381d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7433903576982893d); } @@ -220,13 +220,13 @@ public class Conll02NameFinderEval { public void evalDutchMiscPerceptron() throws IOException { TrainingParameters params = EvalUtil.createPerceptronParams(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_MISC_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, 
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.6651198762567672d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.6748166259168704d); } @@ -234,13 +234,13 @@ public class Conll02NameFinderEval { public void evalDutchMiscMaxentGis() throws IOException { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_MISC_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5831157528285466d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5762897914379803d); } @@ -248,13 +248,13 @@ public class Conll02NameFinderEval { public void evalDutchMiscMaxentQn() throws IOException { TrainingParameters params = EvalUtil.createMaxentQnParams(); - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, Conll02NameSampleStream.GENERATE_MISC_ENTITIES); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.4227642276422764d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.455294863665187d); } @@ -267,12 +267,12 @@ public class Conll02NameFinderEval { | Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES | Conll02NameSampleStream.GENERATE_MISC_ENTITIES; - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel 
= train(dutchTrainingFile, LANGUAGE.NLD, params, combinedType); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, combinedType, 0.727808326787117d); + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.727808326787117d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.7388253638253639d); + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.7388253638253639d); } @Test @@ -284,12 +284,12 @@ public class Conll02NameFinderEval { | Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES | Conll02NameSampleStream.GENERATE_MISC_ENTITIES; - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, combinedType); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, combinedType, 0.6673209028459275d); + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.6673209028459275d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.6984085910208306d); + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.6984085910208306d); } @Test @@ -301,25 +301,25 @@ public class Conll02NameFinderEval { | Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES | Conll02NameSampleStream.GENERATE_MISC_ENTITIES; - TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NL, params, + TokenNameFinderModel maxentModel = train(dutchTrainingFile, LANGUAGE.NLD, params, combinedType); - eval(maxentModel, dutchTestAFile, LANGUAGE.NL, combinedType, 0.6999800915787379d); + eval(maxentModel, dutchTestAFile, LANGUAGE.NLD, combinedType, 0.6999800915787379d); - eval(maxentModel, dutchTestBFile, LANGUAGE.NL, combinedType, 0.7101430258496261d); + eval(maxentModel, dutchTestBFile, LANGUAGE.NLD, combinedType, 0.7101430258496261d); } @Test public void evalSpanishPersonPerceptron() throws IOException { TrainingParameters params = EvalUtil.createPerceptronParams(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, 
LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8331210191082803d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8419705694177864d); } @@ -327,13 +327,13 @@ public class Conll02NameFinderEval { public void evalSpanishPersonMaxentGis() throws IOException { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.686960933536276d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8132033008252063d); } @@ -342,13 +342,13 @@ public class Conll02NameFinderEval { public void evalSpanishPersonMaxentQn() throws IOException { TrainingParameters params = EvalUtil.createMaxentQnParams(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.7454634624816087d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, 
Conll02NameSampleStream.GENERATE_PERSON_ENTITIES, 0.8215339233038348d); } @@ -356,13 +356,13 @@ public class Conll02NameFinderEval { public void evalSpanishOrganizationPerceptron() throws IOException { TrainingParameters params = EvalUtil.createPerceptronParams(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7478819748758399d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7715330894579315d); } @@ -370,13 +370,13 @@ public class Conll02NameFinderEval { public void evalSpanishOrganizationMaxentGis() throws IOException { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.6982288828337874d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7640449438202247d); } @@ -384,13 +384,13 @@ public class Conll02NameFinderEval { public void evalSpanishOrganizationMaxentQn() throws IOException { TrainingParameters params = EvalUtil.createMaxentQnParams(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, 
params, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.682961897915169d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_ORGANIZATION_ENTITIES, 0.7776447105788423d); } @@ -398,13 +398,13 @@ public class Conll02NameFinderEval { public void evalSpanishLocationPerceptron() throws IOException { TrainingParameters params = EvalUtil.createPerceptronParams(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7018867924528303d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6315158777711205d); } @@ -412,13 +412,13 @@ public class Conll02NameFinderEval { public void evalSpanishLocationMaxentGis() throws IOException { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7386907929749867d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.6772777167947311d); } @@ -426,13 +426,13 @@ public 
class Conll02NameFinderEval { public void evalSpanishLocationMaxentQn() throws IOException { TrainingParameters params = EvalUtil.createMaxentQnParams(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7544565842438182d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES, 0.7005019520356944d); } @@ -440,13 +440,13 @@ public class Conll02NameFinderEval { public void evalSpanishMiscPerceptron() throws IOException { TrainingParameters params = EvalUtil.createPerceptronParams(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_MISC_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5102880658436214d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5842696629213483d); } @@ -454,13 +454,13 @@ public class Conll02NameFinderEval { public void evalSpanishMiscMaxentGis() throws IOException { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_MISC_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, 
Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.40971168437025796d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.45703124999999994d); } @@ -468,13 +468,13 @@ public class Conll02NameFinderEval { public void evalSpanishMiscMaxentQn() throws IOException { TrainingParameters params = EvalUtil.createMaxentQnParams(); - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, Conll02NameSampleStream.GENERATE_MISC_ENTITIES); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.470219435736677d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, Conll02NameSampleStream.GENERATE_MISC_ENTITIES, 0.5020576131687243d); } @@ -487,12 +487,12 @@ public class Conll02NameFinderEval { | Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES | Conll02NameSampleStream.GENERATE_MISC_ENTITIES; - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, combinedType); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.7476700838769804d); + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, combinedType, 0.7476700838769804d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7692307692307693d); + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, combinedType, 0.7692307692307693d); } @Test @@ -504,12 +504,12 @@ public class Conll02NameFinderEval { | Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES | Conll02NameSampleStream.GENERATE_MISC_ENTITIES; - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = 
train(spanishTrainingFile, LANGUAGE.SPA, params, combinedType); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.707400023454908d); + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, combinedType, 0.707400023454908d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7576868829337094d); + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, combinedType, 0.7576868829337094d); } @Test @@ -521,11 +521,11 @@ public class Conll02NameFinderEval { | Conll02NameSampleStream.GENERATE_LOCATION_ENTITIES | Conll02NameSampleStream.GENERATE_MISC_ENTITIES; - TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.ES, params, + TokenNameFinderModel maxentModel = train(spanishTrainingFile, LANGUAGE.SPA, params, combinedType); - eval(maxentModel, spanishTestAFile, LANGUAGE.ES, combinedType, 0.7455564833591795d); + eval(maxentModel, spanishTestAFile, LANGUAGE.SPA, combinedType, 0.7455564833591795d); - eval(maxentModel, spanishTestBFile, LANGUAGE.ES, combinedType, 0.7856735159817352d); + eval(maxentModel, spanishTestBFile, LANGUAGE.SPA, combinedType, 0.7856735159817352d); } } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java index 98a0ded..6f6ce3a 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/ConllXPosTaggerEval.java @@ -82,7 +82,7 @@ public class ConllXPosTaggerEval { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "da", params); + "conllx/data/danish/ddt/train/danish_ddt_train.conll"), 
"dan", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9504442925495558d); @@ -93,7 +93,7 @@ public class ConllXPosTaggerEval { TrainingParameters params = EvalUtil.createMaxentQnParams(); POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "da", params); + "conllx/data/danish/ddt/train/danish_ddt_train.conll"), "dan", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), "conllx/data/danish/ddt/test/danish_ddt_test.conll"), 0.9564251537935748d); @@ -104,7 +104,7 @@ public class ConllXPosTaggerEval { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nl", params); + "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nld", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9213965980304387d); @@ -116,7 +116,7 @@ public class ConllXPosTaggerEval { TrainingParameters params = EvalUtil.createMaxentQnParams(); POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nl", params); + "conllx/data/dutch/alpino/train/dutch_alpino_train.conll"), "nld", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), "conllx/data/dutch/alpino/test/dutch_alpino_test.conll"), 0.9282005371530886d); @@ -127,7 +127,7 @@ public class ConllXPosTaggerEval { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "pt", params); + "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "por", params); eval(maxentModel, new 
File(EvalUtil.getOpennlpDataDir(), "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9671041418101244d); @@ -138,7 +138,7 @@ public class ConllXPosTaggerEval { TrainingParameters params = EvalUtil.createMaxentQnParams(); POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "pt", params); + "conllx/data/portuguese/bosque/treebank/portuguese_bosque_train.conll"), "por", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), "conllx/data/portuguese/bosque/test/portuguese_bosque_test.conll"), 0.9662519175046872d); @@ -149,7 +149,7 @@ public class ConllXPosTaggerEval { TrainingParameters params = ModelUtil.createDefaultTrainingParameters(); POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "se", params); + "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "swe", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9248585572842999d); @@ -160,7 +160,7 @@ public class ConllXPosTaggerEval { TrainingParameters params = EvalUtil.createMaxentQnParams(); POSModel maxentModel = train(new File(EvalUtil.getOpennlpDataDir(), - "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "se", params); + "conllx/data/swedish/talbanken05/train/swedish_talbanken05_train.conll"), "swe", params); eval(maxentModel, new File(EvalUtil.getOpennlpDataDir(), "conllx/data/swedish/talbanken05/test/swedish_talbanken05_test.conll"), 0.9347595473833098d); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java 
b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java index a001ce9..af217f8 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4NameFinderEval.java @@ -68,7 +68,7 @@ public class OntoNotes4NameFinderEval { throws IOException { try (ObjectStream<NameSample> samples = createNameSampleStream()) { - TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en", null, + TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng", null, params, new TokenNameFinderFactory()); ObjectStream<NameSample> filteredSamples; @@ -150,7 +150,7 @@ public class OntoNotes4NameFinderEval { try (ObjectStream<NameSample> samples = createNameSampleStream()) { - TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en", null, + TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng", null, params, featureGen, resources); ObjectStream<NameSample> filteredSamples; http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java index 5606b82..bf6a508 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4ParserEval.java @@ -63,7 +63,7 @@ public class OntoNotes4ParserEval { private static void crossEval(TrainingParameters params, HeadRules rules, double expectedScore) throws IOException { try (ObjectStream<Parse> samples = createParseSampleStream()) { - ParserCrossValidator cv = new ParserCrossValidator("en", params, rules, ParserType.CHUNKING); + ParserCrossValidator cv = new ParserCrossValidator("eng", params, rules, 
ParserType.CHUNKING); cv.evaluate(samples, 5); Assert.assertEquals(expectedScore, cv.getFMeasure().getFMeasure(), 0.0001d); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java index 3ea7abe..b3939e0 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/OntoNotes4PosTaggerEval.java @@ -60,7 +60,7 @@ public class OntoNotes4PosTaggerEval { private static void crossEval(TrainingParameters params, double expectedScore) throws IOException { try (ObjectStream<POSSample> samples = createPOSSampleStream()) { - POSTaggerCrossValidator cv = new POSTaggerCrossValidator("en", params, new POSTaggerFactory()); + POSTaggerCrossValidator cv = new POSTaggerCrossValidator("eng", params, new POSTaggerFactory()); cv.evaluate(samples, 5); Assert.assertEquals(expectedScore, cv.getWordAccuracy(), 0.0001d); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java b/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java index 24cdcd0..89f4c5e 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java +++ b/opennlp-tools/src/test/java/opennlp/tools/eval/SourceForgeModelEval.java @@ -111,7 +111,7 @@ public class SourceForgeModelEval { StringBuilder text = new StringBuilder(); - try (ObjectStream<DocumentSample> lineBatches = new LeipzigDoccatSampleStream("en", 25, + try (ObjectStream<DocumentSample> lineBatches = 
new LeipzigDoccatSampleStream("eng", 25, new MarkableFileInputStreamFactory(new File(EvalUtil.getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt")))) { @@ -145,7 +145,7 @@ public class SourceForgeModelEval { Tokenizer tokenizer = new TokenizerME(model); - try (ObjectStream<DocumentSample> lines = new LeipzigDoccatSampleStream("en", 1, + try (ObjectStream<DocumentSample> lines = new LeipzigDoccatSampleStream("eng", 1, WhitespaceTokenizer.INSTANCE, new MarkableFileInputStreamFactory(new File(EvalUtil.getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt")))) { @@ -164,7 +164,7 @@ public class SourceForgeModelEval { } private ObjectStream<DocumentSample> createLineWiseStream() throws IOException { - return new LeipzigDoccatSampleStream("en", 1, + return new LeipzigDoccatSampleStream("eng", 1, new MarkableFileInputStreamFactory(new File(EvalUtil.getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt"))); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java index 7b4c374..bc0d4fd 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/formats/Conll02NameSampleStreamTest.java @@ -29,7 +29,6 @@ import opennlp.tools.util.ObjectStream; import opennlp.tools.util.Span; /** - * * Note: * Sample training data must be UTF-8 encoded and uncompressed! 
*/ @@ -45,7 +44,7 @@ public class Conll02NameSampleStreamTest { @Test public void testParsingSpanishSample() throws IOException { - ObjectStream<NameSample> sampleStream = openData(LANGUAGE.ES, "conll2002-es.sample"); + ObjectStream<NameSample> sampleStream = openData(LANGUAGE.SPA, "conll2002-es.sample"); NameSample personName = sampleStream.read(); @@ -67,7 +66,7 @@ public class Conll02NameSampleStreamTest { @Test public void testParsingDutchSample() throws IOException { - ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NL, "conll2002-nl.sample"); + ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample"); NameSample personName = sampleStream.read(); @@ -83,7 +82,7 @@ public class Conll02NameSampleStreamTest { @Test public void testReset() throws IOException { - ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NL, "conll2002-nl.sample"); + ObjectStream<NameSample> sampleStream = openData(LANGUAGE.NLD, "conll2002-nl.sample"); NameSample sample = sampleStream.read(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java index 6dd591e..23dda9b 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/formats/ad/ADTokenSampleStreamTest.java @@ -54,7 +54,7 @@ public class ADTokenSampleStreamTest { File data = new File(getClass().getClassLoader() .getResource("opennlp/tools/formats/ad.sample").toURI()); String[] args = { "-data", data.getCanonicalPath(), "-encoding", "UTF-8", - "-lang", "pt", "-detokenizer", dict.getCanonicalPath() }; + "-lang", "por", "-detokenizer", dict.getCanonicalPath() }; 
ObjectStream<TokenSample> tokenSampleStream = factory.create(args); TokenSample sample = tokenSampleStream.read(); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java index 285af4a..f925cb3 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/lemmatizer/LemmatizerMETest.java @@ -72,7 +72,7 @@ public class LemmatizerMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 5); - LemmatizerModel lemmatizerModel = LemmatizerME.train("en", sampleStream, + LemmatizerModel lemmatizerModel = LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory()); this.lemmatizer = new LemmatizerME(lemmatizerModel); @@ -98,7 +98,7 @@ public class LemmatizerMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 5); - LemmatizerME.train("en", sampleStream, params, new LemmatizerFactory()); + LemmatizerME.train("eng", sampleStream, params, new LemmatizerFactory()); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java index 94fbb36..740e7d1 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/NameFinderMETest.java @@ -68,7 +68,7 @@ public class NameFinderMETest { 
params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); - TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream, + TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); TokenNameFinder nameFinder = new NameFinderME(nameFinderModel); @@ -126,7 +126,7 @@ public class NameFinderMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); - TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream, + TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -170,7 +170,7 @@ public class NameFinderMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); - TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream, + TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -200,7 +200,7 @@ public class NameFinderMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); - TokenNameFinderModel nameFinderModel = NameFinderME.train("en", TYPE_OVERRIDE, sampleStream, + TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -235,7 +235,7 @@ public class NameFinderMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 70); 
params.put(TrainingParameters.CUTOFF_PARAM, 1); - TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream, + TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -271,7 +271,7 @@ public class NameFinderMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); - TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream, + TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); @@ -322,7 +322,7 @@ public class NameFinderMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); - TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream, + TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java index 0326fb2..1f4b5a6 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/namefind/TokenNameFinderCrossValidatorTest.java @@ -57,7 +57,7 @@ public 
class TokenNameFinderCrossValidatorTest { mlParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.toString()); - TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en", + TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng", TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor)null); cv.evaluate(sampleStream, 2); @@ -88,7 +88,7 @@ public class TokenNameFinderCrossValidatorTest { NameEvaluationErrorListener listener = new NameEvaluationErrorListener(out); Map<String, Object> resources = Collections.emptyMap(); - TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en", + TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng", TYPE, mlParams, null, resources, listener); cv.evaluate(sampleStream, 2); @@ -113,7 +113,7 @@ public class TokenNameFinderCrossValidatorTest { mlParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.toString()); - TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("en", + TokenNameFinderCrossValidator cv = new TokenNameFinderCrossValidator("eng", TYPE, mlParams, null, (TokenNameFinderEvaluationMonitor)null); cv.evaluate(sampleStream, 2); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java index 9d72e9c..277a4e5 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/parser/chunking/ParserTest.java @@ -45,7 +45,7 @@ public class ParserTest { ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData(); HeadRules headRules = ParserTestUtil.createTestHeadRules(); - ParserModel model = Parser.train("en", parseSamples, headRules, + 
ParserModel model = Parser.train("eng", parseSamples, headRules, TrainingParameters.defaultParams()); opennlp.tools.parser.Parser parser = ParserFactory.create(model); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java b/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java index 92f569f..a8c0015 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/parser/treeinsert/ParserTest.java @@ -44,7 +44,7 @@ public class ParserTest { ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData(); HeadRules headRules = ParserTestUtil.createTestHeadRules(); - ParserModel model = Parser.train("en", parseSamples, headRules, 100, 0); + ParserModel model = Parser.train("eng", parseSamples, headRules, 100, 0); opennlp.tools.parser.Parser parser = ParserFactory.create(model); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java index b98d3bf..6f3443b 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerFactoryTest.java @@ -52,7 +52,7 @@ public class POSTaggerFactoryTest { private static POSModel trainPOSModel(POSTaggerFactory factory) throws IOException { - return POSTaggerME.train("en", createSampleStream(), + return POSTaggerME.train("eng", createSampleStream(), TrainingParameters.defaultParams(), factory); } 
http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java index 838150e..065fe15 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/postag/POSTaggerMETest.java @@ -54,7 +54,7 @@ public class POSTaggerMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 5); - return POSTaggerME.train("en", createSampleStream(), params, + return POSTaggerME.train("eng", createSampleStream(), params, new POSTaggerFactory()); } @@ -101,7 +101,7 @@ public class POSTaggerMETest { params.put(TrainingParameters.ITERATIONS_PARAM, 100); params.put(TrainingParameters.CUTOFF_PARAM, 5); - POSTaggerME.train("en", stream, params, new POSTaggerFactory()); + POSTaggerME.train("eng", stream, params, new POSTaggerFactory()); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java index f71dd45..138e915 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SDEventStreamTest.java @@ -45,8 +45,8 @@ public class SDEventStreamTest { Factory factory = new Factory(); ObjectStream<Event> eventStream = new SDEventStream(sampleStream, - factory.createSentenceContextGenerator("en"), - factory.createEndOfSentenceScanner("en")); + factory.createSentenceContextGenerator("eng"), + 
factory.createEndOfSentenceScanner("eng")); Assert.assertEquals(SentenceDetectorME.NO_SPLIT, eventStream.read().getOutcome()); Assert.assertEquals(SentenceDetectorME.SPLIT, eventStream.read().getOutcome()); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java index 1306e8b..06de899 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorFactoryTest.java @@ -55,7 +55,7 @@ public class SentenceDetectorFactoryTest { private static SentenceModel train(SentenceDetectorFactory factory) throws IOException { - return SentenceDetectorME.train("en", createSampleStream(), factory, + return SentenceDetectorME.train("eng", createSampleStream(), factory, TrainingParameters.defaultParams()); } @@ -72,7 +72,7 @@ public class SentenceDetectorFactoryTest { Dictionary dic = loadAbbDictionary(); char[] eos = {'.', '?'}; - SentenceModel sdModel = train(new SentenceDetectorFactory("en", true, dic, + SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true, dic, eos)); SentenceDetectorFactory factory = sdModel.getFactory(); @@ -97,7 +97,7 @@ public class SentenceDetectorFactoryTest { Dictionary dic = null; char[] eos = {'.', '?'}; - SentenceModel sdModel = train(new SentenceDetectorFactory("en", true, + SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true, dic, eos)); SentenceDetectorFactory factory = sdModel.getFactory(); @@ -124,7 +124,7 @@ public class SentenceDetectorFactoryTest { Dictionary dic = null; char[] eos = null; - SentenceModel sdModel = train(new SentenceDetectorFactory("en", true, 
+ SentenceModel sdModel = train(new SentenceDetectorFactory("eng", true, dic, eos)); SentenceDetectorFactory factory = sdModel.getFactory(); @@ -154,7 +154,7 @@ public class SentenceDetectorFactoryTest { Dictionary dic = loadAbbDictionary(); char[] eos = {'.', '?'}; - SentenceModel sdModel = train(new DummySentenceDetectorFactory("en", true, + SentenceModel sdModel = train(new DummySentenceDetectorFactory("eng", true, dic, eos)); SentenceDetectorFactory factory = sdModel.getFactory(); @@ -185,7 +185,7 @@ public class SentenceDetectorFactoryTest { char[] eos = {'.', '?'}; SentenceDetectorFactory factory = SentenceDetectorFactory.create( - DummySentenceDetectorFactory.class.getCanonicalName(), "es", false, + DummySentenceDetectorFactory.class.getCanonicalName(), "spa", false, dic, eos); Assert.assertTrue(factory.getAbbreviationDictionary() instanceof DummyDictionary); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java index 5fba0fd..87ced1b 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java @@ -46,13 +46,13 @@ public class SentenceDetectorMETest { mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100); mlParams.put(TrainingParameters.CUTOFF_PARAM, 0); - SentenceDetectorFactory factory = new SentenceDetectorFactory("en", true, null, null); + SentenceDetectorFactory factory = new SentenceDetectorFactory("eng", true, null, null); SentenceModel sentdetectModel = SentenceDetectorME.train( - "en", new SentenceSampleStream(new PlainTextByLineStream(in, + "eng", new SentenceSampleStream(new 
PlainTextByLineStream(in, StandardCharsets.UTF_8)), factory, mlParams); - Assert.assertEquals("en", sentdetectModel.getLanguage()); + Assert.assertEquals("eng", sentdetectModel.getLanguage()); SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); @@ -146,9 +146,9 @@ public class SentenceDetectorMETest { mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100); mlParams.put(TrainingParameters.CUTOFF_PARAM, 0); - SentenceDetectorFactory factory = new SentenceDetectorFactory("en", true, null, null); + SentenceDetectorFactory factory = new SentenceDetectorFactory("eng", true, null, null); - SentenceDetectorME.train("en", + SentenceDetectorME.train("eng", new SentenceSampleStream( new PlainTextByLineStream(in, StandardCharsets.UTF_8)), factory, mlParams); http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java index b344596..a916a32 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerFactoryTest.java @@ -66,7 +66,7 @@ public class TokenizerFactoryTest { public void testDefault() throws IOException { Dictionary dic = loadAbbDictionary(); - final String lang = "es"; + final String lang = "spa"; TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null)); @@ -99,7 +99,7 @@ public class TokenizerFactoryTest { public void testNullDict() throws IOException { Dictionary dic = null; - final String lang = "es"; + final String lang = "spa"; TokenizerModel model = train(new TokenizerFactory(lang, dic, false, null)); @@ -132,7 +132,7 @@ public class TokenizerFactoryTest { public void testCustomPatternAndAlphaOpt() throws IOException 
{ Dictionary dic = null; - final String lang = "es"; + final String lang = "spa"; String pattern = "^[0-9A-Za-z]+$"; TokenizerModel model = train(new TokenizerFactory(lang, dic, true, @@ -166,7 +166,7 @@ public class TokenizerFactoryTest { public void testDummyFactory() throws IOException { Dictionary dic = loadAbbDictionary(); - final String lang = "es"; + final String lang = "spa"; String pattern = "^[0-9A-Za-z]+$"; TokenizerModel model = train(new DummyTokenizerFactory(lang, dic, true, @@ -198,7 +198,7 @@ public class TokenizerFactoryTest { @Test public void testCreateDummyFactory() throws IOException { Dictionary dic = loadAbbDictionary(); - final String lang = "es"; + final String lang = "spa"; String pattern = "^[0-9A-Za-z]+$"; TokenizerFactory factory = TokenizerFactory.create( http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java index 3dd92a0..a634b07 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerMETest.java @@ -87,7 +87,7 @@ public class TokenizerMETest { mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100); mlParams.put(TrainingParameters.CUTOFF_PARAM, 5); - TokenizerME.train(samples, TokenizerFactory.create(null, "en", null, true, null), mlParams); + TokenizerME.train(samples, TokenizerFactory.create(null, "eng", null, true, null), mlParams); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/aae0f299/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java 
b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java index 4d49c58..1d43f22 100644 --- a/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java +++ b/opennlp-tools/src/test/java/opennlp/tools/tokenize/TokenizerTestUtil.java @@ -58,7 +58,7 @@ public class TokenizerTestUtil { mlParams.put(TrainingParameters.CUTOFF_PARAM, 0); return TokenizerME.train(new CollectionObjectStream<>(samples), - TokenizerFactory.create(null, "en", null, true, null), mlParams); + TokenizerFactory.create(null, "eng", null, true, null), mlParams); } static TokenizerModel createMaxentTokenModel() throws IOException { @@ -73,7 +73,7 @@ public class TokenizerTestUtil { mlParams.put(TrainingParameters.ITERATIONS_PARAM, 100); mlParams.put(TrainingParameters.CUTOFF_PARAM, 0); - return TokenizerME.train(samples, TokenizerFactory.create(null, "en", null, true, null), mlParams); + return TokenizerME.train(samples, TokenizerFactory.create(null, "eng", null, true, null), mlParams); } }
