OPENNLP-176: Switch language codes to ISO-639-3. This closes #114.
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/559747ab Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/559747ab Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/559747ab Branch: refs/heads/parser_regression Commit: 559747ab8c2bd8ff76ab3208aa8d61f696c083bf Parents: 1d8bcb6 Author: Jörn Kottmann <jo...@apache.org> Authored: Sun Jan 29 11:06:08 2017 +0100 Committer: Jörn Kottmann <jo...@apache.org> Committed: Thu Apr 20 12:40:20 2017 +0200 ---------------------------------------------------------------------- .../cmdline/namefind/CensusDictionaryCreatorTool.java | 2 +- .../opennlp/tools/cmdline/parser/ParserTrainerTool.java | 4 ++-- .../tools/formats/AbstractSampleStreamFactory.java | 2 +- .../tools/formats/Conll03NameSampleStreamFactory.java | 6 +++--- .../main/java/opennlp/tools/sentdetect/lang/Factory.java | 10 +++++----- 5 files changed, 12 insertions(+), 12 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java index 6042510..f9bf5e0 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/namefind/CensusDictionaryCreatorTool.java @@ -50,7 +50,7 @@ public class CensusDictionaryCreatorTool extends BasicCmdLineTool { interface Parameters { @ParameterDescription(valueName = "code") - @OptionalParameter(defaultValue = "en") + @OptionalParameter(defaultValue = "eng") String getLang(); @ParameterDescription(valueName = 
"charsetName") http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java index 3a8dd5a..2709fd5 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java +++ b/opennlp-tools/src/main/java/opennlp/tools/cmdline/parser/ParserTrainerTool.java @@ -90,10 +90,10 @@ public final class ParserTrainerTool extends AbstractTrainerTool<Parse, TrainerT params.getHeadRulesSerializerImpl()); } else { - if ("en".equals(params.getLang())) { + if ("en".equals(params.getLang()) || "eng".equals(params.getLang())) { headRulesSerializer = new opennlp.tools.parser.lang.en.HeadRules.HeadRulesSerializer(); } - else if ("es".equals(params.getLang())) { + else if ("es".equals(params.getLang()) || "spa".equals(params.getLang())) { headRulesSerializer = new opennlp.tools.parser.lang.es.AncoraSpanishHeadRules.HeadRulesSerializer(); } else { http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java index 6a7690e..33d0f95 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/AbstractSampleStreamFactory.java @@ -34,7 +34,7 @@ public abstract class AbstractSampleStreamFactory<T> implements ObjectStreamFact } public String getLang() { - return "en"; + return "eng"; } @SuppressWarnings({"unchecked"}) 
http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java index 878565f..599d48a 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/formats/Conll03NameSampleStreamFactory.java @@ -32,7 +32,7 @@ import opennlp.tools.util.ObjectStream; public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory<NameSample> { interface Parameters extends BasicFormatParams { - @ParameterDescription(valueName = "en|de") + @ParameterDescription(valueName = "eng|deu") String getLang(); @ParameterDescription(valueName = "per,loc,org,misc") @@ -54,11 +54,11 @@ public class Conll03NameSampleStreamFactory extends LanguageSampleStreamFactory< // TODO: support the other languages with this CoNLL. 
LANGUAGE lang; - if ("en".equals(params.getLang())) { + if ("eng".equals(params.getLang())) { lang = LANGUAGE.EN; language = params.getLang(); } - else if ("de".equals(params.getLang())) { + else if ("deu".equals(params.getLang())) { lang = LANGUAGE.DE; language = params.getLang(); } http://git-wip-us.apache.org/repos/asf/opennlp/blob/559747ab/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java index 28b515b..4a34229 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java @@ -49,9 +49,9 @@ public class Factory { public SDContextGenerator createSentenceContextGenerator(String languageCode, Set<String> abbreviations) { - if ("th".equals(languageCode)) { + if ("th".equals(languageCode) || "tha".equals(languageCode)) { return new SentenceContextGenerator(); - } else if ("pt".equals(languageCode)) { + } else if ("pt".equals(languageCode) || "por".equals(languageCode)) { return new DefaultSDContextGenerator(abbreviations, ptEosCharacters); } @@ -68,11 +68,11 @@ public class Factory { } public char[] getEOSCharacters(String languageCode) { - if ("th".equals(languageCode)) { + if ("th".equals(languageCode) || "tha".equals(languageCode)) { return thEosCharacters; - } else if ("pt".equals(languageCode)) { + } else if ("pt".equals(languageCode) || "por".equals(languageCode)) { return ptEosCharacters; - } else if ("jp".equals(languageCode)) { + } else if ("jp".equals(languageCode) || "jpn".equals(languageCode)) { return jpEosCharacters; }