Repository: opennlp Updated Branches: refs/heads/trunk f89f3ffe2 -> 554626de2
Add japanese eos chars Thanks to Bar Perach for providing a patch! See issue OPENNLP-772 Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/554626de Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/554626de Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/554626de Branch: refs/heads/trunk Commit: 554626de2edb69951083b847b5fd13db28bae30e Parents: f89f3ff Author: Jörn Kottmann <[email protected]> Authored: Tue Dec 20 23:35:48 2016 +0100 Committer: Jörn Kottmann <[email protected]> Committed: Tue Dec 20 23:35:48 2016 +0100 ---------------------------------------------------------------------- .../java/opennlp/tools/sentdetect/lang/Factory.java | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/opennlp/blob/554626de/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java ---------------------------------------------------------------------- diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java index 1f77ade..d182b26 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/lang/Factory.java @@ -36,14 +36,11 @@ public class Factory { public static final char[] thEosCharacters = new char[] { ' ','\n' }; + public static final char[] jpEosCharacters = new char[] {'。', '！', '？'}; + public EndOfSentenceScanner createEndOfSentenceScanner(String languageCode) { - if ("th".equals(languageCode)) { - return new DefaultEndOfSentenceScanner(new char[]{' ','\n'}); - } else if("pt".equals(languageCode)) { - return new DefaultEndOfSentenceScanner(ptEosCharacters); - } - return new DefaultEndOfSentenceScanner(defaultEosCharacters); + return new 
DefaultEndOfSentenceScanner(getEOSCharacters(languageCode)); } public EndOfSentenceScanner createEndOfSentenceScanner( @@ -76,8 +73,10 @@ public class Factory { return thEosCharacters; } else if ("pt".equals(languageCode)) { return ptEosCharacters; + } else if ("jp".equals(languageCode)) { + return jpEosCharacters; } return defaultEosCharacters; } -} \ No newline at end of file +}
