Repository: opennlp
Updated Branches:
  refs/heads/master b5b6d5c27 -> 5a0f9cafc
OPENNLP-1097: Enable the normalizers by default in langdetect

Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/5a0f9caf
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/5a0f9caf
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/5a0f9caf

Branch: refs/heads/master
Commit: 5a0f9cafc024c691de224f676c574c00fd56e8b1
Parents: b5b6d5c
Author: Jörn Kottmann <[email protected]>
Authored: Thu Jun 22 14:52:10 2017 +0200
Committer: Jörn Kottmann <[email protected]>
Committed: Thu Jun 22 16:48:36 2017 +0200

----------------------------------------------------------------------
 .../tools/langdetect/LanguageDetectorContextGenerator.java | 2 +-
 opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5a0f9caf/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
index 1ec42fd..f0941df 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/langdetect/LanguageDetectorContextGenerator.java
@@ -58,7 +58,7 @@ class LanguageDetectorContextGenerator {
     Collection<String> context = new ArrayList<>();
 
     NGramModel model = new NGramModel();
-    model.add(document, minLength, maxLength);
+    model.add(normalizer.normalize(document), minLength, maxLength);
 
     for (StringList tokenList : model) {
       if (tokenList.size() > 0) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/5a0f9caf/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
index 0e0e4dd..a17578b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/ngram/NGramModel.java
@@ -168,14 +168,14 @@ public class NGramModel implements Iterable<StringList> {
    * @param minLength
    * @param maxLength
    */
-  public void add(String chars, int minLength, int maxLength) {
+  public void add(CharSequence chars, int minLength, int maxLength) {
 
     for (int lengthIndex = minLength; lengthIndex < maxLength + 1; lengthIndex++) {
       for (int textIndex = 0;
           textIndex + lengthIndex - 1 < chars.length(); textIndex++) {
 
         String gram = StringUtil.toLowerCase(
-            chars.substring(textIndex, textIndex + lengthIndex));
+            chars.subSequence(textIndex, textIndex + lengthIndex));
 
         add(new StringList(new String[]{gram}));
       }
