This is an automated email from the ASF dual-hosted git repository.

koji pushed a commit to branch revert-329-OPENNLP-1214
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit bf0d13b4b0c5304e1134ac295a9c1cb3767c3dcc
Author: Koji Sekiguchi <[email protected]>
AuthorDate: Mon Oct 15 16:02:50 2018 +0900

    Revert "OPENNLP-1214: use hash to avoid linear search in DefaultEndOfSentenceScanner and DefaultSDContextGenerator (#329)"

    This reverts commit 51cbde659383c8931525a51cf5066092826082ae.
---
 .../sentdetect/DefaultEndOfSentenceScanner.java    | 26 +++-------
 .../sentdetect/DefaultSDContextGenerator.java      | 26 +++++-----
 .../tools/sentdetect/EndOfSentenceScanner.java     |  8 ---
 .../sentdetect/DefaultSDContextGeneratorTest.java  | 59 ----------------------
 4 files changed, 22 insertions(+), 97 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
index 2b8c0be..75d0ec0 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
@@ -19,9 +19,7 @@
 package opennlp.tools.sentdetect;
 
 import java.util.ArrayList;
-import java.util.HashSet;
 import java.util.List;
-import java.util.Set;
 
 /**
  * Default implementation of the {@link EndOfSentenceScanner}.
@@ -30,9 +28,7 @@ import java.util.Set;
  */
 public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
 
-  private Set<Character> eosCharacters;
-  @Deprecated
-  private char[] eosChars;
+  private char[] eosCharacters;
 
   /**
    * Initializes the current instance.
@@ -40,11 +36,7 @@ public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
    * @param eosCharacters
    */
   public DefaultEndOfSentenceScanner(char[] eosCharacters) {
-    this.eosCharacters = new HashSet<>();
-    for (char eosChar: eosCharacters) {
-      this.eosCharacters.add(eosChar);
-    }
-    this.eosChars = eosCharacters;
+    this.eosCharacters = eosCharacters;
   }
 
   public List<Integer> getPositions(String s) {
@@ -57,21 +49,19 @@ public class DefaultEndOfSentenceScanner implements EndOfSentenceScanner {
 
   public List<Integer> getPositions(char[] cbuf) {
     List<Integer> l = new ArrayList<>();
+    char[] eosCharacters = getEndOfSentenceCharacters();
     for (int i = 0; i < cbuf.length; i++) {
-      if (eosCharacters.contains(cbuf[i])) {
-        l.add(i);
+      for (char eosCharacter : eosCharacters) {
+        if (cbuf[i] == eosCharacter) {
+          l.add(i);
+          break;
+        }
       }
     }
     return l;
   }
 
-  @Deprecated
   public char[] getEndOfSentenceCharacters() {
-    return eosChars;
-  }
-
-  @Override
-  public Set<Character> getEOSCharacters() {
     return eosCharacters;
   }
 }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
index 8c2822b..a29119b 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultSDContextGenerator.java
@@ -19,7 +19,6 @@ package opennlp.tools.sentdetect;
 
 import java.util.ArrayList;
 import java.util.Collections;
-import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 
@@ -43,7 +42,7 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
 
   private Set<String> inducedAbbreviations;
 
-  private Set<Character> eosCharacters;
+  private char[] eosCharacters;
 
   /**
    * Creates a new <code>SDContextGenerator</code> instance with
@@ -67,10 +66,7 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
    */
   public DefaultSDContextGenerator(Set<String> inducedAbbreviations, char[] eosCharacters) {
     this.inducedAbbreviations = inducedAbbreviations;
-    this.eosCharacters = new HashSet<>();
-    for (char eosChar: eosCharacters) {
-      this.eosCharacters.add(eosChar);
-    }
+    this.eosCharacters = eosCharacters;
     buf = new StringBuffer();
     collectFeats = new ArrayList<>();
   }
@@ -125,9 +121,12 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
     int c = position;
     { ///assign prefix, stop if you run into a period though otherwise stop at space
       while (--c > prefixStart) {
-        if (eosCharacters.contains(sb.charAt(c))) {
-          prefixStart = c;
-          c++; // this gets us out of while loop.
+        for (int eci = 0, ecl = eosCharacters.length; eci < ecl; eci++) {
+          if (sb.charAt(c) == eosCharacters[eci]) {
+            prefixStart = c;
+            c++; // this gets us out of while loop.
+            break;
+          }
         }
       }
       prefix = String.valueOf(sb.subSequence(prefixStart, position)).trim();
@@ -139,9 +138,12 @@ public class DefaultSDContextGenerator implements SDContextGenerator {
     {
       c = position;
       while (++c < suffixEnd) {
-        if (eosCharacters.contains(sb.charAt(c))) {
-          suffixEnd = c;
-          c--; // this gets us out of while loop.
+        for (int eci = 0, ecl = eosCharacters.length; eci < ecl; eci++) {
+          if (sb.charAt(c) == eosCharacters[eci]) {
+            suffixEnd = c;
+            c--; // this gets us out of while loop.
+            break;
+          }
         }
       }
     }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
index 7963e37..b48ad3f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
@@ -18,7 +18,6 @@
 package opennlp.tools.sentdetect;
 
 import java.util.List;
-import java.util.Set;
 
 /**
  * Scans Strings, StringBuffers, and char[] arrays for the offsets of
@@ -35,16 +34,9 @@ public interface EndOfSentenceScanner {
    * Returns an array of character which can indicate the end of a sentence.
    * @return an array of character which can indicate the end of a sentence.
    */
-  @Deprecated
   char[] getEndOfSentenceCharacters();
 
   /**
-   * Returns a set of character which can indicate the end of a sentence.
-   * @return a set of character which can indicate the end of a sentence.
-   */
-  Set<Character> getEOSCharacters();
-
-  /**
    * The receiver scans the specified string for sentence ending characters and
    * returns their offsets.
    *
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java
deleted file mode 100644
index f010498..0000000
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/DefaultSDContextGeneratorTest.java
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package opennlp.tools.sentdetect;
-
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.HashSet;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-import opennlp.tools.sentdetect.lang.Factory;
-
-public class DefaultSDContextGeneratorTest {
-
-  @Test
-  public void testGetContext() throws Exception {
-    SDContextGenerator sdContextGenerator =
-        new DefaultSDContextGenerator(Collections.<String>emptySet(), Factory.defaultEosCharacters);
-
-    String[] context = sdContextGenerator.getContext(
-        "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 2);
-    Assert.assertArrayEquals("sn/eos=./x=Mr/2/xcap/v=/s=/n=Smith/ncap".split("/"), context);
-
-    context = sdContextGenerator.getContext(
-        "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 29);
-    Assert.assertArrayEquals("sn/eos=./x=Inc/3/xcap/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
-  }
-
-  @Test
-  public void testGetContextWithAbbreviations() throws Exception {
-    SDContextGenerator sdContextGenerator =
-        new DefaultSDContextGenerator(new HashSet<>(Arrays.asList("Mr./Inc.".split("/"))),
-        Factory.defaultEosCharacters);
-
-    String[] context = sdContextGenerator.getContext(
-        "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 2);
-    Assert.assertArrayEquals("sn/eos=./x=Mr/2/xcap/xabbrev/v=/s=/n=Smith/ncap".split("/"), context);
-
-    context = sdContextGenerator.getContext(
-        "Mr. Smith joined RONDHUIT Inc. as a manager of sales department.", 29);
-    Assert.assertArrayEquals("sn/eos=./x=Inc/3/xcap/xabbrev/v=RONDHUIT/vcap/s=/n=as".split("/"), context);
-  }
-}
