This is an automated email from the ASF dual-hosted git repository.
rzo1 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new dcf9f998 OPENNLP-1357 Use CharSequence to allow for memory management
dcf9f998 is described below
commit dcf9f998aca6e12beca1865bf17967bdce118754
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Dec 9 17:20:34 2022 +0100
OPENNLP-1357 Use CharSequence to allow for memory management
- adjusts method signatures in `SentenceDetector` and
`EndOfSentenceScanner` to use `CharSequence` as proposed by reporter 'P. Austin'
- adapts existing impl classes to work (fine) with this change, see
comments in OPENNLP-1357
- adjusts JavaDoc accordingly
- adds 'Override' annotations in some spots where they were missing
---
.../sentdetect/DefaultEndOfSentenceScanner.java | 11 ++++++--
.../tools/sentdetect/EndOfSentenceScanner.java | 16 +++++------
.../tools/sentdetect/NewlineSentenceDetector.java | 4 +--
.../opennlp/tools/sentdetect/SentenceDetector.java | 12 ++++----
.../tools/sentdetect/SentenceDetectorME.java | 32 +++++++++++-----------
.../sentdetect/SentenceDetectorEvaluatorTest.java | 11 ++++----
6 files changed, 47 insertions(+), 39 deletions(-)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
index bc9f004d..6041ba51 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
@@ -48,8 +48,15 @@ public class DefaultEndOfSentenceScanner implements
EndOfSentenceScanner {
}
@Override
- public List<Integer> getPositions(String s) {
- return getPositions(s.toCharArray());
+ public List<Integer> getPositions(CharSequence s) {
+ List<Integer> l = new ArrayList<>();
+ for (int i = 0; i < s.length(); i++) {
+ char c = s.charAt(i);
+ if (eosCharacters.contains(c)) {
+ l.add(i);
+ }
+ }
+ return l;
}
@Override
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
index c1aee346..7ddddeea 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
@@ -21,7 +21,7 @@ import java.util.List;
import java.util.Set;
/**
- * Scans Strings, StringBuffers, and char[] arrays for the offsets of
+ * Scans {@link CharSequence}, {@link StringBuffer}, and {@code char[]} for
the offsets of
* sentence ending characters.
*
* <p>Implementations of this interface can use regular expressions,
@@ -46,17 +46,17 @@ public interface EndOfSentenceScanner {
* The receiver scans the specified string for sentence ending characters and
* returns their offsets.
*
- * @param s a {@link String} value
- * @return a {@link List} of Integer objects.
+ * @param s A {@link CharSequence} to be scanned.
+ * @return A {@link List} of Integer objects.
*/
- List<Integer> getPositions(String s);
+ List<Integer> getPositions(CharSequence s);
/**
* The receiver scans {@code buf} for sentence ending characters and
* returns their offsets.
*
- * @param buf a {@link StringBuffer} value
- * @return a {@link List} of Integer objects.
+ * @param buf A {@link StringBuffer} to be scanned.
+ * @return A {@link List} of Integer objects.
*/
List<Integer> getPositions(StringBuffer buf);
@@ -64,8 +64,8 @@ public interface EndOfSentenceScanner {
* The receiver scans {@code cbuf} for sentence ending characters and
* returns their offsets.
*
- * @param cbuf a {@code char[]} value
- * @return a {@link List} of Integer objects.
+ * @param cbuf A {@code char[]} to be scanned.
+ * @return A {@link List} of Integer objects.
*/
List<Integer> getPositions(char[] cbuf);
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/NewlineSentenceDetector.java
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/NewlineSentenceDetector.java
index 84cfa259..9ba1641e 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/NewlineSentenceDetector.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/NewlineSentenceDetector.java
@@ -29,12 +29,12 @@ import opennlp.tools.util.Span;
public class NewlineSentenceDetector implements SentenceDetector {
@Override
- public String[] sentDetect(String s) {
+ public String[] sentDetect(CharSequence s) {
return Span.spansToStrings(sentPosDetect(s), s);
}
@Override
- public Span[] sentPosDetect(String s) {
+ public Span[] sentPosDetect(CharSequence s) {
List<Span> sentences = new ArrayList<>();
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetector.java
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetector.java
index 38cf3def..58c66fb4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetector.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetector.java
@@ -27,21 +27,21 @@ import opennlp.tools.util.Span;
public interface SentenceDetector {
/**
- * Sentence detect a string.
+ * Detects sentences in a character sequence.
*
- * @param s The string for which sentences shall to be detected.
+ * @param s The {@link CharSequence} for which sentences shall be
detected.
* @return The String[] with the individual sentences as the array
* elements.
*/
- String[] sentDetect(String s);
+ String[] sentDetect(CharSequence s);
/**
- * Sentence detect a string.
+ * Detects sentences in a character sequence.
*
- * @param s The string for which sentences shall be detected.
+ * @param s The {@link CharSequence} for which sentences shall be detected.
*
* @return The array of {@link Span spans} (offsets into {@code s}) for
each
* detected sentence as the individuals array elements.
*/
- Span[] sentPosDetect(String s);
+ Span[] sentPosDetect(CharSequence s);
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
index b0cec962..9939e6d0 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
@@ -131,13 +131,14 @@ public class SentenceDetectorME implements
SentenceDetector {
}
/**
- * Detect sentences in given input String.
+ * Detects sentences in the given input {@link CharSequence}.
*
- * @param s The string to be processed.
+ * @param s The {@link CharSequence} to be processed.
*
* @return A string array containing individual sentences as elements.
*/
- public String[] sentDetect(String s) {
+ @Override
+ public String[] sentDetect(CharSequence s) {
Span[] spans = sentPosDetect(s);
String[] sentences;
if (spans.length != 0) {
@@ -152,30 +153,29 @@ public class SentenceDetectorME implements
SentenceDetector {
return sentences;
}
- private int getFirstWS(String s, int pos) {
+ private int getFirstWS(CharSequence s, int pos) {
while (pos < s.length() && !StringUtil.isWhitespace(s.charAt(pos)))
pos++;
return pos;
}
- private int getFirstNonWS(String s, int pos) {
+ private int getFirstNonWS(CharSequence s, int pos) {
while (pos < s.length() && StringUtil.isWhitespace(s.charAt(pos)))
pos++;
return pos;
}
/**
- * Detect the position of the first words of sentences in a String.
+ * Detects the position of the first words of sentences in a {@link
CharSequence}.
*
- * @param s The string to be processed.
- * @return An integer array containing the positions of the end index of
- * every sentence
+ * @param s The {@link CharSequence} to be processed.
+ * @return A {@link Span span array} containing the positions of the end
index of
+ * every sentence.
*
*/
@Override
- public Span[] sentPosDetect(String s) {
+ public Span[] sentPosDetect(CharSequence s) {
sentProbs.clear();
- StringBuffer sb = new StringBuffer(s);
List<Integer> enders = scanner.getPositions(s);
List<Integer> positions = new ArrayList<>(enders.size());
@@ -188,7 +188,7 @@ public class SentenceDetectorME implements SentenceDetector
{
}
if (positions.size() > 0 && cint < positions.get(positions.size() - 1))
continue;
- double[] probs = model.eval(cgen.getContext(sb, cint));
+ double[] probs = model.eval(cgen.getContext(s, cint));
String bestOutcome = model.getBestOutcome(probs);
if (bestOutcome.equals(SPLIT) && isAcceptableBreak(s, index, cint)) {
@@ -279,10 +279,10 @@ public class SentenceDetectorME implements
SentenceDetector {
/**
* Returns the probabilities associated with the most recent
- * calls to {@link SentenceDetectorME#sentDetect(String)}.
+ * calls to {@link SentenceDetectorME#sentDetect(CharSequence)}.
*
* @return The probability for each sentence returned for the most recent
- * call to {@link SentenceDetectorME#sentDetect(String)}.
+ * call to {@link SentenceDetectorME#sentDetect(CharSequence)}.
* If not applicable, an empty array is returned.
*/
public double[] getSentenceProbabilities() {
@@ -301,12 +301,12 @@ public class SentenceDetectorME implements
SentenceDetector {
* <p>The implementation here always returns {@link true}, which means
* that the MaxentModel's outcome is taken as is.</p>
*
- * @param s the string in which the break occurred.
+ * @param s the {@link CharSequence} in which the break occurred.
* @param fromIndex the start of the segment currently being evaluated.
* @param candidateIndex the index of the candidate sentence ending.
* @return {@link true} if the break is acceptable.
*/
- protected boolean isAcceptableBreak(String s, int fromIndex, int
candidateIndex) {
+ protected boolean isAcceptableBreak(CharSequence s, int fromIndex, int
candidateIndex) {
return true;
}
diff --git
a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorEvaluatorTest.java
b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorEvaluatorTest.java
index c6546ed2..df3a97b7 100644
---
a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorEvaluatorTest.java
+++
b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorEvaluatorTest.java
@@ -39,7 +39,6 @@ public class SentenceDetectorEvaluatorTest {
eval.evaluateSample(SentenceSampleTest.createGoldSample());
Assertions.assertEquals(1.0, eval.getFMeasure().getFMeasure());
-
Assertions.assertEquals(0, stream.toString().length());
}
@@ -62,19 +61,21 @@ public class SentenceDetectorEvaluatorTest {
/**
* a dummy sentence detector that always return something expected
*/
- public class DummySD implements SentenceDetector {
+ public static class DummySD implements SentenceDetector {
- private SentenceSample sample;
+ private final SentenceSample sample;
public DummySD(SentenceSample sample) {
this.sample = sample;
}
- public String[] sentDetect(String s) {
+ @Override
+ public String[] sentDetect(CharSequence s) {
return null;
}
- public Span[] sentPosDetect(String s) {
+ @Override
+ public Span[] sentPosDetect(CharSequence s) {
return sample.getSentences();
}