This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new dcf9f998 OPENNLP-1357 Use CharSequence to allow for memory management
dcf9f998 is described below

commit dcf9f998aca6e12beca1865bf17967bdce118754
Author: Martin Wiesner <[email protected]>
AuthorDate: Fri Dec 9 17:20:34 2022 +0100

    OPENNLP-1357 Use CharSequence to allow for memory management
    
    - adjusts method signatures in `SentenceDetector` and 
`EndOfSentenceScanner` to use `CharSequence` as proposed by reporter 'P. Austin'
    - adapts existing impl classes to work (fine) with this change, see 
comments in OPENNLP-1357
    - adjusts JavaDoc accordingly
    - adds 'Override' annotations in some spots where they were missing
---
 .../sentdetect/DefaultEndOfSentenceScanner.java    | 11 ++++++--
 .../tools/sentdetect/EndOfSentenceScanner.java     | 16 +++++------
 .../tools/sentdetect/NewlineSentenceDetector.java  |  4 +--
 .../opennlp/tools/sentdetect/SentenceDetector.java | 12 ++++----
 .../tools/sentdetect/SentenceDetectorME.java       | 32 +++++++++++-----------
 .../sentdetect/SentenceDetectorEvaluatorTest.java  | 11 ++++----
 6 files changed, 47 insertions(+), 39 deletions(-)

diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
index bc9f004d..6041ba51 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/DefaultEndOfSentenceScanner.java
@@ -48,8 +48,15 @@ public class DefaultEndOfSentenceScanner implements 
EndOfSentenceScanner {
   }
 
   @Override
-  public List<Integer> getPositions(String s) {
-    return getPositions(s.toCharArray());
+  public List<Integer> getPositions(CharSequence s) {
+    List<Integer> l = new ArrayList<>();
+    for (int i = 0; i < s.length(); i++) {
+      char c = s.charAt(i);
+      if (eosCharacters.contains(c)) {
+        l.add(i);
+      }
+    }
+    return l;
   }
 
   @Override
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
index c1aee346..7ddddeea 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/EndOfSentenceScanner.java
@@ -21,7 +21,7 @@ import java.util.List;
 import java.util.Set;
 
 /**
- * Scans Strings, StringBuffers, and char[] arrays for the offsets of
+ * Scans {@link CharSequence}, {@link StringBuffer}, and {@code char[]} for 
the offsets of
  * sentence ending characters.
  *
  * <p>Implementations of this interface can use regular expressions,
@@ -46,17 +46,17 @@ public interface EndOfSentenceScanner {
    * The receiver scans the specified string for sentence ending characters and
    * returns their offsets.
    *
-   * @param s a {@link String} value
-   * @return a {@link List} of Integer objects.
+   * @param s A {@link CharSequence} to be scanned.
+   * @return A {@link List} of Integer objects.
    */
-  List<Integer> getPositions(String s);
+  List<Integer> getPositions(CharSequence s);
 
   /**
    * The receiver scans {@code buf} for sentence ending characters and
    * returns their offsets.
    *
-   * @param buf a {@link StringBuffer} value
-   * @return a {@link List} of Integer objects.
+   * @param buf A {@link StringBuffer} to be scanned.
+   * @return A {@link List} of Integer objects.
    */
   List<Integer> getPositions(StringBuffer buf);
 
@@ -64,8 +64,8 @@ public interface EndOfSentenceScanner {
    * The receiver scans {@code cbuf} for sentence ending characters and
    * returns their offsets.
    *
-   * @param cbuf a {@code char[]} value
-   * @return a {@link List} of Integer objects.
+   * @param cbuf A {@code char[]} to be scanned.
+   * @return A {@link List} of Integer objects.
    */
   List<Integer> getPositions(char[] cbuf);
 }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/NewlineSentenceDetector.java
 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/NewlineSentenceDetector.java
index 84cfa259..9ba1641e 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/NewlineSentenceDetector.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/NewlineSentenceDetector.java
@@ -29,12 +29,12 @@ import opennlp.tools.util.Span;
 public class NewlineSentenceDetector implements SentenceDetector {
 
   @Override
-  public String[] sentDetect(String s) {
+  public String[] sentDetect(CharSequence s) {
     return Span.spansToStrings(sentPosDetect(s), s);
   }
 
   @Override
-  public Span[] sentPosDetect(String s) {
+  public Span[] sentPosDetect(CharSequence s) {
 
     List<Span> sentences = new ArrayList<>();
 
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetector.java 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetector.java
index 38cf3def..58c66fb4 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetector.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetector.java
@@ -27,21 +27,21 @@ import opennlp.tools.util.Span;
 public interface SentenceDetector {
 
     /**
-     * Sentence detect a string.
+     * Detects sentences in a character sequence.
      *
-     * @param s The string for which sentences shall to be detected.
+     * @param s The {@link CharSequence} for which sentences shall be 
detected.
      * @return  The String[] with the individual sentences as the array
      *          elements.
      */
-    String[] sentDetect(String s);
+    String[] sentDetect(CharSequence s);
 
     /**
-     * Sentence detect a string.
+     * Detects sentences in a character sequence.
      *
-     * @param s The string for which sentences shall be detected.
+     * @param s The {@link CharSequence} for which sentences shall be detected.
      *
      * @return The array of {@link Span spans} (offsets into {@code s}) for 
each
      * detected sentence as the individuals array elements.
      */
-    Span[] sentPosDetect(String s);
+    Span[] sentPosDetect(CharSequence s);
 }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
index b0cec962..9939e6d0 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java
@@ -131,13 +131,14 @@ public class SentenceDetectorME implements 
SentenceDetector {
   }
 
   /**
-   * Detect sentences in given input String.
+   * Detects sentences in given input {@link CharSequence}.
    *
-   * @param s  The string to be processed.
+   * @param s  The {@link CharSequence} to be processed.
    *
    * @return   A string array containing individual sentences as elements.
    */
-  public String[] sentDetect(String s) {
+  @Override
+  public String[] sentDetect(CharSequence s) {
     Span[] spans = sentPosDetect(s);
     String[] sentences;
     if (spans.length != 0) {
@@ -152,30 +153,29 @@ public class SentenceDetectorME implements 
SentenceDetector {
     return sentences;
   }
 
-  private int getFirstWS(String s, int pos) {
+  private int getFirstWS(CharSequence s, int pos) {
     while (pos < s.length() && !StringUtil.isWhitespace(s.charAt(pos)))
       pos++;
     return pos;
   }
 
-  private int getFirstNonWS(String s, int pos) {
+  private int getFirstNonWS(CharSequence s, int pos) {
     while (pos < s.length() && StringUtil.isWhitespace(s.charAt(pos)))
       pos++;
     return pos;
   }
 
   /**
-   * Detect the position of the first words of sentences in a String.
+   * Detects the position of the first words of sentences in a {@link 
CharSequence}.
    *
-   * @param s  The string to be processed.
-   * @return   An integer array containing the positions of the end index of
-   *          every sentence
+   * @param s  The {@link CharSequence} to be processed.
+   * @return   A {@link Span span array} containing the positions of the end 
index of
+   *           every sentence.
    *
    */
   @Override
-  public Span[] sentPosDetect(String s) {
+  public Span[] sentPosDetect(CharSequence s) {
     sentProbs.clear();
-    StringBuffer sb = new StringBuffer(s);
     List<Integer> enders = scanner.getPositions(s);
     List<Integer> positions = new ArrayList<>(enders.size());
 
@@ -188,7 +188,7 @@ public class SentenceDetectorME implements SentenceDetector 
{
       }
       if (positions.size() > 0 && cint < positions.get(positions.size() - 1)) 
continue;
 
-      double[] probs = model.eval(cgen.getContext(sb, cint));
+      double[] probs = model.eval(cgen.getContext(s, cint));
       String bestOutcome = model.getBestOutcome(probs);
 
       if (bestOutcome.equals(SPLIT) && isAcceptableBreak(s, index, cint)) {
@@ -279,10 +279,10 @@ public class SentenceDetectorME implements 
SentenceDetector {
 
   /**
    * Returns the probabilities associated with the most recent
-   * calls to {@link SentenceDetectorME#sentDetect(String)}.
+   * calls to {@link SentenceDetectorME#sentDetect(CharSequence)}.
    *
    * @return The probability for each sentence returned for the most recent
-   *     call to {@link SentenceDetectorME#sentDetect(String)}.
+   *     call to {@link SentenceDetectorME#sentDetect(CharSequence)}.
    *     If not applicable, an empty array is returned.
    */
   public double[] getSentenceProbabilities() {
@@ -301,12 +301,12 @@ public class SentenceDetectorME implements 
SentenceDetector {
    * <p>The implementation here always returns {@link true}, which means
    * that the MaxentModel's outcome is taken as is.</p>
    *
-   * @param s the string in which the break occurred.
+   * @param s the {@link CharSequence} in which the break occurred.
    * @param fromIndex the start of the segment currently being evaluated.
    * @param candidateIndex the index of the candidate sentence ending.
    * @return {@link true} if the break is acceptable.
    */
-  protected boolean isAcceptableBreak(String s, int fromIndex, int 
candidateIndex) {
+  protected boolean isAcceptableBreak(CharSequence s, int fromIndex, int 
candidateIndex) {
     return true;
   }
 
diff --git 
a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorEvaluatorTest.java
 
b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorEvaluatorTest.java
index c6546ed2..df3a97b7 100644
--- 
a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorEvaluatorTest.java
+++ 
b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceDetectorEvaluatorTest.java
@@ -39,7 +39,6 @@ public class SentenceDetectorEvaluatorTest {
     eval.evaluateSample(SentenceSampleTest.createGoldSample());
 
     Assertions.assertEquals(1.0, eval.getFMeasure().getFMeasure());
-
     Assertions.assertEquals(0, stream.toString().length());
   }
 
@@ -62,19 +61,21 @@ public class SentenceDetectorEvaluatorTest {
   /**
    * a dummy sentence detector that always return something expected
    */
-  public class DummySD implements SentenceDetector {
+  public static class DummySD implements SentenceDetector {
 
-    private SentenceSample sample;
+    private final SentenceSample sample;
 
     public DummySD(SentenceSample sample) {
       this.sample = sample;
     }
 
-    public String[] sentDetect(String s) {
+    @Override
+    public String[] sentDetect(CharSequence s) {
       return null;
     }
 
-    public Span[] sentPosDetect(String s) {
+    @Override
+    public Span[] sentPosDetect(CharSequence s) {
       return sample.getSentences();
     }
 

Reply via email to