This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch OPENNLP-1781-port in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 031aea5ea3266902c63fbcc2daf7e0fc51a2f1d3 Author: Richard Zowalla <[email protected]> AuthorDate: Tue Oct 14 11:19:40 2025 +0200 OPENNLP-1781 - SentenceDetectorME throws StringIndexOutOfBoundsException when sentence starts with an abbreviation --- .../opennlp/tools/sentdetect/SentenceDetectorME.java | 3 ++- .../sentdetect/SentenceDetectorMEGermanTest.java | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java index ddcc3388..9b113e12 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java @@ -345,7 +345,8 @@ public class SentenceDetectorME implements SentenceDetector, Probabilistic { if (tokenPosition == -1) { continue; // skip fast } - final char prevChar = s.charAt(tokenPosition - 1); + + final char prevChar = s.charAt(tokenPosition == 0 ? tokenPosition : tokenPosition - 1); int tokenLength = token.length(); if (tokenPosition + tokenLength < candidateIndex || tokenPosition > candidateIndex || /* diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java index d95a1eec..0a9e99ac 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java @@ -151,6 +151,25 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest { () -> assertEquals(2, probs.length)); } + /* + * A reproducer and test for OPENNLP-1781. 
+ */ + @Test + void testSentDetectWithAbbreviationsAtSentenceStart() { + prepareResources(true); + + final String sent1 = "S. Träume sind eine Verbindung von Gedanken."; + + // The input starts with the abbreviation "S." and must be detected as one single sentence. + String[] sents = sentenceDetector.sentDetect(sent1); + double[] probs = sentenceDetector.probs(); + + assertAll( + () -> assertEquals(1, sents.length), + () -> assertEquals(sent1, sents[0]), + () -> assertEquals(1, probs.length)); + } + /* * A reproducer and test for OPENNLP-1767. * It checks that sentence detection with common abbreviations works correctly,
