Richard Zowalla created OPENNLP-1810:
----------------------------------------

             Summary: SentenceDetector fails to detect multiple instances of 
the same abbreviation in the same sentence.
                 Key: OPENNLP-1810
                 URL: https://issues.apache.org/jira/browse/OPENNLP-1810
             Project: OpenNLP
          Issue Type: Bug
          Components: Sentence Detector
    Affects Versions: 3.0.0-M1, 2.5.7
            Reporter: Richard Zowalla


 /*
   * Edge case: The same abbreviation appears twice in a single sentence segment.
   */
  @Test
  void testSentDetectWithDuplicateAbbreviationInSameSegment() {
    prepareResources(true);

    final String sent1 = "Lt. Vertrag und lt. Bescheid gelten andere 
Bedingungen.";

    String[] sents = sentenceDetector.sentDetect(sent1);
    double[] probs = sentenceDetector.probs();

    assertAll(
        () -> assertEquals(1, sents.length),
        () -> assertEquals(sent1, sents[0]),
        () -> assertEquals(1, probs.length));
  }



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to