OPENNLP-1076: Add validation of spans to SentenceSample

Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/d378c065
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/d378c065
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/d378c065

Branch: refs/heads/LangDetect
Commit: d378c0656ff2374a867abe0383aa841275a47d8d
Parents: 226612f
Author: Jörn Kottmann <[email protected]>
Authored: Wed May 24 12:10:37 2017 +0200
Committer: Jörn Kottmann <[email protected]>
Committed: Wed May 24 12:10:37 2017 +0200

----------------------------------------------------------------------
 .../main/java/opennlp/tools/sentdetect/SentenceSample.java  | 9 +++++++++
 .../java/opennlp/tools/sentdetect/SentenceSampleTest.java   | 7 ++++++-
 2 files changed, 15 insertions(+), 1 deletion(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/d378c065/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
index dbbd193..7891cfd 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/SentenceSample.java
@@ -45,6 +45,15 @@ public class SentenceSample {
   public SentenceSample(CharSequence document, Span... sentences) {
     this.document = document.toString();
     this.sentences = Collections.unmodifiableList(new ArrayList<>(Arrays.asList(sentences)));
+
+    // validate that all spans are inside the document text
+    for (Span sentence : sentences) {
+      if (sentence.getEnd() > document.length()) {
+        throw new IllegalArgumentException(
+            String.format("Sentence span is outside of document text [len %d] and span %s",
+            document.length(), sentence));
+      }
+    }
   }
 
   public SentenceSample(Detokenizer detokenizer, String[][] sentences) {

http://git-wip-us.apache.org/repos/asf/opennlp/blob/d378c065/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
index 163cb73..2ec0978 100644
--- a/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
+++ b/opennlp-tools/src/test/java/opennlp/tools/sentdetect/SentenceSampleTest.java
@@ -29,7 +29,6 @@ public class SentenceSampleTest {
 
   @Test
   public void testRetrievingContent() {
-
     SentenceSample sample = new SentenceSample("1. 2.",
         new Span(0, 2), new Span(3, 5));
 
@@ -38,6 +37,12 @@ public class SentenceSampleTest {
     Assert.assertEquals(new Span(3, 5), sample.getSentences()[1]);
   }
 
+  @Test(expected = IllegalArgumentException.class)
+  public void testInvalidSpansFailFast() {
+    SentenceSample sample = new SentenceSample("1. 2.",
+        new Span(0, 2), new Span(5, 7));
+  }
+
   @Test
   public void testEquals() {
     Assert.assertFalse(createGoldSample() == createGoldSample());

Reply via email to