svn commit: r1633539 - in /stanbol/branches/release-0.12/enhancer/generic/nlp/src: main/java/org/apache/stanbol/enhancer/nlp/model/ main/java/org/apache/stanbol/enhancer/nlp/model/impl/ test/java/org/apache/stanbol/enhancer/nlp/model/

rwesten Tue, 21 Oct 2014 23:39:03 -0700

Author: rwesten
Date: Wed Oct 22 06:37:36 2014
New Revision: 1633539

URL: http://svn.apache.org/r1633539
Log:
implementation for STANBOL-1396


Modified:
    
stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/Section.java
    
stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SectionImpl.java
    
stanbol/branches/release-0.12/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextTest.java

Modified: 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/Section.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/Section.java?rev=1633539&r1=1633538&r2=1633539&view=diff
==============================================================================
--- 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/Section.java
 (original)
+++ 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/Section.java
 Wed Oct 22 06:37:36 2014
@@ -39,6 +39,24 @@ public interface Section extends Span {
     Iterator<Span> getEnclosed(Set<SpanTypeEnum> types);
 
     /**
+     * Iterates over all enclosed Spans within the passed window. Only Spans
+     * with one of the passed types are returned. 
+     * <p> 
+     * The passed window (start/end offsets) is relative to the section. If
+     * the passed window exceeds the Section, the window is clipped to the section.
+     * This means that this method will never return Spans outside the section.
+     * <p>
+     * Returned Iterators MUST NOT throw {@link 
ConcurrentModificationException}
+     * but consider additions of Spans.
+     * @param types the {@link SpanTypeEnum types} of Spans included
+     * @param startOffset the start offset relative to the start position of 
this {@link Section}
+     * @param endOffset the end offset relative to the start position of this 
{@link Section}.
+     * @return sorted iterator over the selected Spans.
+     * @since 0.12.1
+     */
+    Iterator<Span> getEnclosed(Set<SpanTypeEnum> types, int startOffset, int 
endOffset);
+    
+    /**
      * Adds an Token relative to this Sentence
      * @param start the start of the token relative to the sentence
      * @param end

Modified: 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SectionImpl.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SectionImpl.java?rev=1633539&r1=1633538&r2=1633539&view=diff
==============================================================================
--- 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SectionImpl.java
 (original)
+++ 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/main/java/org/apache/stanbol/enhancer/nlp/model/impl/SectionImpl.java
 Wed Oct 22 06:37:36 2014
@@ -16,6 +16,7 @@
 */
 package org.apache.stanbol.enhancer.nlp.model.impl;
 
+import java.util.Collections;
 import java.util.ConcurrentModificationException;
 import java.util.Iterator;
 import java.util.NavigableMap;
@@ -74,6 +75,28 @@ public abstract class SectionImpl extend
                 }
             });
     }
+    
+    @Override
+    @SuppressWarnings("unchecked")
+    public Iterator<Span> getEnclosed(final Set<SpanTypeEnum> types, int startOffset, int endOffset) {
+        final int length = span[1] - span[0]; //both offsets are relative to span[0]
+        if(startOffset >= length){ //start is outside the span
+            return Collections.<Span>emptySet().iterator();
+        }
+        int startIdx = startOffset < 0 ? span[0] : (span[0] + startOffset);
+        //clip to the span BEFORE adding span[0]: 'span[0] + endOffset' overflows for large offsets (e.g. Integer.MAX_VALUE)
+        int endIdx = endOffset >= length ? span[1] : (span[0] + endOffset);
+        if(endIdx <= startIdx) { //empty or inverted window
+            return Collections.<Span>emptySet().iterator();
+        }
+        return IteratorUtils.filteredIterator(getIterator(new SubSetHelperSpan(startIdx, endIdx)), 
+            new Predicate() {
+                @Override
+                public boolean evaluate(Object span) {
+                    return types.contains(((Span)span).getType());
+                }
+            });
+    }
     /**
      * Iterator that does not throw {@link ConcurrentModificationException} but
      * considers modifications to the underlying set by using the
@@ -85,13 +108,32 @@ public abstract class SectionImpl extend
      * @return the iterator
      */
     protected Iterator<Span> getIterator(){
-        //the end of this section
-        final Span end = new SubSetHelperSpan(getEnd());
+        return getIterator(null);
+    }
+    /**
+     * Iterator that does not throw {@link ConcurrentModificationException} but
+     * considers modifications to the underlying set by using the
+     * {@link NavigableMap#higherKey(Object)} method for iterating over the
+     * Elements!<p>
+     * This allows to add new {@link Span}s to the {@link Section} while
+     * iterating (e.g. add {@link Token}s and/or {@link Chunk}s while iterating
+     * over the {@link Sentence}s of an {@link AnalysedText})
+     * @param section the (sub-)section of the current section to iterate or
+     * <code>null</code> to iterate the whole section.
+     * @return the iterator
+     */
+    protected Iterator<Span> getIterator(final SubSetHelperSpan section){
+        //create a virtual Span with the end of the section to iterate over
+        final Span end = new SubSetHelperSpan(
+            section == null ? getEnd() : //if no section is defined use the 
parent
+                section.getEnd()); //use the end of the desired section
         return new Iterator<Span>() {
             
             boolean init = false;
             boolean removed = true;
-            private Span span = SectionImpl.this;
+            //init with the first span of the iterator
+            private Span span = section == null ? 
+                    SectionImpl.this : section; 
             
             @Override
             public boolean hasNext() {

Modified: 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextTest.java
URL: 
http://svn.apache.org/viewvc/stanbol/branches/release-0.12/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextTest.java?rev=1633539&r1=1633538&r2=1633539&view=diff
==============================================================================
--- 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextTest.java
 (original)
+++ 
stanbol/branches/release-0.12/enhancer/generic/nlp/src/test/java/org/apache/stanbol/enhancer/nlp/model/AnalysedTextTest.java
 Wed Oct 22 06:37:36 2014
@@ -19,21 +19,26 @@ package org.apache.stanbol.enhancer.nlp.
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
+import java.util.LinkedList;
 import java.util.List;
 import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.clerezza.rdf.core.UriRef;
+import org.apache.commons.collections.CollectionUtils;
 import 
org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.nlp.model.Span.SpanTypeEnum;
 import org.apache.stanbol.enhancer.nlp.model.annotation.Annotation;
 import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
+import org.apache.stanbol.enhancer.nlp.utils.NIFHelper;
+import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
 import org.apache.stanbol.enhancer.servicesapi.Blob;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
@@ -383,6 +388,65 @@ public class AnalysedTextTest {
           (sentences.size()*chunksInSentence),c);
     }
     
+    /**
+     * Verifies the windowed {@link Section#getEnclosed(Set, int, int)} iteration
+     * that was added with <code>0.12.1</code>
+     */
+    @Test
+    public void testSubSectionIteration(){
+        log.info("testSubSectionIteration ...");
+        List<Span> expectedSpans = new ArrayList<Span>();
+        List<Sentence> sentences = new ArrayList<Sentence>();
+        Set<SpanTypeEnum> enabledTypes = EnumSet.of(SpanTypeEnum.Sentence, SpanTypeEnum.Token);
+        //collect the reference list of all Sentences and Tokens of the text
+        for(Iterator<Span> it = analysedTextWithData.getEnclosed(enabledTypes); it.hasNext();){
+            Span current = it.next();
+            expectedSpans.add(current);
+            if(SpanTypeEnum.Sentence == current.getType()) {
+                sentences.add((Sentence)current);
+            }
+        }
+        //case 1: a window over the whole text that exceeds the end of the text
+        int textCount = assertSectionIterator(analysedTextWithData, expectedSpans, 
+            new int[]{4,90}, enabledTypes);
+        Assert.assertEquals(5, textCount);
+        //case 2: a window relative to the start of the last sentence
+        Sentence lastSent = sentences.get(sentences.size()-1);
+        int sentenceCount = assertSectionIterator(lastSent, expectedSpans, 
+            new int[]{5,25}, enabledTypes);
+        Assert.assertEquals(1, sentenceCount);
+        
+    }
+
+
+    /**
+     * Asserts that {@link Section#getEnclosed(Set, int, int)} for the window <code>testSpan</code> ([start, end] relative
+     * to <code>section</code>) returns, in order, the expected Spans (<code>span</code>) that lie within that window.
+     * @return the number of Spans asserted to be within the window
+     */
+    private int assertSectionIterator(Section section, List<Span> span, int[] testSpan, Set<SpanTypeEnum> types) {
+        log.info("> assert span {} over {}", Arrays.toString(testSpan), section);
+        Iterator<Span> sectionIt = section.getEnclosed(types, testSpan[0], testSpan[1]);
+        int startIdx = section.getStart() + testSpan[0]; //absolute start of the window
+        int endIdx = section.getStart() + testSpan[1]; //absolute end of the window
+        int count = 0;
+        for(Span s : span){
+            if(s.getStart() < startIdx){
+                log.info(" - asserted {} before section", s);
+            } else if(s.getEnd() < endIdx){
+                Assert.assertTrue(sectionIt.hasNext());
+                Assert.assertEquals(s, sectionIt.next());
+                count++;
+                log.info(" - asserted section token {}", s);
+            } else {
+                log.info(" - asserted correct section end {}", s); //'{}' is required, otherwise the Span is never logged
+                Assert.assertFalse(sectionIt.hasNext());
+                break;
+            }
+        }
+        return count;
+    }
+    
     @Test
     public void testAnnotation(){
         List<Value<Number>> values = new ArrayList<Value<Number>>();

svn commit: r1633539 - in /stanbol/branches/release-0.12/enhancer/generic/nlp/src: main/java/org/apache/stanbol/enhancer/nlp/model/ main/java/org/apache/stanbol/enhancer/nlp/model/impl/ test/java/org/apache/stanbol/enhancer/nlp/model/

Reply via email to