Author: pkluegl
Date: Fri May  4 12:56:47 2012
New Revision: 1333919

URL: http://svn.apache.org/viewvc?rev=1333919&view=rev
Log:
UIMA-2397
- reduced memory profile of TextMarkerBasic
- only remember boundaries of annotations but not their parent types
- configurable in code, but not yet by the analysis engine

Modified:
    
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java
    
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java
    
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java
    
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java

Modified: 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java?rev=1333919&r1=1333918&r2=1333919&view=diff
==============================================================================
--- 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java
 (original)
+++ 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java
 Fri May  4 12:56:47 2012
@@ -164,7 +164,6 @@ public class TextMarkerStream extends FS
 
   public void addAnnotation(AnnotationFS annotation, boolean update) {
     Type type = annotation.getType();
-    TypeSystem typeSystem = cas.getTypeSystem();
     Type parent = type;
     boolean modified = checkSpan(annotation);
     if (modified) {
@@ -172,11 +171,8 @@ public class TextMarkerStream extends FS
     }
     TextMarkerBasic beginAnchor = getBeginAnchor(annotation.getBegin());
     TextMarkerBasic endAnchor = getEndAnchor(annotation.getEnd());
-    while (parent != null) {
-      beginAnchor.addBegin(annotation, parent);
-      endAnchor.addEnd(annotation, parent);
-      parent = typeSystem.getParent(parent);
-    }
+    beginAnchor.addBegin(annotation, parent);
+    endAnchor.addEnd(annotation, parent);
     Collection<TextMarkerBasic> basicAnnotationsInWindow = 
getAllBasicsInWindow(annotation);
     for (TextMarkerBasic basic : basicAnnotationsInWindow) {
       basic.addPartOf(type);
@@ -226,7 +222,6 @@ public class TextMarkerStream extends FS
   }
 
   public void removeAnnotation(AnnotationFS annotation, Type type) {
-    TypeSystem typeSystem = cas.getTypeSystem();
     Collection<TextMarkerBasic> basicAnnotationsInWindow = 
getAllBasicsInWindow(annotation);
     for (TextMarkerBasic basic : basicAnnotationsInWindow) {
       basic.removePartOf(type);
@@ -234,11 +229,8 @@ public class TextMarkerStream extends FS
     Type parent = type;
     TextMarkerBasic beginAnchor = getBeginAnchor(annotation.getBegin());
     TextMarkerBasic endAnchor = getEndAnchor(annotation.getEnd());
-    while (parent != null) {
-      beginAnchor.removeBegin(annotation, parent);
-      endAnchor.removeEnd(annotation, parent);
-      parent = typeSystem.getParent(parent);
-    }
+    beginAnchor.removeBegin(annotation, parent);
+    endAnchor.removeEnd(annotation, parent);
     if (!(annotation instanceof TextMarkerBasic)) {
       cas.removeFsFromIndexes(annotation);
     }
@@ -417,6 +409,11 @@ public class TextMarkerStream extends FS
   public Collection<TextMarkerBasic> getAllBasicsInWindow(AnnotationFS 
windowAnnotation) {
 
     TextMarkerBasic beginAnchor = getBeginAnchor(windowAnnotation.getBegin());
+    if (beginAnchor.getEnd() == windowAnnotation.getEnd()) {
+      Collection<TextMarkerBasic> result = new ArrayList<TextMarkerBasic>(1);
+      result.add(beginAnchor);
+      return result;
+    }
     TextMarkerBasic endAnchor = getEndAnchor(windowAnnotation.getEnd());
     NavigableSet<TextMarkerBasic> subSet = basics.subSet(beginAnchor, true, 
endAnchor, true);
     return subSet;
@@ -424,13 +421,16 @@ public class TextMarkerStream extends FS
     // if (windowAnnotation instanceof TextMarkerBasic) {
     // result.add((TextMarkerBasic) windowAnnotation);
     // return result;
-    // } else if (windowAnnotation.getBegin() <= documentAnnotation.getBegin()
+    // } else if (windowAnnotation.getBegin() <=
+    // documentAnnotation.getBegin()
     // && windowAnnotation.getEnd() >= documentAnnotation.getEnd()) {
     // return basics;
     // }
-    // TextMarkerFrame frame = new TextMarkerFrame(getJCas(), 
windowAnnotation.getBegin(),
+    // TextMarkerFrame frame = new TextMarkerFrame(getJCas(),
+    // windowAnnotation.getBegin(),
     // windowAnnotation.getEnd());
-    // FSIterator<AnnotationFS> iterator = 
cas.getAnnotationIndex(basicType).subiterator(frame);
+    // FSIterator<AnnotationFS> iterator =
+    // cas.getAnnotationIndex(basicType).subiterator(frame);
     // while (iterator.isValid()) {
     // result.add((TextMarkerBasic) iterator.get());
     // iterator.moveToNext();
@@ -543,17 +543,22 @@ public class TextMarkerStream extends FS
     // System.out.println();
     // }
     // if (getNextBasic(lastAnnotation) != null
-    // && getNextBasic(lastAnnotation).getBegin() == 
lastAnnotation.getBegin()) {
+    // && getNextBasic(lastAnnotation).getBegin() ==
+    // lastAnnotation.getBegin()) {
     // System.out.println();
     // }
     TextMarkerBasic nextBasic = getNextBasic(lastAnnotation);
     // TextMarkerBasic nextBasic2 = getNextBasic2(lastAnnotation);
     // if (nextBasic != nextBasic2) {
-    // String string = nextBasic == null ? "null" : nextBasic.getCoveredText();
-    // String string2 = nextBasic == null ? "null" : (nextBasic.getBegin() + 
"");
-    // System.out.println("nextBasic.getBegin() != nextBasic2.getBegin() " + 
string + " "
+    // String string = nextBasic == null ? "null" :
+    // nextBasic.getCoveredText();
+    // String string2 = nextBasic == null ? "null" : (nextBasic.getBegin() +
+    // "");
+    // System.out.println("nextBasic.getBegin() != nextBasic2.getBegin() " +
+    // string + " "
     // + nextBasic2.getCoveredText());
-    // System.out.println(lastAnnotation.getBegin() + "=" + string2 + "=" + 
nextBasic2.getBegin());
+    // System.out.println(lastAnnotation.getBegin() + "=" + string2 + "=" +
+    // nextBasic2.getBegin());
     // }
     return nextBasic;
   }
@@ -562,7 +567,8 @@ public class TextMarkerStream extends FS
     TextMarkerBasic pointer = pointerMap.get(previous.getEnd());
     if (pointer == null) {
       // FIXME: hotfix for ML stuff
-      // pointer = new TextMarkerFrame(getJCas(), previous.getEnd()-1, 
previous.getEnd());
+      // pointer = new TextMarkerFrame(getJCas(), previous.getEnd()-1,
+      // previous.getEnd());
       pointer = (TextMarkerBasic) cas.createAnnotation(basicType, 
previous.getEnd() - 1,
               previous.getEnd());
       pointerMap.put(previous.getEnd(), pointer);
@@ -571,7 +577,8 @@ public class TextMarkerStream extends FS
     if (currentIt.isValid()) {
       TextMarkerBasic basic = (TextMarkerBasic) currentIt.get();
       if (basic.getBegin() == previous.getBegin()) {
-        // if (basic.getBegin() >= previous.getBegin() || basic.getEnd() <= 
previous.getEnd()) {
+        // if (basic.getBegin() >= previous.getBegin() || basic.getEnd()
+        // <= previous.getEnd()) {
         currentIt.moveToNext();
         if (currentIt.isValid()) {
           return (TextMarkerBasic) currentIt.get();

Modified: 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java?rev=1333919&r1=1333918&r2=1333919&view=diff
==============================================================================
--- 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java
 (original)
+++ 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java
 Fri May  4 12:56:47 2012
@@ -66,6 +66,7 @@ public class GetListAction extends Abstr
 
       TextMarkerBasic firstBasic = stream.getFirstBasicInWindow(matched);
       Collection<Set<AnnotationFS>> values = firstBasic.getBeginMap().values();
+      // TODO right now, this only works for types that are present and not for their parent types...
       if (TYPES_AT_BEGIN.equals(op)) {
         for (Set<AnnotationFS> set : values) {
           for (AnnotationFS annotationFS : set) {

Modified: 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java?rev=1333919&r1=1333918&r2=1333919&view=diff
==============================================================================
--- 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java
 (original)
+++ 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java
 Fri May  4 12:56:47 2012
@@ -44,7 +44,7 @@ public class DefaultSeeder implements Te
     } catch (CASException e1) {
     }
     // do not apply seeding if there are already annotations of this seed type
-    if (jCas == null || size != 0) {
+    if (jCas == null || size != 0 || text == null) {
       return result;
     }
     BufferedReader reader = new BufferedReader(new StringReader(text));

Modified: 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java?rev=1333919&r1=1333918&r2=1333919&view=diff
==============================================================================
--- 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java
 (original)
+++ 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java
 Fri May  4 12:56:47 2012
@@ -3,27 +3,46 @@ package org.apache.uima.textmarker.type;
 
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
 import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
 import org.apache.uima.cas.text.AnnotationFS;
 import org.apache.uima.jcas.JCas;
 import org.apache.uima.jcas.JCasRegistry;
 import org.apache.uima.jcas.cas.TOP_Type;
 import org.apache.uima.jcas.tcas.Annotation;
 
-/** 
- * Updated by JCasGen Wed Jan 11 14:42:26 CET 2012
- * XML source: 
D:/work/workspace-uima3/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicTypeSystem.xml
- * @generated */
+/**
+ * Updated by JCasGen Wed Jan 11 14:42:26 CET 2012 XML source: 
D:/work/workspace-
+ * uima3/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima
+ * /textmarker/engine/BasicTypeSystem.xml
+ * 
+ * @generated
+ */
 public class TextMarkerBasic extends Annotation {
 
-  private Set<Type> partOf = new HashSet<Type>(20);
+  private static final int INITIAL_CAPACITY = 2;
 
-  private final Map<Type, Set<AnnotationFS>> beginMap = new HashMap<Type, 
Set<AnnotationFS>>(10);
+  private boolean lowMemoryProfile = true;
 
-  private final Map<Type, Set<AnnotationFS>> endMap = new HashMap<Type, 
Set<AnnotationFS>>(10);
+  private Set<Type> partOf = new HashSet<Type>(INITIAL_CAPACITY);
+
+  private final Map<Type, Set<AnnotationFS>> beginMap = new HashMap<Type, 
Set<AnnotationFS>>(
+          INITIAL_CAPACITY);
+
+  private final Map<Type, Set<AnnotationFS>> endMap = new HashMap<Type, 
Set<AnnotationFS>>(
+          INITIAL_CAPACITY);
+
+  public boolean isLowMemoryProfile() {
+    return lowMemoryProfile;
+  }
+
+  public void setLowMemoryProfile(boolean lowMemoryProfile) {
+    this.lowMemoryProfile = lowMemoryProfile;
+  }
 
   public void addPartOf(Type type) {
     partOf.add(type);
@@ -38,37 +57,105 @@ public class TextMarkerBasic extends Ann
   }
 
   public Set<AnnotationFS> getBeginAnchors(Type type) {
-    return beginMap.get(type);
+    Set<AnnotationFS> set = beginMap.get(type);
+    if (lowMemoryProfile) {
+      Set<AnnotationFS> result = new HashSet<AnnotationFS>();
+      if (set != null) {
+        result.addAll(set);
+      }
+      List<Type> subsumedTypes = 
getCAS().getTypeSystem().getProperlySubsumedTypes(type);
+      for (Type each : subsumedTypes) {
+        Set<AnnotationFS> c = beginMap.get(each);
+        if (c != null) {
+          result.addAll(c);
+        }
+      }
+      return result;
+    } else {
+      return set;
+    }
   }
 
   public Set<AnnotationFS> getEndAnchors(Type type) {
-    return endMap.get(type);
+    // Annotations stored in endMap directly under the requested type; null if there are none.
+    Set<AnnotationFS> set = endMap.get(type);
+    if (lowMemoryProfile) {
+      // Start from an empty set: new HashSet<AnnotationFS>(set) throws a
+      // NullPointerException when nothing is stored for exactly this type.
+      Set<AnnotationFS> result = new HashSet<AnnotationFS>();
+      if (set != null) {
+        result.addAll(set);
+      }
+      // In the low-memory profile addEnd() does not propagate entries to parent types,
+      // so the entries of all properly subsumed types are merged in at query time.
+      List<Type> subsumedTypes = getCAS().getTypeSystem().getProperlySubsumedTypes(type);
+      for (Type each : subsumedTypes) {
+        Set<AnnotationFS> c = endMap.get(each);
+        if (c != null) {
+          result.addAll(c);
+        }
+      }
+      return result;
+    } else {
+      // Full profile: return the stored set as-is (may be null).
+      return set;
+    }
   }
 
   public boolean beginsWith(Type type) {
-    return beginMap.containsKey(type);
+    // Fast path: an annotation of exactly this type begins here.
+    if (beginMap.containsKey(type)) {
+      return true;
+    }
+    if (lowMemoryProfile) {
+      // In the low-memory profile addBegin() does not register parent types, so the
+      // subtypes are checked at query time. getProperlySubsumedTypes() already lists
+      // every descendant, so a flat key lookup per type is enough (same approach as
+      // getBeginAnchors); recursing would only re-walk the same subtypes.
+      List<Type> subsumedTypes = getCAS().getTypeSystem().getProperlySubsumedTypes(type);
+      for (Type each : subsumedTypes) {
+        if (beginMap.containsKey(each)) {
+          return true;
+        }
+      }
+    }
+    return false;
   }
 
   public boolean endsWith(Type type) {
-    return endMap.containsKey(type);
+    // Fast path: an annotation of exactly this type ends here.
+    if (endMap.containsKey(type)) {
+      return true;
+    }
+    if (lowMemoryProfile) {
+      // In the low-memory profile addEnd() does not register parent types, so the
+      // subtypes are checked at query time. getProperlySubsumedTypes() already lists
+      // every descendant, so a flat key lookup per type is enough (same approach as
+      // getEndAnchors); recursing would only re-walk the same subtypes.
+      List<Type> subsumedTypes = getCAS().getTypeSystem().getProperlySubsumedTypes(type);
+      for (Type each : subsumedTypes) {
+        if (endMap.containsKey(each)) {
+          return true;
+        }
+      }
+    }
+    return false;
   }
 
   public void addBegin(AnnotationFS annotation, Type type) {
     Set<AnnotationFS> list = beginMap.get(type);
     if (list == null) {
-      list = new HashSet<AnnotationFS>();
+      list = new HashSet<AnnotationFS>(INITIAL_CAPACITY);
       beginMap.put(type, list);
     }
     list.add(annotation);
+    if (!lowMemoryProfile) {
+      TypeSystem typeSystem = getCAS().getTypeSystem();
+      Type parent = typeSystem.getParent(type);
+      if (parent != null) {
+        addBegin(annotation, parent);
+      }
+    }
   }
 
   public void addEnd(AnnotationFS annotation, Type type) {
     Set<AnnotationFS> list = endMap.get(type);
     if (list == null) {
-      list = new HashSet<AnnotationFS>();
+      list = new HashSet<AnnotationFS>(INITIAL_CAPACITY);
       endMap.put(type, list);
     }
     list.add(annotation);
+    if (!lowMemoryProfile) {
+      TypeSystem typeSystem = getCAS().getTypeSystem();
+      Type parent = typeSystem.getParent(type);
+      if (parent != null) {
+        addEnd(annotation, parent);
+      }
+    }
   }
 
   public void removeBegin(AnnotationFS annotation, Type type) {
@@ -79,6 +166,13 @@ public class TextMarkerBasic extends Ann
         beginMap.remove(annotation.getType());
       }
     }
+    if (!lowMemoryProfile) {
+      TypeSystem typeSystem = getCAS().getTypeSystem();
+      Type parent = typeSystem.getParent(type);
+      if (parent != null) {
+        removeBegin(annotation, parent);
+      }
+    }
   }
 
   public void removeEnd(AnnotationFS annotation, Type type) {
@@ -89,6 +183,17 @@ public class TextMarkerBasic extends Ann
         endMap.remove(annotation.getType());
       }
     }
+    if (!lowMemoryProfile) {
+      TypeSystem typeSystem = getCAS().getTypeSystem();
+      Type parent = typeSystem.getParent(type);
+      if (parent != null) {
+        removeEnd(annotation, parent);
+      }
+    }
+  }
+
+  public Map<Type, Set<AnnotationFS>> getBeginMap() {
+    return beginMap;
   }
 
   /**
@@ -105,15 +210,18 @@ public class TextMarkerBasic extends Ann
 
   /** @generated */
   @Override
-  public int getTypeIndexID() {return typeIndexID;}
- 
+  public int getTypeIndexID() {
+    return typeIndexID;
+  }
+
   /**
    * Never called. Disable default constructor
    * 
    * @generated
    */
-  protected TextMarkerBasic() {}
-    
+  protected TextMarkerBasic() {
+  }
+
   /**
    * Internal - constructor used by generator
    * 
@@ -123,12 +231,12 @@ public class TextMarkerBasic extends Ann
     super(addr, type);
     readObject();
   }
-  
+
   /** @generated */
   public TextMarkerBasic(JCas jcas) {
     super(jcas);
-    readObject();   
-  } 
+    readObject();
+  }
 
   /** @generated */
   public TextMarkerBasic(JCas jcas, int begin, int end) {
@@ -136,10 +244,13 @@ public class TextMarkerBasic extends Ann
     setBegin(begin);
     setEnd(end);
     readObject();
-  }   
+  }
 
-  /** <!-- begin-user-doc --> Write your own initialization here <!-- 
end-user-doc -->
-  @generated modifiable */
+  /**
+   * <!-- begin-user-doc --> Write your own initialization here <!-- 
end-user-doc -->
+   * 
+   * @generated modifiable
+   */
   private void readObject() {
   }
 
@@ -152,25 +263,30 @@ public class TextMarkerBasic extends Ann
    * @generated
    */
   public String getReplacement() {
-    if (TextMarkerBasic_Type.featOkTst && 
((TextMarkerBasic_Type)jcasType).casFeat_replacement == null)
-      jcasType.jcas.throwFeatMissing("replacement", 
"org.apache.uima.textmarker.type.TextMarkerBasic");
-    return jcasType.ll_cas.ll_getStringValue(addr, 
((TextMarkerBasic_Type)jcasType).casFeatCode_replacement);}
-    
+    if (TextMarkerBasic_Type.featOkTst
+            && ((TextMarkerBasic_Type) jcasType).casFeat_replacement == null)
+      jcasType.jcas.throwFeatMissing("replacement",
+              "org.apache.uima.textmarker.type.TextMarkerBasic");
+    return jcasType.ll_cas.ll_getStringValue(addr,
+            ((TextMarkerBasic_Type) jcasType).casFeatCode_replacement);
+  }
+
   /**
    * setter for Replacement - sets
    * 
    * @generated
    */
   public void setReplacement(String v) {
-    if (TextMarkerBasic_Type.featOkTst && 
((TextMarkerBasic_Type)jcasType).casFeat_replacement == null)
-      jcasType.jcas.throwFeatMissing("replacement", 
"org.apache.uima.textmarker.type.TextMarkerBasic");
-    jcasType.ll_cas.ll_setStringValue(addr, 
((TextMarkerBasic_Type)jcasType).casFeatCode_replacement, v);}    
-          public Map<Type, Set<AnnotationFS>> getEndMap() {
-    return endMap;
+    if (TextMarkerBasic_Type.featOkTst
+            && ((TextMarkerBasic_Type) jcasType).casFeat_replacement == null)
+      jcasType.jcas.throwFeatMissing("replacement",
+              "org.apache.uima.textmarker.type.TextMarkerBasic");
+    jcasType.ll_cas.ll_setStringValue(addr,
+            ((TextMarkerBasic_Type) jcasType).casFeatCode_replacement, v);
   }
 
-  public Map<Type, Set<AnnotationFS>> getBeginMap() {
-    return beginMap;
+  public Map<Type, Set<AnnotationFS>> getEndMap() {
+    return endMap;
   }
 
 }


Reply via email to