Author: pkluegl
Date: Fri May 4 12:56:47 2012
New Revision: 1333919
URL: http://svn.apache.org/viewvc?rev=1333919&view=rev
Log:
UIMA-2397
- reduced memory profile of TextMarkerBasic
- only remember boundaries of annotations but not their parent types
- configurable in code, but not yet by the analysis engine
Modified:
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java
Modified:
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java
URL:
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java?rev=1333919&r1=1333918&r2=1333919&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java Fri May 4 12:56:47 2012
@@ -164,7 +164,6 @@ public class TextMarkerStream extends FS
public void addAnnotation(AnnotationFS annotation, boolean update) {
Type type = annotation.getType();
- TypeSystem typeSystem = cas.getTypeSystem();
Type parent = type;
boolean modified = checkSpan(annotation);
if (modified) {
@@ -172,11 +171,8 @@ public class TextMarkerStream extends FS
}
TextMarkerBasic beginAnchor = getBeginAnchor(annotation.getBegin());
TextMarkerBasic endAnchor = getEndAnchor(annotation.getEnd());
- while (parent != null) {
- beginAnchor.addBegin(annotation, parent);
- endAnchor.addEnd(annotation, parent);
- parent = typeSystem.getParent(parent);
- }
+ beginAnchor.addBegin(annotation, parent);
+ endAnchor.addEnd(annotation, parent);
Collection<TextMarkerBasic> basicAnnotationsInWindow =
getAllBasicsInWindow(annotation);
for (TextMarkerBasic basic : basicAnnotationsInWindow) {
basic.addPartOf(type);
@@ -226,7 +222,6 @@ public class TextMarkerStream extends FS
}
public void removeAnnotation(AnnotationFS annotation, Type type) {
- TypeSystem typeSystem = cas.getTypeSystem();
Collection<TextMarkerBasic> basicAnnotationsInWindow =
getAllBasicsInWindow(annotation);
for (TextMarkerBasic basic : basicAnnotationsInWindow) {
basic.removePartOf(type);
@@ -234,11 +229,8 @@ public class TextMarkerStream extends FS
Type parent = type;
TextMarkerBasic beginAnchor = getBeginAnchor(annotation.getBegin());
TextMarkerBasic endAnchor = getEndAnchor(annotation.getEnd());
- while (parent != null) {
- beginAnchor.removeBegin(annotation, parent);
- endAnchor.removeEnd(annotation, parent);
- parent = typeSystem.getParent(parent);
- }
+ beginAnchor.removeBegin(annotation, parent);
+ endAnchor.removeEnd(annotation, parent);
if (!(annotation instanceof TextMarkerBasic)) {
cas.removeFsFromIndexes(annotation);
}
@@ -417,6 +409,11 @@ public class TextMarkerStream extends FS
public Collection<TextMarkerBasic> getAllBasicsInWindow(AnnotationFS windowAnnotation) {
TextMarkerBasic beginAnchor = getBeginAnchor(windowAnnotation.getBegin());
+ if (beginAnchor.getEnd() == windowAnnotation.getEnd()) {
+ Collection<TextMarkerBasic> result = new ArrayList<TextMarkerBasic>(1);
+ result.add(beginAnchor);
+ return result;
+ }
TextMarkerBasic endAnchor = getEndAnchor(windowAnnotation.getEnd());
NavigableSet<TextMarkerBasic> subSet = basics.subSet(beginAnchor, true,
endAnchor, true);
return subSet;
@@ -424,13 +421,16 @@ public class TextMarkerStream extends FS
// if (windowAnnotation instanceof TextMarkerBasic) {
// result.add((TextMarkerBasic) windowAnnotation);
// return result;
- // } else if (windowAnnotation.getBegin() <= documentAnnotation.getBegin()
+ // } else if (windowAnnotation.getBegin() <=
+ // documentAnnotation.getBegin()
// && windowAnnotation.getEnd() >= documentAnnotation.getEnd()) {
// return basics;
// }
- // TextMarkerFrame frame = new TextMarkerFrame(getJCas(), windowAnnotation.getBegin(),
+ // TextMarkerFrame frame = new TextMarkerFrame(getJCas(),
+ // windowAnnotation.getBegin(),
// windowAnnotation.getEnd());
- // FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex(basicType).subiterator(frame);
+ // FSIterator<AnnotationFS> iterator =
+ // cas.getAnnotationIndex(basicType).subiterator(frame);
// while (iterator.isValid()) {
// result.add((TextMarkerBasic) iterator.get());
// iterator.moveToNext();
@@ -543,17 +543,22 @@ public class TextMarkerStream extends FS
// System.out.println();
// }
// if (getNextBasic(lastAnnotation) != null
- // && getNextBasic(lastAnnotation).getBegin() == lastAnnotation.getBegin()) {
+ // && getNextBasic(lastAnnotation).getBegin() ==
+ // lastAnnotation.getBegin()) {
// System.out.println();
// }
TextMarkerBasic nextBasic = getNextBasic(lastAnnotation);
// TextMarkerBasic nextBasic2 = getNextBasic2(lastAnnotation);
// if (nextBasic != nextBasic2) {
- // String string = nextBasic == null ? "null" : nextBasic.getCoveredText();
- // String string2 = nextBasic == null ? "null" : (nextBasic.getBegin() + "");
- // System.out.println("nextBasic.getBegin() != nextBasic2.getBegin() " + string + " "
+ // String string = nextBasic == null ? "null" :
+ // nextBasic.getCoveredText();
+ // String string2 = nextBasic == null ? "null" : (nextBasic.getBegin() +
+ // "");
+ // System.out.println("nextBasic.getBegin() != nextBasic2.getBegin() " +
+ // string + " "
// + nextBasic2.getCoveredText());
- // System.out.println(lastAnnotation.getBegin() + "=" + string2 + "=" + nextBasic2.getBegin());
+ // System.out.println(lastAnnotation.getBegin() + "=" + string2 + "=" +
+ // nextBasic2.getBegin());
// }
return nextBasic;
}
@@ -562,7 +567,8 @@ public class TextMarkerStream extends FS
TextMarkerBasic pointer = pointerMap.get(previous.getEnd());
if (pointer == null) {
// FIXME: hotfix for ML stuff
- // pointer = new TextMarkerFrame(getJCas(), previous.getEnd()-1, previous.getEnd());
+ // pointer = new TextMarkerFrame(getJCas(), previous.getEnd()-1,
+ // previous.getEnd());
pointer = (TextMarkerBasic) cas.createAnnotation(basicType,
previous.getEnd() - 1,
previous.getEnd());
pointerMap.put(previous.getEnd(), pointer);
@@ -571,7 +577,8 @@ public class TextMarkerStream extends FS
if (currentIt.isValid()) {
TextMarkerBasic basic = (TextMarkerBasic) currentIt.get();
if (basic.getBegin() == previous.getBegin()) {
- // if (basic.getBegin() >= previous.getBegin() || basic.getEnd() <= previous.getEnd()) {
+ // if (basic.getBegin() >= previous.getBegin() || basic.getEnd()
+ // <= previous.getEnd()) {
currentIt.moveToNext();
if (currentIt.isValid()) {
return (TextMarkerBasic) currentIt.get();
Modified:
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java
URL:
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java?rev=1333919&r1=1333918&r2=1333919&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/GetListAction.java Fri May 4 12:56:47 2012
@@ -66,6 +66,7 @@ public class GetListAction extends Abstr
TextMarkerBasic firstBasic = stream.getFirstBasicInWindow(matched);
Collection<Set<AnnotationFS>> values = firstBasic.getBeginMap().values();
+ // TODO right now, this only works for types that are present and not for their parent types...
if (TYPES_AT_BEGIN.equals(op)) {
for (Set<AnnotationFS> set : values) {
for (AnnotationFS annotationFS : set) {
Modified:
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java
URL:
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java?rev=1333919&r1=1333918&r2=1333919&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java Fri May 4 12:56:47 2012
@@ -44,7 +44,7 @@ public class DefaultSeeder implements Te
} catch (CASException e1) {
}
// do not apply seeding if there are already annotations of this seed type
- if (jCas == null || size != 0) {
+ if (jCas == null || size != 0 || text == null) {
return result;
}
BufferedReader reader = new BufferedReader(new StringReader(text));
Modified:
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java
URL:
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java?rev=1333919&r1=1333918&r2=1333919&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java Fri May 4 12:56:47 2012
@@ -3,27 +3,46 @@ package org.apache.uima.textmarker.type;
import java.util.HashMap;
import java.util.HashSet;
+import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.uima.cas.Type;
+import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JCasRegistry;
import org.apache.uima.jcas.cas.TOP_Type;
import org.apache.uima.jcas.tcas.Annotation;
-/**
- * Updated by JCasGen Wed Jan 11 14:42:26 CET 2012
- * XML source: D:/work/workspace-uima3/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicTypeSystem.xml
- * @generated */
+/**
+ * Updated by JCasGen Wed Jan 11 14:42:26 CET 2012 XML source: D:/work/workspace-
+ * uima3/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima
+ * /textmarker/engine/BasicTypeSystem.xml
+ *
+ * @generated
+ */
public class TextMarkerBasic extends Annotation {
- private Set<Type> partOf = new HashSet<Type>(20);
+ private static final int INITIAL_CAPACITY = 2;
- private final Map<Type, Set<AnnotationFS>> beginMap = new HashMap<Type, Set<AnnotationFS>>(10);
+ private boolean lowMemoryProfile = true;
- private final Map<Type, Set<AnnotationFS>> endMap = new HashMap<Type, Set<AnnotationFS>>(10);
+ private Set<Type> partOf = new HashSet<Type>(INITIAL_CAPACITY);
+
+ private final Map<Type, Set<AnnotationFS>> beginMap = new HashMap<Type, Set<AnnotationFS>>(
+ INITIAL_CAPACITY);
+
+ private final Map<Type, Set<AnnotationFS>> endMap = new HashMap<Type, Set<AnnotationFS>>(
+ INITIAL_CAPACITY);
+
+ public boolean isLowMemoryProfile() {
+ return lowMemoryProfile;
+ }
+
+ public void setLowMemoryProfile(boolean lowMemoryProfile) {
+ this.lowMemoryProfile = lowMemoryProfile;
+ }
public void addPartOf(Type type) {
partOf.add(type);
@@ -38,37 +57,105 @@ public class TextMarkerBasic extends Ann
}
public Set<AnnotationFS> getBeginAnchors(Type type) {
- return beginMap.get(type);
+ Set<AnnotationFS> set = beginMap.get(type);
+ if (lowMemoryProfile) {
+ Set<AnnotationFS> result = new HashSet<AnnotationFS>();
+ if (set != null) {
+ result.addAll(set);
+ }
+ List<Type> subsumedTypes = getCAS().getTypeSystem().getProperlySubsumedTypes(type);
+ for (Type each : subsumedTypes) {
+ Set<AnnotationFS> c = beginMap.get(each);
+ if (c != null) {
+ result.addAll(c);
+ }
+ }
+ return result;
+ } else {
+ return set;
+ }
}
public Set<AnnotationFS> getEndAnchors(Type type) {
- return endMap.get(type);
+ Set<AnnotationFS> set = endMap.get(type);
+ if (lowMemoryProfile) {
+ Set<AnnotationFS> result = new HashSet<AnnotationFS>(set);
+ if (set != null) {
+ result.addAll(set);
+ }
+ List<Type> subsumedTypes = getCAS().getTypeSystem().getProperlySubsumedTypes(type);
+ for (Type each : subsumedTypes) {
+ Set<AnnotationFS> c = endMap.get(each);
+ if (c != null) {
+ result.addAll(c);
+ }
+ }
+ return result;
+ } else {
+ return set;
+ }
}
public boolean beginsWith(Type type) {
- return beginMap.containsKey(type);
+ if (beginMap.containsKey(type)) {
+ return true;
+ }
+ if (lowMemoryProfile) {
+ List<Type> subsumedTypes = getCAS().getTypeSystem().getProperlySubsumedTypes(type);
+ for (Type each : subsumedTypes) {
+ if (beginsWith(each)) {
+ return true;
+ }
+ }
+ }
+ return false;
}
public boolean endsWith(Type type) {
- return endMap.containsKey(type);
+ if (endMap.containsKey(type)) {
+ return true;
+ }
+ if (lowMemoryProfile) {
+ List<Type> subsumedTypes = getCAS().getTypeSystem().getProperlySubsumedTypes(type);
+ for (Type each : subsumedTypes) {
+ if (endsWith(each)) {
+ return true;
+ }
+ }
+ }
+ return false;
}
public void addBegin(AnnotationFS annotation, Type type) {
Set<AnnotationFS> list = beginMap.get(type);
if (list == null) {
- list = new HashSet<AnnotationFS>();
+ list = new HashSet<AnnotationFS>(INITIAL_CAPACITY);
beginMap.put(type, list);
}
list.add(annotation);
+ if (!lowMemoryProfile) {
+ TypeSystem typeSystem = getCAS().getTypeSystem();
+ Type parent = typeSystem.getParent(type);
+ if (parent != null) {
+ addBegin(annotation, parent);
+ }
+ }
}
public void addEnd(AnnotationFS annotation, Type type) {
Set<AnnotationFS> list = endMap.get(type);
if (list == null) {
- list = new HashSet<AnnotationFS>();
+ list = new HashSet<AnnotationFS>(INITIAL_CAPACITY);
endMap.put(type, list);
}
list.add(annotation);
+ if (!lowMemoryProfile) {
+ TypeSystem typeSystem = getCAS().getTypeSystem();
+ Type parent = typeSystem.getParent(type);
+ if (parent != null) {
+ addEnd(annotation, parent);
+ }
+ }
}
public void removeBegin(AnnotationFS annotation, Type type) {
@@ -79,6 +166,13 @@ public class TextMarkerBasic extends Ann
beginMap.remove(annotation.getType());
}
}
+ if (!lowMemoryProfile) {
+ TypeSystem typeSystem = getCAS().getTypeSystem();
+ Type parent = typeSystem.getParent(type);
+ if (parent != null) {
+ removeBegin(annotation, parent);
+ }
+ }
}
public void removeEnd(AnnotationFS annotation, Type type) {
@@ -89,6 +183,17 @@ public class TextMarkerBasic extends Ann
endMap.remove(annotation.getType());
}
}
+ if (!lowMemoryProfile) {
+ TypeSystem typeSystem = getCAS().getTypeSystem();
+ Type parent = typeSystem.getParent(type);
+ if (parent != null) {
+ removeEnd(annotation, parent);
+ }
+ }
+ }
+
+ public Map<Type, Set<AnnotationFS>> getBeginMap() {
+ return beginMap;
}
/**
@@ -105,15 +210,18 @@ public class TextMarkerBasic extends Ann
/** @generated */
@Override
- public int getTypeIndexID() {return typeIndexID;}
-
+ public int getTypeIndexID() {
+ return typeIndexID;
+ }
+
/**
* Never called. Disable default constructor
*
* @generated
*/
- protected TextMarkerBasic() {}
-
+ protected TextMarkerBasic() {
+ }
+
/**
* Internal - constructor used by generator
*
@@ -123,12 +231,12 @@ public class TextMarkerBasic extends Ann
super(addr, type);
readObject();
}
-
+
/** @generated */
public TextMarkerBasic(JCas jcas) {
super(jcas);
- readObject();
- }
+ readObject();
+ }
/** @generated */
public TextMarkerBasic(JCas jcas, int begin, int end) {
@@ -136,10 +244,13 @@ public class TextMarkerBasic extends Ann
setBegin(begin);
setEnd(end);
readObject();
- }
+ }
- /** <!-- begin-user-doc --> Write your own initialization here <!-- end-user-doc -->
- @generated modifiable */
+ /**
+ * <!-- begin-user-doc --> Write your own initialization here <!-- end-user-doc -->
+ *
+ * @generated modifiable
+ */
private void readObject() {
}
@@ -152,25 +263,30 @@ public class TextMarkerBasic extends Ann
* @generated
*/
public String getReplacement() {
- if (TextMarkerBasic_Type.featOkTst && ((TextMarkerBasic_Type)jcasType).casFeat_replacement == null)
- jcasType.jcas.throwFeatMissing("replacement", "org.apache.uima.textmarker.type.TextMarkerBasic");
- return jcasType.ll_cas.ll_getStringValue(addr, ((TextMarkerBasic_Type)jcasType).casFeatCode_replacement);}
-
+ if (TextMarkerBasic_Type.featOkTst
+ && ((TextMarkerBasic_Type) jcasType).casFeat_replacement == null)
+ jcasType.jcas.throwFeatMissing("replacement",
+ "org.apache.uima.textmarker.type.TextMarkerBasic");
+ return jcasType.ll_cas.ll_getStringValue(addr,
+ ((TextMarkerBasic_Type) jcasType).casFeatCode_replacement);
+ }
+
/**
* setter for Replacement - sets
*
* @generated
*/
public void setReplacement(String v) {
- if (TextMarkerBasic_Type.featOkTst && ((TextMarkerBasic_Type)jcasType).casFeat_replacement == null)
- jcasType.jcas.throwFeatMissing("replacement", "org.apache.uima.textmarker.type.TextMarkerBasic");
- jcasType.ll_cas.ll_setStringValue(addr, ((TextMarkerBasic_Type)jcasType).casFeatCode_replacement, v);}
- public Map<Type, Set<AnnotationFS>> getEndMap() {
- return endMap;
+ if (TextMarkerBasic_Type.featOkTst
+ && ((TextMarkerBasic_Type) jcasType).casFeat_replacement == null)
+ jcasType.jcas.throwFeatMissing("replacement",
+ "org.apache.uima.textmarker.type.TextMarkerBasic");
+ jcasType.ll_cas.ll_setStringValue(addr,
+ ((TextMarkerBasic_Type) jcasType).casFeatCode_replacement, v);
}
- public Map<Type, Set<AnnotationFS>> getBeginMap() {
- return beginMap;
+ public Map<Type, Set<AnnotationFS>> getEndMap() {
+ return endMap;
}
}