Author: pkluegl
Date: Wed Jan 11 18:20:10 2012
New Revision: 1230176

URL: http://svn.apache.org/viewvc?rev=1230176&view=rev
Log:
UIMA-2233
Seeding annotations and inference annotations are torn apart now. Unit tests and a bigger project work correctly with the changes, but the explanation component returns unreasonable rule behavior. Either the explanation component is broken or the test worked by accident. Further testing is required.
Changed parameters of engine descriptor.
Additionally, removed wrong references to CEV plugin in IDE perspective.

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TokenSeed.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TokenSeed_Type.java Removed: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/SourceLexer.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/BasicAnnotation.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/BasicAnnotation_Type.java Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/FilterManager.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicEngine.xml uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicTypeSystem.xml uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/TextMarkerEngine.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/parser/TextMarkerParser.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/SeedLexer.flex uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/SeedLexer.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/ALL.java 
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/ALL_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/AMP.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/AMP_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/ANY.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/ANY_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/BREAK.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/BREAK_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/CAP.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/CAP_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/COLON.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/COLON_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/COMMA.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/COMMA_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/CW.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/CW_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugBlockApply.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugBlockApply_Type.java 
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugEvaluatedCondition.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugEvaluatedCondition_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugFailedRuleMatch.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugFailedRuleMatch_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugMatchedRuleMatch.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugMatchedRuleMatch_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugRuleApply.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugRuleApply_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugRuleElementMatch.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugRuleElementMatch_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugRuleElementMatches.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugRuleElementMatches_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugRuleMatch.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugRuleMatch_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugScriptApply.java 
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/DebugScriptApply_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/Document.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/Document_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/EXCLAMATION.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/EXCLAMATION_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/EvalAnnotation.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/EvalAnnotation_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/FalseNegative.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/FalseNegative_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/FalsePositive.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/FalsePositive_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/MARKUP.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/MARKUP_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/NBSP.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/NBSP_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/NUM.java 
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/NUM_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/PERIOD.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/PERIOD_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/PM.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/PM_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/ProfiledAnnotation.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/ProfiledAnnotation_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/QUESTION.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/QUESTION_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SEMICOLON.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SEMICOLON_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SENTENCEEND.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SENTENCEEND_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SPACE.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SPACE_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SPECIAL.java 
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SPECIAL_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SW.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/SW_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/Statistics.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/Statistics_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TextMarkerAnnotation.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TextMarkerAnnotation_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TextMarkerBasic_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TextMarkerColoring.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TextMarkerColoring_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TextMarkerFrame.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TextMarkerFrame_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TruePositive.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/TruePositive_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/W.java 
uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/WS.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/WS_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/type/W_Type.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/ui/TextMarkerPerspective.java Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/FilterManager.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/FilterManager.java?rev=1230176&r1=1230175&r2=1230176&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/FilterManager.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/FilterManager.java Wed Jan 11 18:20:10 2012 @@ -232,12 +232,8 @@ public class FilterManager { windowAnnotation); FSIterator<AnnotationFS> iterator = cas.createFilteredIterator(windowIt, createCurrentConstraint(false)); - // FSIterator<AnnotationFS> iterator = cas.createFilteredIterator(basic, - // createCurrentConstraint(false)); return iterator; } else { - // FSIterator<AnnotationFS> iterator = cas.createFilteredIterator(basic, - // createCurrentConstraint(false)); FSIterator<AnnotationFS> iterator = cas.createFilteredIterator( cas.getAnnotationIndex(basicType).iterator(), createCurrentConstraint(false)); return iterator; Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java URL: 
http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java?rev=1230176&r1=1230175&r2=1230176&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerEnvironment.java Wed Jan 11 18:20:10 2012 @@ -348,6 +348,36 @@ public class TextMarkerEnvironment { return getVariableValue(name, Object.class); } +// public Object getLiteralValue(String var, Object value) { +// if (ownsVariable(var)) { +// Class<?> clazz = variableTypes.get(var); +// if (value instanceof NumberExpression) { +// NumberExpression ne = (NumberExpression) value; +// if (clazz.equals(Integer.class)) { +// return ne.getIntegerValue(owner); +// } else if (clazz.equals(Double.class)) { +// return ne.getDoubleValue(owner); +// } else if (clazz.equals(Float.class)) { +// return ne.getFloatValue(owner); +// } else if (clazz.equals(String.class)) { +// return ne.getStringValue(owner); +// } +// } else if (clazz.equals(String.class) && value instanceof StringExpression) { +// StringExpression se = (StringExpression) value; +// return se.getStringValue(owner); +// } else if (clazz.equals(Boolean.class) && value instanceof BooleanExpression) { +// BooleanExpression be = (BooleanExpression) value; +// return be.getBooleanValue(owner); +// } else if(clazz.equals(TextMarkerWordList.class)||clazz.equals(TextMarkerTable.class) ) { +// // TODO: refactor this: lists are handled by setVariableValue... resources should be ae resources in future! 
+// return value; +// } +// return null; +// } else { +// return owner.getParent().getEnvironment().getLiteralValue(var, value); +// } +// } + public Object getLiteralValue(String var, Object value) { if (ownsVariable(var)) { Class<?> clazz = variableTypes.get(var); @@ -369,12 +399,34 @@ public class TextMarkerEnvironment { BooleanExpression be = (BooleanExpression) value; return be.getBooleanValue(owner); } + if (clazz.equals(TextMarkerWordList.class) && value instanceof LiteralWordListExpression) { + LiteralWordListExpression lle = (LiteralWordListExpression) value; + String path = lle.getText(); + TextMarkerWordList wordList = getWordList(path); + return wordList; + } else if (clazz.equals(TextMarkerWordList.class)) { + TextMarkerWordList list = getWordList((String) value); + return list; + } else if (clazz.equals(TextMarkerTable.class) && value instanceof LiteralWordTableExpression) { + LiteralWordTableExpression lte = (LiteralWordTableExpression) value; + String path = lte.getText(); + TextMarkerTable table = getWordTable(path); + return table; + } else if (clazz.equals(TextMarkerTable.class)) { + TextMarkerTable table = getWordTable((String) value); + return table; + } else if (clazz.equals(List.class) && value instanceof ListExpression) { + List list = getList((ListExpression) value); + return list; + } + return null; } else { return owner.getParent().getEnvironment().getLiteralValue(var, value); } } - + + public void setInitialVariableValue(String var, Object value) { if (ownsVariable(var)) { initializedVariables.put(var, value); @@ -387,36 +439,48 @@ public class TextMarkerEnvironment { public void setVariableValue(String var, Object value) { if (ownsVariable(var)) { Class<?> clazz = variableTypes.get(var); - if (clazz.equals(TextMarkerWordList.class) && value instanceof LiteralWordListExpression) { - LiteralWordListExpression lle = (LiteralWordListExpression) value; - String path = lle.getText(); - TextMarkerWordList wordList = getWordList(path); - 
variableValues.put(var, wordList); - } else if (clazz.equals(TextMarkerWordList.class)) { - TextMarkerWordList list = getWordList((String) value); - variableValues.put(var, list); - } else if (clazz.equals(TextMarkerTable.class) && value instanceof LiteralWordTableExpression) { - LiteralWordTableExpression lte = (LiteralWordTableExpression) value; - String path = lte.getText(); - TextMarkerTable table = getWordTable(path); - variableValues.put(var, table); - } else if (clazz.equals(TextMarkerTable.class)) { - TextMarkerTable table = getWordTable((String) value); - variableValues.put(var, table); - } else if (clazz.equals(List.class) && value instanceof ListExpression) { - List list = getList((ListExpression) value); - variableValues.put(var, list); - } else { if (value == null) { value = getInitialValue(var, clazz); } variableValues.put(var, value); - } } else if (owner.getParent() != null) { owner.getParent().getEnvironment().setVariableValue(var, value); } } +// public void setVariableValue(String var, Object value) { +// if (ownsVariable(var)) { +// Class<?> clazz = variableTypes.get(var); +// if (clazz.equals(TextMarkerWordList.class) && value instanceof LiteralWordListExpression) { +// LiteralWordListExpression lle = (LiteralWordListExpression) value; +// String path = lle.getText(); +// TextMarkerWordList wordList = getWordList(path); +// variableValues.put(var, wordList); +// } else if (clazz.equals(TextMarkerWordList.class)) { +// TextMarkerWordList list = getWordList((String) value); +// variableValues.put(var, list); +// } else if (clazz.equals(TextMarkerTable.class) && value instanceof LiteralWordTableExpression) { +// LiteralWordTableExpression lte = (LiteralWordTableExpression) value; +// String path = lte.getText(); +// TextMarkerTable table = getWordTable(path); +// variableValues.put(var, table); +// } else if (clazz.equals(TextMarkerTable.class)) { +// TextMarkerTable table = getWordTable((String) value); +// variableValues.put(var, table); +// } 
else if (clazz.equals(List.class) && value instanceof ListExpression) { +// List list = getList((ListExpression) value); +// variableValues.put(var, list); +// } else { +// if (value == null) { +// value = getInitialValue(var, clazz); +// } +// variableValues.put(var, value); +// } +// } else if (owner.getParent() != null) { +// owner.getParent().getEnvironment().setVariableValue(var, value); +// } +// } + private List getList(ListExpression value) { if (value instanceof SimpleBooleanListExpression) { SimpleBooleanListExpression e = (SimpleBooleanListExpression) value; Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java?rev=1230176&r1=1230175&r2=1230176&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java Wed Jan 11 18:20:10 2012 @@ -20,11 +20,15 @@ package org.apache.uima.textmarker; import java.util.ArrayList; +import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.Map.Entry; +import java.util.NavigableSet; import java.util.NoSuchElementException; import java.util.TreeMap; +import java.util.TreeSet; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; @@ -36,31 +40,29 @@ import org.apache.uima.cas.Type; import org.apache.uima.cas.TypeSystem; import org.apache.uima.cas.impl.FSIteratorImplBase; import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.cas.text.AnnotationIndex; import org.apache.uima.jcas.JCas; import org.apache.uima.jcas.tcas.Annotation; 
import org.apache.uima.jcas.tcas.DocumentAnnotation; import org.apache.uima.textmarker.rule.RuleElementMatch; import org.apache.uima.textmarker.type.TextMarkerAnnotation; import org.apache.uima.textmarker.type.TextMarkerBasic; -import org.apache.uima.textmarker.type.TextMarkerFrame; public class TextMarkerStream extends FSIteratorImplBase<AnnotationFS> { private final CAS cas; - private final FSIterator<AnnotationFS> basicIt; + private FSIterator<AnnotationFS> basicIt; private FSIterator<AnnotationFS> currentIt; private AnnotationFS documentAnnotation; - private TextMarkerBasic firstBasic; - private Type documentAnnotationType; private Type basicType; - private final List<TextMarkerBasic> basics; + private final TreeSet<TextMarkerBasic> basics; private TreeMap<Integer, TextMarkerBasic> beginAnchors; @@ -76,107 +78,153 @@ public class TextMarkerStream extends FS private double anchoringFactor; - protected TextMarkerStream(CAS cas, FSIterator<AnnotationFS> basic, - FSIterator<AnnotationFS> current, Type basicType, FilterManager filter) { + protected TextMarkerStream(CAS cas, FSIterator<AnnotationFS> current, Type basicType, + FilterManager filter) { super(); this.cas = cas; this.filter = filter; this.basicType = basicType; AnnotationFS additionalWindow = filter.getWindowAnnotation(); - if (additionalWindow != null) { - this.basicIt = cas.getAnnotationIndex(basicType).subiterator(additionalWindow); - } else { - this.basicIt = basic; - } - if (current == null) { - // TODO use here a subiterator?? - currentIt = filter.createFilteredIterator(cas, basicType); - // currentIt = cas.createFilteredIterator(basic, filter.getDefaultConstraint()); - } else { - currentIt = current; - } + updateIterators(cas, basicType, filter, additionalWindow); // really an if? 
sub it of basic should fix this if (additionalWindow == null) { documentAnnotation = (DocumentAnnotation) getJCas().getDocumentAnnotationFs(); documentAnnotationType = getCas().getDocumentAnnotation().getType(); basicIt.moveToFirst(); - if (basicIt.isValid()) { - firstBasic = (TextMarkerBasic) basicIt.get(); - } } else { documentAnnotation = additionalWindow; documentAnnotationType = filter.getWindowType(); - firstBasic = getFirstBasicInWindow(additionalWindow, basic); } - // really faster??? - basics = new ArrayList<TextMarkerBasic>(); + // // really faster??? + // TODO this needs to be changed!! use collection of prior stream + org.apache.uima.textmarker.rule.AnnotationComparator comparator = new org.apache.uima.textmarker.rule.AnnotationComparator(); + basics = new TreeSet<TextMarkerBasic>(comparator); beginAnchors = new TreeMap<Integer, TextMarkerBasic>(); endAnchors = new TreeMap<Integer, TextMarkerBasic>(); FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex(basicType).subiterator( documentAnnotation); while (iterator.isValid()) { - TextMarkerBasic e = (TextMarkerBasic) iterator.get(); beginAnchors.put(e.getBegin(), e); endAnchors.put(e.getEnd(), e); - basics.add(e); iterator.moveToNext(); } } - public TextMarkerStream(CAS cas, FSIterator<AnnotationFS> basic, Type basicType, - FilterManager filter) { - this(cas, basic, null, basicType, filter); + private void updateIterators(AnnotationFS additionalWindow) { + updateIterators(cas, basicType, filter, additionalWindow); + } + + private void updateIterators(CAS cas, Type basicType, + FilterManager filter, AnnotationFS additionalWindow) { + if (additionalWindow != null) { + this.basicIt = cas.getAnnotationIndex(basicType).subiterator(additionalWindow); + } else { + this.basicIt = cas.getAnnotationIndex(basicType).iterator(); + } + currentIt = filter.createFilteredIterator(cas, basicType); } - // public void addAnnotation(TextMarkerBasic anchor, AnnotationFS annotation) { - // if (anchor == null) - // 
return; - // Type type = annotation.getType(); - // String name = type.getName(); - // TypeSystem typeSystem = cas.getTypeSystem(); - // Type parent = type; - // while (parent != null) { - // // anchor.setAnnotation(parent.getName(), annotation, parent == type); - // TextMarkerBasic beginAnchor = getBeginAnchor(annotation.getBegin()); - // TextMarkerBasic endAnchor = getEndAnchor(annotation.getEnd()); - // beginAnchor.addBegin(annotation); - // endAnchor.addEnd(annotation); - // parent = typeSystem.getParent(parent); - // } - // List<TextMarkerBasic> basicAnnotationsInWindow = getAllBasicsInWindow(annotation); - // for (TextMarkerBasic basic : basicAnnotationsInWindow) { - // basic.addPartOf(name); - // } - // } + public TextMarkerStream(CAS cas, Type basicType, FilterManager filter) { + this(cas, null, basicType, filter); + } + + public void initalizeBasics() { + AnnotationIndex<AnnotationFS> annotationIndex = cas.getAnnotationIndex(); + TreeSet<Integer> anchors = new TreeSet<Integer>(); + for (AnnotationFS a : annotationIndex) { + anchors.add(a.getBegin()); + anchors.add(a.getEnd()); + } + while (anchors.size() >= 2) { + Integer first = anchors.pollFirst(); + Integer second = anchors.first(); + TextMarkerBasic newTMB = new TextMarkerBasic(getJCas(), first, second); + beginAnchors.put(first, newTMB); + endAnchors.put(second, newTMB); + basics.add(newTMB); + cas.addFsToIndexes(newTMB); + } + for (AnnotationFS a : annotationIndex) { + if (!a.getType().equals(basicType)) { + addAnnotation(a, false); + } + } + updateIterators(documentAnnotation); + } public void addAnnotation(AnnotationFS annotation) { + addAnnotation(annotation, false); + } + + public void addAnnotation(AnnotationFS annotation, boolean update) { Type type = annotation.getType(); TypeSystem typeSystem = cas.getTypeSystem(); Type parent = type; + boolean modified = checkSpan(annotation); + if (modified) { + updateIterators(filter.getWindowAnnotation()); + } + TextMarkerBasic beginAnchor = 
getBeginAnchor(annotation.getBegin()); + TextMarkerBasic endAnchor = getEndAnchor(annotation.getEnd()); while (parent != null) { - // anchor.setAnnotation(parent.getName(), annotation, parent == type); - TextMarkerBasic beginAnchor = getBeginAnchor(annotation.getBegin()); - TextMarkerBasic endAnchor = getEndAnchor(annotation.getEnd()); beginAnchor.addBegin(annotation, parent); endAnchor.addEnd(annotation, parent); parent = typeSystem.getParent(parent); } - List<TextMarkerBasic> basicAnnotationsInWindow = getAllBasicsInWindow(annotation); + Collection<TextMarkerBasic> basicAnnotationsInWindow = getAllBasicsInWindow(annotation); for (TextMarkerBasic basic : basicAnnotationsInWindow) { basic.addPartOf(type); } } + private boolean checkSpan(AnnotationFS annotation) { + boolean result = false; + int begin = annotation.getBegin(); + int end = annotation.getEnd(); + TextMarkerBasic beginAnchor = getBeginAnchor(begin); + TextMarkerBasic endAnchor = getEndAnchor(end); + if (beginAnchor != null && endAnchor != null) { + result = false; + } else { + if (beginAnchor == null) { + result |= checkAnchor(begin); + } + if (endAnchor == null) { + result |= checkAnchor(end); + } + } + return result; + } + + private boolean checkAnchor(int anchor) { + Entry<Integer, TextMarkerBasic> floorEntry = endAnchors.floorEntry(anchor); + Entry<Integer, TextMarkerBasic> ceilingEntry = endAnchors.ceilingEntry(anchor); + if (floorEntry != null && ceilingEntry != null) { + TextMarkerBasic floor = floorEntry.getValue(); + TextMarkerBasic ceiling = ceilingEntry.getValue(); + cas.removeFsFromIndexes(floor); + floor.setEnd(anchor); + AnnotationFS createAnnotation = cas.createAnnotation(basicType, anchor, ceiling.getBegin()); + cas.addFsToIndexes(floor); + cas.addFsToIndexes(createAnnotation); + return true; + } else { + // TODO this should never happen! test it and remove the assert! 
+ assert (false); + } + return false; + } + public void removeAnnotation(AnnotationFS annotationFS) { removeAnnotation(annotationFS, annotationFS.getType()); } public void removeAnnotation(AnnotationFS annotation, Type type) { TypeSystem typeSystem = cas.getTypeSystem(); - List<TextMarkerBasic> basicAnnotationsInWindow = getAllBasicsInWindow(annotation); + Collection<TextMarkerBasic> basicAnnotationsInWindow = getAllBasicsInWindow(annotation); for (TextMarkerBasic basic : basicAnnotationsInWindow) { basic.removePartOf(type); } @@ -194,30 +242,6 @@ public class TextMarkerStream extends FS } - // public void removeAnnotation(TextMarkerBasic anchor, Type type) { - // TypeSystem typeSystem = cas.getTypeSystem(); - // String name = type.getName(); - // AnnotationFS expandAnchor = expandAnchor(anchor, type, true); - // if (expandAnchor == null) { - // // there is no annotation to remove - // return; - // } - // List<TextMarkerBasic> basicAnnotationsInWindow = getAllBasicsInWindow(expandAnchor); - // for (TextMarkerBasic basic : basicAnnotationsInWindow) { - // basic.removePartOf(name); - // } - // Type parent = type; - // TextMarkerBasic beginAnchor = getBeginAnchor(annotation.getBegin()); - // TextMarkerBasic endAnchor = getEndAnchor(annotation.getEnd()); - // while (parent != null) { - // anchor.removeAnnotation(parent.getName(), parent == type); - // parent = typeSystem.getParent(parent); - // } - // if (!(expandAnchor instanceof TextMarkerBasic)) { - // cas.removeFsFromIndexes(expandAnchor); - // } - // } - public FSIterator<AnnotationFS> getFilteredBasicIterator(FSMatchConstraint constraint) { ConstraintFactory cf = cas.getConstraintFactory(); FSMatchConstraint matchConstraint = cf.and(constraint, filter.getDefaultConstraint()); @@ -239,7 +263,7 @@ public class TextMarkerStream extends FS } public FSIterator<AnnotationFS> copy() { - return new TextMarkerStream(cas, basicIt.copy(), currentIt.copy(), basicType, filter); + return new TextMarkerStream(cas, 
currentIt.copy(), basicType, filter); } public AnnotationFS get() throws NoSuchElementException { @@ -388,23 +412,28 @@ public class TextMarkerStream extends FS return result; } - public List<TextMarkerBasic> getAllBasicsInWindow(AnnotationFS windowAnnotation) { - List<TextMarkerBasic> result = new ArrayList<TextMarkerBasic>(); - if (windowAnnotation instanceof TextMarkerBasic) { - result.add((TextMarkerBasic) windowAnnotation); - return result; - } else if (windowAnnotation.getBegin() <= documentAnnotation.getBegin() - && windowAnnotation.getEnd() >= documentAnnotation.getEnd()) { - return basics; - } - TextMarkerFrame frame = new TextMarkerFrame(getJCas(), windowAnnotation.getBegin(), - windowAnnotation.getEnd()); - FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex(basicType).subiterator(frame); - while (iterator.isValid()) { - result.add((TextMarkerBasic) iterator.get()); - iterator.moveToNext(); - } - return result; + public Collection<TextMarkerBasic> getAllBasicsInWindow(AnnotationFS windowAnnotation) { + + TextMarkerBasic beginAnchor = getBeginAnchor(windowAnnotation.getBegin()); + TextMarkerBasic endAnchor = getEndAnchor(windowAnnotation.getEnd()); + NavigableSet<TextMarkerBasic> subSet = basics.subSet(beginAnchor, true, endAnchor, true); + return subSet; + // List<TextMarkerBasic> result = new ArrayList<TextMarkerBasic>(); + // if (windowAnnotation instanceof TextMarkerBasic) { + // result.add((TextMarkerBasic) windowAnnotation); + // return result; + // } else if (windowAnnotation.getBegin() <= documentAnnotation.getBegin() + // && windowAnnotation.getEnd() >= documentAnnotation.getEnd()) { + // return basics; + // } + // TextMarkerFrame frame = new TextMarkerFrame(getJCas(), windowAnnotation.getBegin(), + // windowAnnotation.getEnd()); + // FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex(basicType).subiterator(frame); + // while (iterator.isValid()) { + // result.add((TextMarkerBasic) iterator.get()); + // iterator.moveToNext(); + // 
} + // return result; } public List<TextMarkerBasic> getBasicsInWindow(AnnotationFS windowAnnotation) { @@ -501,7 +530,7 @@ public class TextMarkerStream extends FS } public TextMarkerBasic getFirstBasicOfAll() { - return firstBasic; + return basics.first(); } public Type getDocumentAnnotationType() { Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicEngine.xml URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicEngine.xml?rev=1230176&r1=1230175&r2=1230176&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicEngine.xml (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicEngine.xml Wed Jan 11 18:20:10 2012 @@ -1,23 +1,4 @@ <?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. See the License for the - specific language governing permissions and limitations - under the License. 
---> - <analysisEngineDescription xmlns="http://uima.apache.org/resourceSpecifier"> <frameworkImplementation>org.apache.uima.java</frameworkImplementation> <primitive>true</primitive> @@ -131,12 +112,6 @@ <mandatory>false</mandatory> </configurationParameter> <configurationParameter> - <name>useBasics</name> - <type>String</type> - <multiValued>false</multiValued> - <mandatory>false</mandatory> - </configurationParameter> - <configurationParameter> <name>removeBasics</name> <type>Boolean</type> <multiValued>false</multiValued> Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicTypeSystem.xml URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicTypeSystem.xml?rev=1230176&r1=1230175&r2=1230176&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicTypeSystem.xml (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/BasicTypeSystem.xml Wed Jan 11 18:20:10 2012 @@ -1,23 +1,4 @@ <?xml version="1.0" encoding="UTF-8"?> -<!-- - Licensed to the Apache Software Foundation (ASF) under one - or more contributor license agreements. See the NOTICE file - distributed with this work for additional information - regarding copyright ownership. The ASF licenses this file - to you under the Apache License, Version 2.0 (the - "License"); you may not use this file except in compliance - with the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, - software distributed under the License is distributed on an - "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - KIND, either express or implied. 
See the License for the - specific language governing permissions and limitations - under the License. ---> - <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier"> <name>BasicTypeSystem</name> <description/> @@ -30,7 +11,7 @@ <typeDescription> <name>org.apache.uima.textmarker.type.ALL</name> <description/> - <supertypeName>org.apache.uima.textmarker.type.BasicAnnotation</supertypeName> + <supertypeName>org.apache.uima.textmarker.type.TokenSeed</supertypeName> </typeDescription> <typeDescription> <name>org.apache.uima.textmarker.type.ANY</name> @@ -143,9 +124,9 @@ <supertypeName>uima.tcas.DocumentAnnotation</supertypeName> </typeDescription> <typeDescription> - <name>org.apache.uima.textmarker.type.BasicAnnotation</name> + <name>org.apache.uima.textmarker.type.TokenSeed</name> <description/> - <supertypeName>org.apache.uima.textmarker.type.TextMarkerBasic</supertypeName> + <supertypeName>uima.tcas.Annotation</supertypeName> </typeDescription> </types> </typeSystemDescription> Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/TextMarkerEngine.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/TextMarkerEngine.java?rev=1230176&r1=1230175&r2=1230176&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/TextMarkerEngine.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/engine/TextMarkerEngine.java Wed Jan 11 18:20:10 2012 @@ -80,8 +80,6 @@ public class TextMarkerEngine extends JC public static final String SEEDERS = "seeders"; - public static final String USE_BASICS = "useBasics"; - public static final String REMOVE_BASICS = "removeBasics"; public static final String SCRIPT_PATHS = 
"scriptPaths"; @@ -122,8 +120,6 @@ public class TextMarkerEngine extends JC private String[] seeders; - private String useBasics; - private Boolean createDebugInfo; private String[] createDebugOnlyFor; @@ -176,6 +172,8 @@ public class TextMarkerEngine extends JC private boolean initialized = false; + + @Override public void initialize(UimaContext aContext) throws ResourceInitializationException { super.initialize(aContext); @@ -183,7 +181,6 @@ public class TextMarkerEngine extends JC aContext = context; } seeders = (String[]) aContext.getConfigParameterValue(SEEDERS); - useBasics = (String) aContext.getConfigParameterValue(USE_BASICS); removeBasics = (Boolean) aContext.getConfigParameterValue(REMOVE_BASICS); scriptPaths = (String[]) aContext.getConfigParameterValue(SCRIPT_PATHS); descriptorPaths = (String[]) aContext.getConfigParameterValue(DESCRIPTOR_PATHS); @@ -384,14 +381,18 @@ public class TextMarkerEngine extends JC filterTypes.add(type); } } - FilterManager filter = new FilterManager(filterTypes, filterTags, cas); Type basicType = typeSystem.getType(BASIC_TYPE); - if (seeders != null) { - // not already contains that basics: - if (cas.getAnnotationIndex(basicType).size() <= 0) { + seedAnnotations(cas); + TextMarkerStream stream = new TextMarkerStream(cas, basicType, filter); + stream.initalizeBasics(); + return stream; + } + + private void seedAnnotations(CAS cas) throws AnalysisEngineProcessException { + if (seeders != null) { for (String seederClass : seeders) { Class<?> loadClass = null; try { @@ -405,44 +406,16 @@ public class TextMarkerEngine extends JC } catch (Exception e) { throw new AnalysisEngineProcessException(e); } - try { TextMarkerAnnotationSeeder seeder = (TextMarkerAnnotationSeeder) newInstance; seeder.seed(cas.getDocumentText(), cas); - } catch (Exception e) { throw new AnalysisEngineProcessException(e); } } - - } - - } else if (useBasics != null) { - // thats the case if predefined annotation should be used for inference - Type givenType = 
typeSystem.getType(useBasics); - FSIterator<AnnotationFS> iterator = cas.getAnnotationIndex(givenType).iterator(); - while (iterator.isValid()) { - AnnotationFS fs = iterator.get(); - AnnotationFS createAnnotation = cas.createAnnotation(basicType, fs.getBegin(), fs.getEnd()); - cas.addFsToIndexes(createAnnotation); - iterator.moveToNext(); - } } - - FSIterator<AnnotationFS> it = cas.getAnnotationIndex(basicType).iterator(); - - TextMarkerStream stream = new TextMarkerStream(cas, it, basicType, filter); - - // TODO find a better solution for this!! -> feature request open - FSIterator<AnnotationFS> others = cas.getAnnotationIndex().iterator(); - while (others.isValid()) { - AnnotationFS a = (AnnotationFS) others.get(); - stream.addAnnotation(a); - others.moveToNext(); - } - return stream; } - + private void initializeScript() throws AnalysisEngineProcessException { String scriptLocation = locate(mainScript, scriptPaths, ".tm"); if (scriptLocation == null) { Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/parser/TextMarkerParser.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/parser/TextMarkerParser.java?rev=1230176&r1=1230175&r2=1230176&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/parser/TextMarkerParser.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/parser/TextMarkerParser.java Wed Jan 11 18:20:10 2012 @@ -41,18 +41,17 @@ import org.antlr.runtime.RecognizerShare import org.antlr.runtime.Token; import org.antlr.runtime.TokenStream; import org.apache.uima.resource.metadata.TypeSystemDescription; - -import org.apache.uima.textmarker.action.AbstractTextMarkerAction; -import 
org.apache.uima.textmarker.action.ActionFactory; -import org.apache.uima.textmarker.condition.AbstractTextMarkerCondition; -import org.apache.uima.textmarker.condition.ConditionFactory; import org.apache.uima.textmarker.TextMarkerAutomataBlock; +import org.apache.uima.textmarker.TextMarkerAutomataFactory; import org.apache.uima.textmarker.TextMarkerBlock; import org.apache.uima.textmarker.TextMarkerModule; import org.apache.uima.textmarker.TextMarkerScriptBlock; import org.apache.uima.textmarker.TextMarkerScriptFactory; -import org.apache.uima.textmarker.TextMarkerAutomataFactory; import org.apache.uima.textmarker.TextMarkerStatement; +import org.apache.uima.textmarker.action.AbstractTextMarkerAction; +import org.apache.uima.textmarker.action.ActionFactory; +import org.apache.uima.textmarker.condition.AbstractTextMarkerCondition; +import org.apache.uima.textmarker.condition.ConditionFactory; import org.apache.uima.textmarker.expression.ExpressionFactory; import org.apache.uima.textmarker.expression.TextMarkerExpression; import org.apache.uima.textmarker.expression.bool.BooleanExpression; @@ -76,14 +75,6 @@ import org.apache.uima.textmarker.rule.T import org.apache.uima.textmarker.rule.TextMarkerRuleElement; import org.apache.uima.textmarker.rule.quantifier.RuleElementQuantifier; - -import org.antlr.runtime.*; -import java.util.Stack; -import java.util.List; -import java.util.ArrayList; -import java.util.Map; -import java.util.HashMap; - @SuppressWarnings({"all", "warnings", "unchecked"}) public class TextMarkerParser extends Parser { public static final String[] tokenNames = new String[] { Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java?rev=1230176&r1=1230175&r2=1230176&view=diff 
============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/DefaultSeeder.java Wed Jan 11 18:20:10 2012 @@ -24,25 +24,35 @@ import java.io.StringReader; import org.apache.uima.cas.CAS; import org.apache.uima.cas.CASException; -import org.apache.uima.jcas.tcas.Annotation; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.jcas.JCas; +import org.apache.uima.textmarker.type.TokenSeed; public class DefaultSeeder implements TextMarkerAnnotationSeeder { public void seed(String text, CAS cas) { - BufferedReader reader = new BufferedReader(new StringReader(text)); - final SeedLexer sourceLexer = new SeedLexer(reader); + JCas jCas = null; + int size = 0; try { - sourceLexer.setJCas(cas.getJCas()); + jCas = cas.getJCas(); + size = jCas.getAnnotationIndex(TokenSeed.type).size(); } catch (CASException e1) { } - Annotation a = null; + // do not apply seeding if there are already annotations of this seed type + if (jCas == null || size != 0) { + return; + } + BufferedReader reader = new BufferedReader(new StringReader(text)); + final SeedLexer sourceLexer = new SeedLexer(reader); + sourceLexer.setJCas(jCas); + AnnotationFS a = null; try { a = sourceLexer.yylex(); } catch (Exception e) { } while (a != null) { - a.addToIndexes(); + cas.addFsToIndexes(a); try { a = sourceLexer.yylex(); } catch (Exception e) { Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/SeedLexer.flex URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/SeedLexer.flex?rev=1230176&r1=1230175&r2=1230176&view=diff ============================================================================== 
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/SeedLexer.flex (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/seed/SeedLexer.flex Wed Jan 11 18:20:10 2012 @@ -18,13 +18,13 @@ */ -package org.apache.uima.textmarker.scanner; +package org.apache.uima.textmarker.seed; import java.util.*; import java.util.regex.*; +import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.jcas.JCas; -import org.apache.uima.textmarker.type.TextMarkerBasic; import org.apache.uima.textmarker.type.AMP; import org.apache.uima.textmarker.type.BREAK; import org.apache.uima.textmarker.type.CAP; @@ -45,27 +45,8 @@ import org.apache.uima.textmarker.type.S %% %{ - private int number = 0; - - private Map<String,String> tags = new HashMap<String,String>(); private JCas cas; - private final static Pattern tagPattern = - Pattern.compile("</?(\\w+)([^>]*)>"); - private String splitAndPutInMap(String tag){ - Matcher m = tagPattern.matcher(tag); - if(m.find()){ - String name = m.group(1).toLowerCase(); - tags.put(name,m.group(2)); - return name; - } else { - return "!"; - } - } - private void removeTag(String closingTag){ - String cTag = closingTag.replace("</",""); - cTag = cTag.replace(">","").toLowerCase(); - tags.remove(cTag.trim()); - } + public void setJCas(JCas cas) { this.cas = cas; } @@ -74,7 +55,7 @@ import org.apache.uima.textmarker.type.S %unicode %line %char -%type TextMarkerBasic +%type AnnotationFS %class SeedLexer ALPHA=[A-Za-z] @@ -89,38 +70,34 @@ SPACE=[ \t] <YYINITIAL> { \<[/][!][^>]*> { - removeTag(yytext()); MARKUP t = new MARKUP(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } \<[!][^>]*> { - String tag = splitAndPutInMap(yytext()); MARKUP t = new MARKUP(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } \<[/][A-Za-z][A-Za-z0-9]*[^>]*> { - 
removeTag(yytext()); MARKUP t = new MARKUP(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } \<[A-Za-z][A-Za-z0-9]*[^>]*> { - String tag = splitAndPutInMap(yytext()); MARKUP t = new MARKUP(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -128,7 +105,7 @@ SPACE=[ \t] NBSP t = new NBSP(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -136,7 +113,7 @@ SPACE=[ \t] AMP t = new AMP(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -144,7 +121,7 @@ SPACE=[ \t] BREAK t = new BREAK(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -152,7 +129,7 @@ SPACE=[ \t] SPACE t = new SPACE(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -160,7 +137,7 @@ SPACE=[ \t] COLON t = new COLON(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -168,7 +145,7 @@ SPACE=[ \t] COMMA t = new COMMA(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -176,7 +153,7 @@ SPACE=[ \t] PERIOD t = new PERIOD(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -185,7 +162,7 @@ SPACE=[ \t] EXCLAMATION t = new EXCLAMATION(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -193,7 +170,7 @@ SPACE=[ \t] SEMICOLON t = new SEMICOLON(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -201,7 +178,7 @@ SPACE=[ \t] QUESTION t = new QUESTION(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -209,7 +186,7 @@ SPACE=[ \t] SW t = new SW(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -217,7 
+194,7 @@ SPACE=[ \t] CW t = new CW(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -225,7 +202,7 @@ SPACE=[ \t] CAP t = new CAP(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -233,7 +210,7 @@ SPACE=[ \t] NUM t = new NUM(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; } @@ -241,7 +218,7 @@ SPACE=[ \t] SPECIAL t = new SPECIAL(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); - t.setTags(tags); + return t; }