svn commit: r1366364 - in /uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker: TextMarkerStream.java engine/AnnotationWriter.java rule/TextMarkerTypeMatcher.java
Author: pkluegl Date: Fri Jul 27 12:28:37 2012 New Revision: 1366364 URL: http://svn.apache.org/viewvc?rev=1366364&view=rev Log: no jira - fixed imports in TextMarkerStream - improved output generation in AnnotationWriter - added test on null in TextMarkerTypeMatcher Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/engine/AnnotationWriter.java uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java?rev=1366364&r1=1366363&r2=1366364&view=diff == --- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/TextMarkerStream.java Fri Jul 27 12:28:37 2012 @@ -37,7 +37,6 @@ import org.apache.uima.cas.FSIterator; import org.apache.uima.cas.FSMatchConstraint; import org.apache.uima.cas.FeatureStructure; import org.apache.uima.cas.Type; -import org.apache.uima.cas.TypeSystem; import org.apache.uima.cas.impl.FSIteratorImplBase; import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.cas.text.AnnotationIndex; @@ -49,7 +48,6 @@ import org.apache.uima.textmarker.rule.R import org.apache.uima.textmarker.type.TextMarkerAnnotation; import org.apache.uima.textmarker.type.TextMarkerBasic; import org.apache.uima.textmarker.visitor.InferenceCrowd; -import org.hamcrest.CoreMatchers; public class TextMarkerStream extends FSIteratorImplBase { Modified: 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/engine/AnnotationWriter.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/engine/AnnotationWriter.java?rev=1366364&r1=1366363&r2=1366364&view=diff == --- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/engine/AnnotationWriter.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/engine/AnnotationWriter.java Fri Jul 27 12:28:37 2012 @@ -67,16 +67,14 @@ public class AnnotationWriter extends JC Type targetType = cas.getTypeSystem().getType(type); FSIterator iterator = cas.getAnnotationIndex(targetType).iterator(); -String newDocument = ""; +StringBuilder newDocument = new StringBuilder(); while (iterator.isValid()) { AnnotationFS fs = iterator.get(); - newDocument += fs.getCoveredText() + "\n\n"; + newDocument.append(fs.getCoveredText()); + newDocument.append("\n"); iterator.moveToNext(); } -newDocument = newDocument.trim(); -if ("".equals(newDocument)) { - newDocument = cas.getDocumentText(); -} + Type sdiType = cas.getTypeSystem().getType(TextMarkerEngine.SOURCE_DOCUMENT_INFORMATION); @@ -98,7 +96,7 @@ public class AnnotationWriter extends JC } try { - FileUtils.saveString2File(newDocument, file, encoding); + FileUtils.saveString2File(newDocument.toString(), file, encoding); } catch (IOException e) { throw new AnalysisEngineProcessException(e); } Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java?rev=1366364&r1=1366363&r2=1366364&view=diff == --- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java (original) +++ 
uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java Fri Jul 27 12:28:37 2012 @@ -85,6 +85,9 @@ public class TextMarkerTypeMatcher imple public Collection getAnnotationsAfter(TextMarkerRuleElement ruleElement, AnnotationFS annotation, TextMarkerStream stream, TextMarkerBlock parent) { TextMarkerBasic lastBasic = stream.getEndAnchor(annotation.getEnd()); +if(lastBasic== null) { + return Collections.emptyList(); +} stream.moveTo(lastBasic); stream.moveToNext(); if (stream.isValid()) { @@ -110,6 +113,9 @@ public class TextMarkerTypeMatcher imple public Collection getAnno
svn commit: r1366346 - /uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/AbstractStructureAction.java
Author: pkluegl Date: Fri Jul 27 11:54:41 2012 New Revision: 1366346 URL: http://svn.apache.org/viewvc?rev=1366346&view=rev Log: UIMA-2439 - changed order in if-statement. Second alternative was already covered by first one since TypeExpression implements StringExpression for automatic conversion Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/AbstractStructureAction.java Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/AbstractStructureAction.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/AbstractStructureAction.java?rev=1366346&r1=1366345&r2=1366346&view=diff == --- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/AbstractStructureAction.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/action/AbstractStructureAction.java Fri Jul 27 11:54:41 2012 @@ -65,10 +65,7 @@ public abstract class AbstractStructureA Object valueObject = map.get(shortFName); Type range = targetFeature.getRange(); if (valueObject != null) { -if (valueObject instanceof StringExpression && range.getName().equals(UIMAConstants.TYPE_STRING)) { - structure.setStringValue(targetFeature, - ((StringExpression) valueObject).getStringValue(element.getParent())); -} else if (valueObject instanceof TypeExpression +if (valueObject instanceof TypeExpression && range.getName().equals(UIMAConstants.TYPE_STRING)) { TypeExpression type = (TypeExpression) valueObject; List annotationsInWindow = stream.getAnnotationsInWindow(matchedAnnotation, @@ -77,25 +74,30 @@ public abstract class AbstractStructureA AnnotationFS annotation = annotationsInWindow.get(0); structure.setStringValue(targetFeature, annotation.getCoveredText()); } +} else if (valueObject instanceof StringExpression +&& 
range.getName().equals(UIMAConstants.TYPE_STRING)) { + structure.setStringValue(targetFeature, + ((StringExpression) valueObject).getStringValue(element.getParent())); + } else if (valueObject instanceof NumberExpression) { - if(range.getName().equals(UIMAConstants.TYPE_INTEGER)) { + if (range.getName().equals(UIMAConstants.TYPE_INTEGER)) { structure.setIntValue(targetFeature, ((NumberExpression) valueObject).getIntegerValue(element.getParent())); - } else if(range.getName().equals(UIMAConstants.TYPE_DOUBLE)) { + } else if (range.getName().equals(UIMAConstants.TYPE_DOUBLE)) { structure.setDoubleValue(targetFeature, ((NumberExpression) valueObject).getDoubleValue(element.getParent())); - } else if(range.getName().equals(UIMAConstants.TYPE_FLOAT)) { + } else if (range.getName().equals(UIMAConstants.TYPE_FLOAT)) { structure.setFloatValue(targetFeature, ((NumberExpression) valueObject).getFloatValue(element.getParent())); - } else if(range.getName().equals(UIMAConstants.TYPE_BYTE)) { -structure.setByteValue(targetFeature, (byte) -((NumberExpression) valueObject).getIntegerValue(element.getParent())); - } else if(range.getName().equals(UIMAConstants.TYPE_SHORT)) { -structure.setShortValue(targetFeature, (short) -((NumberExpression) valueObject).getIntegerValue(element.getParent())); - } else if(range.getName().equals(UIMAConstants.TYPE_LONG)) { -structure.setLongValue(targetFeature, (long) -((NumberExpression) valueObject).getIntegerValue(element.getParent())); + } else if (range.getName().equals(UIMAConstants.TYPE_BYTE)) { +structure.setByteValue(targetFeature, +(byte) ((NumberExpression) valueObject).getIntegerValue(element.getParent())); + } else if (range.getName().equals(UIMAConstants.TYPE_SHORT)) { +structure.setShortValue(targetFeature, +(short) ((NumberExpression) valueObject).getIntegerValue(element.getParent())); + } else if (range.getName().equals(UIMAConstants.TYPE_LONG)) { +structure.setLongValue(targetFeature, +(long) ((NumberExpression) 
valueObject).getIntegerValue(element.getParent())); } } else if (valueObject instanceof BooleanExpression && range.getName().equals(UIMAConstants.TYPE_BOOLEAN)) {
svn commit: r1366345 - /uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/condition/RegExpCondition.java
Author: pkluegl Date: Fri Jul 27 11:50:51 2012 New Revision: 1366345 URL: http://svn.apache.org/viewvc?rev=1366345&view=rev Log: UIMA-2438 - added implementation to evaluate regexp on string variables Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/condition/RegExpCondition.java Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/condition/RegExpCondition.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/condition/RegExpCondition.java?rev=1366345&r1=1366344&r2=1366345&view=diff == --- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/condition/RegExpCondition.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/condition/RegExpCondition.java Fri Jul 27 11:50:51 2012 @@ -23,6 +23,7 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.textmarker.TextMarkerEnvironment; import org.apache.uima.textmarker.TextMarkerStream; import org.apache.uima.textmarker.expression.bool.BooleanExpression; import org.apache.uima.textmarker.expression.bool.SimpleBooleanExpression; @@ -36,30 +37,54 @@ public class RegExpCondition extends Ter private BooleanExpression ignoreCase; + private String variable; + public RegExpCondition(StringExpression pattern, BooleanExpression ignoreCase) { super(); this.pattern = pattern; this.ignoreCase = ignoreCase == null ? 
new SimpleBooleanExpression(false) : ignoreCase; } + public RegExpCondition(String variable, StringExpression pattern, BooleanExpression ignoreCase) { +this(pattern, ignoreCase); +this.variable = variable; + } + @Override public EvaluatedCondition eval(AnnotationFS annotation, RuleElement element, TextMarkerStream stream, InferenceCrowd crowd) { -String coveredText = annotation.getCoveredText(); +Matcher matcher = null; boolean ignore = ignoreCase == null ? false : ignoreCase.getBooleanValue(element.getParent()); -Pattern regularExpPattern = null; String stringValue = pattern.getStringValue(element.getParent()); -if (ignore) { - regularExpPattern = Pattern.compile(stringValue, Pattern.CASE_INSENSITIVE); +if (variable == null) { + String coveredText = annotation.getCoveredText(); + Pattern regularExpPattern = null; + if (ignore) { +regularExpPattern = Pattern.compile(stringValue, Pattern.CASE_INSENSITIVE); + } else { +regularExpPattern = Pattern.compile(stringValue); + } + matcher = regularExpPattern.matcher(coveredText); } else { - regularExpPattern = Pattern.compile(stringValue); + TextMarkerEnvironment environment = element.getParent().getEnvironment(); + String variableValue = environment.getVariableValue(variable, String.class); + Pattern regularExpPattern = null; + if (ignore) { +regularExpPattern = Pattern.compile(stringValue, Pattern.CASE_INSENSITIVE); + } else { +regularExpPattern = Pattern.compile(stringValue); + } + matcher = regularExpPattern.matcher(variableValue); } -Matcher macther = regularExpPattern.matcher(coveredText); -boolean matches = macther.matches(); +boolean matches = matcher.matches(); return new EvaluatedCondition(this, matches); } public StringExpression getPattern() { return pattern; } + + public StringExpression getVariable() { +return pattern; + } }
svn commit: r1366343 - in /uima/sandbox/trunk/TextMarker: uima-docbook-textmarker/ uima-docbook-textmarker/src/docbook/ uimaj-ep-textmarker-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/pars
Author: pkluegl Date: Fri Jul 27 11:49:32 2012 New Revision: 1366343 URL: http://svn.apache.org/viewvc?rev=1366343&view=rev Log: UIMA-2438 - added argument to regexp condition in order to evaluate given pattern on the variable value instead of the matched text - extended grammars - updated documentation Modified: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/ (props changed) uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/tools.textmarker.conditions.xml uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/antlr3/org/apache/uima/textmarker/parser/TextMarkerParser.g uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/java/org/apache/uima/textmarker/condition/ConditionFactory.java Propchange: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/ -- --- svn:ignore (added) +++ svn:ignore Fri Jul 27 11:49:32 2012 @@ -0,0 +1,2 @@ +target +.settings Modified: uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/tools.textmarker.conditions.xml URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/tools.textmarker.conditions.xml?rev=1366343&r1=1366342&r2=1366343&view=diff == --- uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/tools.textmarker.conditions.xml (original) +++ uima/sandbox/trunk/TextMarker/uima-docbook-textmarker/src/docbook/tools.textmarker.conditions.xml Fri Jul 27 11:49:32 2012 @@ -726,8 +726,11 @@ REGEXP The REGEXP condition is fulfilled if the given pattern matches on the - matched annotation. For more details on the syntax of regular - expressions, have a look at the + matched annotation. However, if a string variable is given as the first + argument, then the pattern is evaluated on the value of the variable. 
+ For more details on the syntax of regular + expressions, have a look at + the <ulink url="http://docs.oracle.com/javase/1.4.2/docs/api/java/util/regex/Pattern.html">Java API</ulink>. By default the REGEXP condition is case-sensitive. To change this @@ -738,7 +741,7 @@ Definition: - + Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g?rev=1366343&r1=1366342&r2=1366343&view=diff == --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g Fri Jul 27 11:49:32 2012 @@ -1060,8 +1060,8 @@ conditionPosition returns [TextMarkerCon ; conditionRegExp returns [TextMarkerCondition cond = null] : -name = REGEXP LPAREN pattern = stringExpression (COMMA caseSensitive = booleanExpression)? -{cond = ConditionFactory.createCondition(name, pattern, caseSensitive);} +name = REGEXP LPAREN (v = variable COMMA)? pattern = stringExpression (COMMA caseSensitive = booleanExpression)? 
+{cond = ConditionFactory.createCondition(name, v, pattern, caseSensitive);} RPAREN ; Modified: uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/antlr3/org/apache/uima/textmarker/parser/TextMarkerParser.g URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/antlr3/org/apache/uima/textmarker/parser/TextMarkerParser.g?rev=1366343&r1=1366342&r2=1366343&view=diff == --- uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/antlr3/org/apache/uima/textmarker/parser/TextMarkerParser.g (original) +++ uima/sandbox/trunk/TextMarker/uimaj-textmarker/src/main/antlr3/org/apache/uima/textmarker/parser/TextMarkerParser.g Fri Jul 27 11:49:32 2012 @@ -1005,8 +1005,8 @@ conditionPosition returns [AbstractTextM ; conditionRegExp returns [AbstractTextMarkerCondition cond = null] : -REGEXP LPAREN pattern = stringExpression (COMMA caseSensitive = booleanExpression)? RPAREN -{cond = ConditionFactory.create