Author: pkluegl
Date: Wed Oct 29 14:21:19 2014
New Revision: 1635143
URL: http://svn.apache.org/r1635143
Log:
UIMA-4071
- fixed feature assignment on matches with ignored tokens or with invisible spans
- fixed threshold for max ignored chars
- extended test
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java Wed Oct 29 14:21:19 2014
@@ -848,6 +848,17 @@ public class RutaStream extends FSIterat
return result;
}
+ public String getVisibleCoveredText(AnnotationFS annotationFS) {
+ StringBuilder result = new StringBuilder();
+ List<RutaBasic> basicsInWindow = getBasicsInWindow(annotationFS);
+ for (RutaBasic each : basicsInWindow) {
+ if(isVisible(each)) {
+ result.append(each.getCoveredText());
+ }
+ }
+ return result.toString();
+ }
+
}
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java Wed Oct 29 14:21:19 2014
@@ -64,8 +64,8 @@ public class MarkTableAction extends Abs
public MarkTableAction(TypeExpression typeExpr, INumberExpression indexExpr,
WordTableExpression tableExpr, Map<IStringExpression,
INumberExpression> featureMap,
- IBooleanExpression ignoreCase, INumberExpression ignoreLength, IStringExpression ignoreChar,
- INumberExpression maxIgnoreChar) {
+ IBooleanExpression ignoreCase, INumberExpression ignoreLength,
+ IStringExpression ignoreChar, INumberExpression maxIgnoreChar) {
super();
this.typeExpr = typeExpr;
this.indexExpr = indexExpr;
@@ -102,7 +102,16 @@ public class MarkTableAction extends Abs
Collection<AnnotationFS> found = wordList.find(stream, ignoreCaseValue, ignoreLengthValue,
ignoreCharValue.toCharArray(), maxIgnoreCharValue, true);
for (AnnotationFS annotationFS : found) {
- List<String> rowWhere = table.getRowWhere(index - 1, annotationFS.getCoveredText());
+ // HOTFIX: for feature assignment
+ String candidate = stream.getVisibleCoveredText(annotationFS);
+ for (int i = 0; i < maxIgnoreCharValue; i++) {
+ candidate = candidate.replaceFirst("[" + ignoreCharValue + "]", "");
+ }
+ List<String> rowWhere = table.getRowWhere(index - 1, candidate);
+ if (rowWhere.isEmpty() && ignoreCaseValue && candidate.length() > ignoreLengthValue) {
+ // TODO: does not cover all variants
+ rowWhere = table.getRowWhere(index - 1, candidate.toLowerCase());
+ }
FeatureStructure newFS = stream.getCas().createFS(type);
if (newFS instanceof Annotation) {
Annotation a = (Annotation) newFS;
Modified:
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java Wed Oct 29 14:21:19 2014
@@ -63,11 +63,11 @@ public class TreeWordList implements Rut
/**
* Constructs a TreeWordList from a resource.
- *
+ *
* @param resource
* Resource to create a TextWordList from
* @throws IllegalArgumentException
- * When {@code resource.getFileName()} is null or does not end with .txt or .twl.
+ * When {@code resource.getFileName()} is null or does not end with .txt or .twl.
*/
public TreeWordList(Resource resource) throws IOException {
final String name = resource.getFilename();
@@ -94,7 +94,7 @@ public class TreeWordList implements Rut
/**
* Constructs a TreeWordList from a file with path = filename
- *
+ *
* @param pathname
* path of the file to create a TextWordList from
*/
@@ -187,7 +187,7 @@ public class TreeWordList implements Rut
*/
public boolean contains(String s, boolean ignoreCase, int size, char[] ignoreChars,
int maxIgnoreChars, boolean ignoreWS) {
- if(s == null) {
+ if (s == null) {
return false;
}
TextNode pointer = root;
@@ -216,10 +216,14 @@ public class TreeWordList implements Rut
for (char each : ignoreChars) {
if (each == charAt) {
charAtIgnored = true;
+ maxIgnoreChars--;
break;
}
}
charAtIgnored &= index != 0;
+ if (maxIgnoreChars < 0) {
+ return false;
+ }
}
int next = ++index;
@@ -397,8 +401,8 @@ public class TreeWordList implements Rut
return name;
}
- public List<AnnotationFS> find(RutaStream stream, Map<String, Object> typeMap, boolean ignoreCase,
- int ignoreLength, boolean edit, double distance, String ignoreToken) {
+ public List<AnnotationFS> find(RutaStream stream, Map<String, Object> typeMap,
+ boolean ignoreCase, int ignoreLength, boolean edit, double distance, String ignoreToken) {
return null;
}
Modified:
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java (original)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java Wed Oct 29 14:21:19 2014
@@ -74,8 +74,28 @@ public class MarkTableTest {
Feature f1 = t.getFeatureByBaseName(fn1);
Feature f2 = t.getFeatureByBaseName(fn2);
ai = cas.getAnnotationIndex(t);
- assertEquals(3, ai.size());
+
+ assertEquals(7, ai.size());
iterator = ai.iterator();
+
+ next = iterator.next();
+ v1 = next.getStringValue(f1);
+ v2 = next.getStringValue(f2);
+ assertEquals("Peter", v1);
+ assertEquals("Ruta", v2);
+
+ next = iterator.next();
+ v1 = next.getStringValue(f1);
+ v2 = next.getStringValue(f2);
+ assertEquals("Marshall", v1);
+ assertEquals("UIMA", v2);
+
+ next = iterator.next();
+ v1 = next.getStringValue(f1);
+ v2 = next.getStringValue(f2);
+ assertEquals("Joern", v1);
+ assertEquals("CAS Editor", v2);
+
next = iterator.next();
v1 = next.getStringValue(f1);
v2 = next.getStringValue(f2);
Modified:
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta (original)
+++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta Wed Oct 29 14:21:19 2014
@@ -6,4 +6,4 @@ DECLARE T1, T2, T3, T4, T5;
DECLARE Annotation Person (STRING firstname, STRING system);
-Document{-> MARKTABLE(Person, 1, table, true, 0, "", 0, "firstname" = 2, "system" = 3)};
\ No newline at end of file
+Document{-> MARKTABLE(Person, 1, table, true, 0, "-.,", 10, "firstname" = 2, "system" = 3)};
\ No newline at end of file
Modified:
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt
URL:
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt (original)
+++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt Wed Oct 29 14:21:19 2014
@@ -1,3 +1,14 @@
-Peter Kluegl: Ruta
-Marshall Schor: UIMA
-Joern Kottmann: CAS Editor
\ No newline at end of file
+Kluegl
+Schor
+Kottmann
+
+now with some extra chars
+
+K-l--ue-gl
+Sc h<b> o</b>.r
+K,o, t,,tm,ann
+
+should not match:
+
+K-l-...........................-ue-gl
+