Author: pkluegl
Date: Wed Oct 29 14:21:19 2014
New Revision: 1635143

URL: http://svn.apache.org/r1635143
Log:
UIMA-4071
- fixed feature assignment on matches with ignored tokens or with invisible spans
- fixed threshold for max ignored chars
- extended test

Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java
    uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta
    uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt

Modified: 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java 
(original)
+++ 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/RutaStream.java 
Wed Oct 29 14:21:19 2014
@@ -848,6 +848,17 @@ public class RutaStream extends FSIterat
     return result;
   }
 
+  public String getVisibleCoveredText(AnnotationFS annotationFS) {
+    StringBuilder result = new StringBuilder();
+    List<RutaBasic> basicsInWindow = getBasicsInWindow(annotationFS);
+    for (RutaBasic each : basicsInWindow) {
+      if(isVisible(each)) {
+        result.append(each.getCoveredText());
+      }
+    }
+    return result.toString();
+  }
+
 
 
 }

Modified: 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java
 (original)
+++ 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/action/MarkTableAction.java
 Wed Oct 29 14:21:19 2014
@@ -64,8 +64,8 @@ public class MarkTableAction extends Abs
 
   public MarkTableAction(TypeExpression typeExpr, INumberExpression indexExpr,
           WordTableExpression tableExpr, Map<IStringExpression, INumberExpression> featureMap,
-          IBooleanExpression ignoreCase, INumberExpression ignoreLength, IStringExpression ignoreChar,
-          INumberExpression maxIgnoreChar) {
+          IBooleanExpression ignoreCase, INumberExpression ignoreLength,
+          IStringExpression ignoreChar, INumberExpression maxIgnoreChar) {
     super();
     this.typeExpr = typeExpr;
     this.indexExpr = indexExpr;
@@ -102,7 +102,16 @@ public class MarkTableAction extends Abs
     Collection<AnnotationFS> found = wordList.find(stream, ignoreCaseValue, ignoreLengthValue,
             ignoreCharValue.toCharArray(), maxIgnoreCharValue, true);
     for (AnnotationFS annotationFS : found) {
-      List<String> rowWhere = table.getRowWhere(index - 1, annotationFS.getCoveredText());
+      // HOTFIX: for feature assignment
+      String candidate = stream.getVisibleCoveredText(annotationFS);
+      for (int i = 0; i < maxIgnoreCharValue; i++) {
+        candidate = candidate.replaceFirst("[" + ignoreCharValue + "]", "");
+      }
+      List<String> rowWhere = table.getRowWhere(index - 1, candidate);
+      if (rowWhere.isEmpty() && ignoreCaseValue && candidate.length() > ignoreLengthValue) {
+        // TODO: does not cover all variants
+        rowWhere = table.getRowWhere(index - 1, candidate.toLowerCase());
+      }
       FeatureStructure newFS = stream.getCas().createFS(type);
       if (newFS instanceof Annotation) {
         Annotation a = (Annotation) newFS;

Modified: 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
 (original)
+++ 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/resource/TreeWordList.java
 Wed Oct 29 14:21:19 2014
@@ -63,11 +63,11 @@ public class TreeWordList implements Rut
 
   /**
    * Constructs a TreeWordList from a resource.
-   *
+   * 
    * @param resource
    *          Resource to create a TextWordList from
    * @throws IllegalArgumentException
-   *          When {@code resource.getFileName()} is null or does not end with .txt or .twl.
+   *           When {@code resource.getFileName()} is null or does not end with .txt or .twl.
    */
   public TreeWordList(Resource resource) throws IOException {
     final String name = resource.getFilename();
@@ -94,7 +94,7 @@ public class TreeWordList implements Rut
 
   /**
    * Constructs a TreeWordList from a file with path = filename
-   *
+   * 
    * @param pathname
    *          path of the file to create a TextWordList from
    */
@@ -187,7 +187,7 @@ public class TreeWordList implements Rut
    */
   public boolean contains(String s, boolean ignoreCase, int size, char[] ignoreChars,
           int maxIgnoreChars, boolean ignoreWS) {
-    if(s == null) {
+    if (s == null) {
       return false;
     }
     TextNode pointer = root;
@@ -216,10 +216,14 @@ public class TreeWordList implements Rut
       for (char each : ignoreChars) {
         if (each == charAt) {
           charAtIgnored = true;
+          maxIgnoreChars--;
           break;
         }
       }
       charAtIgnored &= index != 0;
+      if (maxIgnoreChars < 0) {
+        return false;
+      }
     }
     int next = ++index;
 
@@ -397,8 +401,8 @@ public class TreeWordList implements Rut
     return name;
   }
 
-  public List<AnnotationFS> find(RutaStream stream, Map<String, Object> typeMap, boolean ignoreCase,
-          int ignoreLength, boolean edit, double distance, String ignoreToken) {
+  public List<AnnotationFS> find(RutaStream stream, Map<String, Object> typeMap,
+          boolean ignoreCase, int ignoreLength, boolean edit, double distance, String ignoreToken) {
     return null;
   }
 

Modified: 
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- 
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java
 (original)
+++ 
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/action/MarkTableTest.java
 Wed Oct 29 14:21:19 2014
@@ -74,8 +74,28 @@ public class MarkTableTest {
     Feature f1 = t.getFeatureByBaseName(fn1);
     Feature f2 = t.getFeatureByBaseName(fn2);
     ai = cas.getAnnotationIndex(t);
-    assertEquals(3, ai.size());
+    
+    assertEquals(7, ai.size());
     iterator = ai.iterator();
+    
+    next = iterator.next();
+    v1 = next.getStringValue(f1);
+    v2 = next.getStringValue(f2);
+    assertEquals("Peter", v1);
+    assertEquals("Ruta", v2);
+    
+    next = iterator.next();
+    v1 = next.getStringValue(f1);
+    v2 = next.getStringValue(f2);
+    assertEquals("Marshall", v1);
+    assertEquals("UIMA", v2);
+    
+    next = iterator.next();
+    v1 = next.getStringValue(f1);
+    v2 = next.getStringValue(f2);
+    assertEquals("Joern", v1);
+    assertEquals("CAS Editor", v2);
+    
     next = iterator.next();
     v1 = next.getStringValue(f1);
     v2 = next.getStringValue(f2);

Modified: 
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- 
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta
 (original)
+++ 
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.ruta
 Wed Oct 29 14:21:19 2014
@@ -6,4 +6,4 @@ DECLARE T1, T2, T3, T4, T5;
 
 DECLARE Annotation Person (STRING firstname, STRING system);
 
-Document{-> MARKTABLE(Person, 1, table, true, 0, "", 0, "firstname" = 2, "system" = 3)};
\ No newline at end of file
+Document{-> MARKTABLE(Person, 1, table, true, 0, "-.,", 10, "firstname" = 2, "system" = 3)};
\ No newline at end of file

Modified: 
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt?rev=1635143&r1=1635142&r2=1635143&view=diff
==============================================================================
--- 
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt
 (original)
+++ 
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/action/MarkTableTest.txt
 Wed Oct 29 14:21:19 2014
@@ -1,3 +1,14 @@
-Peter Kluegl: Ruta
-Marshall Schor: UIMA
-Joern Kottmann: CAS Editor
\ No newline at end of file
+Kluegl
+Schor
+Kottmann
+
+now with some extra chars
+
+K-l--ue-gl
+Sc h<b> o</b>.r
+K,o,   t,,tm,ann
+
+should not match:
+
+K-l-...........................-ue-gl
+


Reply via email to