Author: pkluegl
Date: Thu Jan 12 16:32:47 2012
New Revision: 1230625

URL: http://svn.apache.org/viewvc?rev=1230625&view=rev
Log:
UIMA-2330
Added greedy filtering behavior for rule inference. Added unit tests for basic 
filtering functionality.

Added:
    uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java
    uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java
    uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm
    uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt
Modified:
    uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java
    uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java
    uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java
    uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java

Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java?rev=1230625&r1=1230624&r2=1230625&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java Thu Jan 12 16:32:47 2012
@@ -64,7 +64,7 @@ public class BasicTypeConstraint impleme
       TextMarkerBasic tmb = (TextMarkerBasic) fs;
       if (types != null) {
         for (Type each : types) {
-          result |= tmb.beginsWith(each) && tmb.endsWith(each);
+          result |= tmb.isPartOf(each)|| tmb.beginsWith(each) || tmb.endsWith(each);
           if (result)
             break;
         }

Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java?rev=1230625&r1=1230624&r2=1230625&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java Thu Jan 12 16:32:47 2012
@@ -26,7 +26,6 @@ import java.util.List;
 import java.util.TreeSet;
 
 import org.apache.uima.cas.ConstraintFactory;
-import org.apache.uima.cas.FSIterator;
 import org.apache.uima.cas.FSMatchConstraint;
 import org.apache.uima.cas.Type;
 import org.apache.uima.cas.text.AnnotationFS;
@@ -51,7 +50,8 @@ public class TextMarkerTypeMatcher imple
 
   public Collection<AnnotationFS> getMatchingAnnotations(TextMarkerStream stream,
           TextMarkerBlock parent) {
-
+    // TODO what about the matching direction?
+    // TODO this comparator can ignore some annotations?! same offset same type
     Collection<AnnotationFS> result = new TreeSet<AnnotationFS>(comparator);
     List<Type> types = getTypes(parent, stream);
     for (Type type : types) {
@@ -64,19 +64,18 @@ public class TextMarkerTypeMatcher imple
         result.add(stream.getDocumentAnnotation());
 
       } else {
-        FSIterator<AnnotationFS> iterator = stream.getFilter().createFilteredIterator(
-                stream.getCas(), type);
-
-        // AnnotationIndex<AnnotationFS> annotationIndex = stream.getCas().getAnnotationIndex(type);
-        // stream.getCas().createFilteredIterator(annotationIndex.iterator(),
-        // stream.getFilter().createFilteredIterator(null, stream, type));
-        // FSMatchConstraint anchorConstraint = createAnchorConstraints(parent, stream);
-        // FSIterator<AnnotationFS> iterator = stream.getFilteredBasicIterator(anchorConstraint);
-        // iterator.moveToFirst();
-        while (iterator.isValid()) {
-          AnnotationFS annotation = iterator.get();
-          result.add(annotation);
-          iterator.moveToNext();
+        stream.moveToFirst();
+        while (stream.isValid()) {
+          TextMarkerBasic nextBasic = (TextMarkerBasic) stream.get();
+          List<Type> allTypes = stream.getCas().getTypeSystem().getProperlySubsumedTypes(type);
+          allTypes.add(type);
+          for (Type eachType : allTypes) {
+            Collection<AnnotationFS> beginAnchors = nextBasic.getBeginAnchors(eachType);
+            if (beginAnchors != null) {
+              result.addAll(beginAnchors);
+            }
+          }
+          stream.moveToNext();
         }
       }
     }
@@ -91,12 +90,10 @@ public class TextMarkerTypeMatcher imple
     stream.moveToNext();
     if (stream.isValid()) {
       TextMarkerBasic nextBasic = (TextMarkerBasic) stream.get();
-      // TODO also child types!
       List<Type> reTypes = ruleElement.getMatcher().getTypes(parent, stream);
 
       Collection<AnnotationFS> anchors = new TreeSet<AnnotationFS>(new AnnotationComparator());
       for (Type eachMatchType : reTypes) {
-
         List<Type> types = stream.getCas().getTypeSystem().getProperlySubsumedTypes(eachMatchType);
         types.add(eachMatchType);
         for (Type eachType : types) {

Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java?rev=1230625&r1=1230624&r2=1230625&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java Thu Jan 12 16:32:47 2012
@@ -47,7 +47,7 @@ public class PlusGreedy implements RuleE
     }
     for (RuleElementMatch match : matches) {
       allEmpty &= match.getTextsMatched().isEmpty();
-      result &= match.getTextsMatched().isEmpty() || match.matched();
+      result &=  match.matched();
     }
     if (!result && matches.size() > 1) {
       matches.remove(matches.size() - 1);

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java?rev=1230625&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java Thu Jan 12 16:32:47 2012
@@ -0,0 +1,12 @@
+package org.apache.uima.textmarker;
+
+import org.junit.runner.RunWith;
+import org.junit.runners.Suite;
+import org.junit.runners.Suite.SuiteClasses;
+
+@RunWith(Suite.class)
+@SuiteClasses({ DynamicAnchoringTest.class, DynamicAnchoringTest2.class, FilteringTest.class,
+    QuantifierTest1.class, QuantifierTest2.class, RuleInferenceTest.class })
+public class AllTests {
+
+}

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java?rev=1230625&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java Thu Jan 12 16:32:47 2012
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.textmarker;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.FSIterator;
+import org.apache.uima.cas.Type;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.junit.Test;
+
+public class FilteringTest {
+
+  @Test
+  public void test() {
+    String name = this.getClass().getSimpleName();
+    String namespace = this.getClass().getPackage().getName().replaceAll("\\.", "/");
+    CAS cas = null;
+    try {
+      cas = TextMarkerTestUtils.process(namespace + "/" + name + ".tm", namespace + "/" + name
+              + ".txt", 50);
+    } catch (Exception e) {
+      e.printStackTrace();
+      assert (false);
+    }
+    Type t = null;
+    AnnotationIndex<AnnotationFS> ai = null;
+    FSIterator<AnnotationFS> iterator = null;
+
+    t = TextMarkerTestUtils.getTestType(cas, 1);
+    ai = cas.getAnnotationIndex(t);
+    assertEquals(0, ai.size());
+    
+    t = TextMarkerTestUtils.getTestType(cas, 2);
+    ai = cas.getAnnotationIndex(t);
+    iterator = ai.iterator();
+    assertEquals(2, ai.size());
+    assertEquals("Peter, Jochen", iterator.next().getCoveredText());
+    assertEquals("Jochen, Flo", iterator.next().getCoveredText());
+
+    t = TextMarkerTestUtils.getTestType(cas, 3);
+    ai = cas.getAnnotationIndex(t);
+    iterator = ai.iterator();
+    assertEquals(0, ai.size());
+
+    t = TextMarkerTestUtils.getTestType(cas, 4);
+    ai = cas.getAnnotationIndex(t);
+    iterator = ai.iterator();
+    assertEquals(2, ai.size());
+    assertEquals(", Jochen, ", iterator.next().getCoveredText());
+    assertEquals(", ", iterator.next().getCoveredText());
+
+    t = TextMarkerTestUtils.getTestType(cas, 5);
+    ai = cas.getAnnotationIndex(t);
+    iterator = ai.iterator();
+    assertEquals(4, ai.size());
+    assertEquals("Peter, Jochen, Flo", iterator.next().getCoveredText());
+    assertEquals("Flo und", iterator.next().getCoveredText());
+    assertEquals("und Georg", iterator.next().getCoveredText());
+    assertEquals("Georg.", iterator.next().getCoveredText());
+
+  
+    t = TextMarkerTestUtils.getTestType(cas, 15);
+    ai = cas.getAnnotationIndex(t);
+    iterator = ai.iterator();
+    assertEquals(1, ai.size());
+    assertEquals("Peter, Jochen", iterator.next().getCoveredText());
+
+    t = TextMarkerTestUtils.getTestType(cas, 16);
+    ai = cas.getAnnotationIndex(t);
+    iterator = ai.iterator();
+    assertEquals(1, ai.size());
+    assertEquals("Georg.", iterator.next().getCoveredText());
+
+    t = TextMarkerTestUtils.getTestType(cas, 17);
+    ai = cas.getAnnotationIndex(t);
+    iterator = ai.iterator();
+    assertEquals(1, ai.size());
+    assertEquals("Flo und Georg.", iterator.next().getCoveredText());
+
+    if (cas != null) {
+      cas.release();
+    }
+
+  }
+}

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm?rev=1230625&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm Thu Jan 12 16:32:47 2012
@@ -0,0 +1,28 @@
+PACKAGE org.apache.uima;
+
+DECLARE T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17,T18,T19,T20,T21,T22,T23,T24,T25;
+
+
+Document{-> RETAINTYPE(SPACE)};
+
+W W{-> MARK(T1)};
+
+W PM SPACE W{-> MARK(T2,1,2,3,4)};
+
+Document{-> FILTERTYPE(W)};
+
+W{-> MARK(T3)};
+
+(PM SPACE)+{-> MARK(T4)};
+
+Document{-> RETAINTYPE(W)};
+
+Document{-> FILTERTYPE(T4)};
+
+W ANY{-> MARK(T5,1,2)};
+
+Document{-> FILTERTYPE, RETAINTYPE};
+
+(((CW PM) CW){-> MARK(T15)} COMMA)
+  (CW SW  (CW PERIOD){-> MARK(T16)})
+  {-> MARK(T17)};
\ No newline at end of file

Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt?rev=1230625&view=auto
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt (added)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt Thu Jan 12 16:32:47 2012
@@ -0,0 +1 @@
+Peter, Jochen, Flo und Georg.

Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java
URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java?rev=1230625&r1=1230624&r2=1230625&view=diff
==============================================================================
--- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java (original)
+++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java Thu Jan 12 16:32:47 2012
@@ -28,6 +28,7 @@ import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
 import java.util.Stack;
+import java.util.TreeSet;
 
 import org.apache.uima.UIMAFramework;
 import org.apache.uima.resource.ResourceManager;
@@ -485,7 +486,7 @@ public class TextMarkerTypeChecker imple
       System.err.println("ERROR: Failed to get BasicTypeSystem!! " + this.toString());
     }
     if (types == null) {
-      types = new HashSet<String>();
+      types = new TreeSet<String>();
     }
 
     // traverse:
@@ -511,7 +512,7 @@ public class TextMarkerTypeChecker imple
   }
 
   public Set<String> getShortTypeNames(Set<String> types) {
-    Set<String> result = new HashSet<String>();
+    Set<String> result = new TreeSet<String>();
     for (String string : types) {
       String[] nameSpace = string.split("[.]");
       result.add(nameSpace[nameSpace.length - 1]);
@@ -539,7 +540,7 @@ public class TextMarkerTypeChecker imple
       fileExtended += "/";
     }
     fileExtended = fileExtended.substring(0, fileExtended.length() - 1) + ".xml";
-    Set<String> types = new HashSet<String>();
+    Set<String> types = new TreeSet<String>();
     for (IFolder folder : folders) {
       types.addAll(getTypes(folder, fileExtended));
     }


Reply via email to