Author: pkluegl Date: Thu Jan 12 16:32:47 2012 New Revision: 1230625 URL: http://svn.apache.org/viewvc?rev=1230625&view=rev Log: UIMA-2330 Added greedy filtering behavior for rule inference. Added unit tests for basic filtering functionality.
Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java?rev=1230625&r1=1230624&r2=1230625&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/constraint/BasicTypeConstraint.java Thu Jan 12 16:32:47 2012 @@ -64,7 +64,7 @@ public class BasicTypeConstraint impleme TextMarkerBasic tmb = (TextMarkerBasic) fs; if (types != null) { for (Type each : types) { - result |= tmb.beginsWith(each) && tmb.endsWith(each); + result |= tmb.isPartOf(each)|| tmb.beginsWith(each) || tmb.endsWith(each); if (result) break; } Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java?rev=1230625&r1=1230624&r2=1230625&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/TextMarkerTypeMatcher.java Thu Jan 12 16:32:47 2012 @@ -26,7 +26,6 @@ import java.util.List; import java.util.TreeSet; import org.apache.uima.cas.ConstraintFactory; -import org.apache.uima.cas.FSIterator; import org.apache.uima.cas.FSMatchConstraint; import org.apache.uima.cas.Type; import org.apache.uima.cas.text.AnnotationFS; @@ -51,7 +50,8 @@ public class TextMarkerTypeMatcher imple public Collection<AnnotationFS> getMatchingAnnotations(TextMarkerStream stream, TextMarkerBlock parent) { - + // TODO what about the matching direction? + // TODO this comparator can ignore some annotations?! same offset same type Collection<AnnotationFS> result = new TreeSet<AnnotationFS>(comparator); List<Type> types = getTypes(parent, stream); for (Type type : types) { @@ -64,19 +64,18 @@ public class TextMarkerTypeMatcher imple result.add(stream.getDocumentAnnotation()); } else { - FSIterator<AnnotationFS> iterator = stream.getFilter().createFilteredIterator( - stream.getCas(), type); - - // AnnotationIndex<AnnotationFS> annotationIndex = stream.getCas().getAnnotationIndex(type); - // stream.getCas().createFilteredIterator(annotationIndex.iterator(), - // stream.getFilter().createFilteredIterator(null, stream, type)); - // FSMatchConstraint anchorConstraint = createAnchorConstraints(parent, stream); - // FSIterator<AnnotationFS> iterator = stream.getFilteredBasicIterator(anchorConstraint); - // iterator.moveToFirst(); - while (iterator.isValid()) { - AnnotationFS annotation = iterator.get(); - result.add(annotation); - iterator.moveToNext(); + stream.moveToFirst(); + while (stream.isValid()) { + TextMarkerBasic nextBasic = (TextMarkerBasic) stream.get(); + List<Type> allTypes = stream.getCas().getTypeSystem().getProperlySubsumedTypes(type); + allTypes.add(type); + for (Type eachType : allTypes) { + Collection<AnnotationFS> beginAnchors = nextBasic.getBeginAnchors(eachType); + if (beginAnchors != null) { + result.addAll(beginAnchors); + } + } + stream.moveToNext(); } } } @@ -91,12 +90,10 @@ public class TextMarkerTypeMatcher imple stream.moveToNext(); if (stream.isValid()) { TextMarkerBasic nextBasic = (TextMarkerBasic) stream.get(); - // TODO also child types! List<Type> reTypes = ruleElement.getMatcher().getTypes(parent, stream); Collection<AnnotationFS> anchors = new TreeSet<AnnotationFS>(new AnnotationComparator()); for (Type eachMatchType : reTypes) { - List<Type> types = stream.getCas().getTypeSystem().getProperlySubsumedTypes(eachMatchType); types.add(eachMatchType); for (Type eachType : types) { Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java?rev=1230625&r1=1230624&r2=1230625&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/main/java/org/apache/uima/textmarker/rule/quantifier/PlusGreedy.java Thu Jan 12 16:32:47 2012 @@ -47,7 +47,7 @@ public class PlusGreedy implements RuleE } for (RuleElementMatch match : matches) { allEmpty &= match.getTextsMatched().isEmpty(); - result &= match.getTextsMatched().isEmpty() || match.matched(); + result &= match.matched(); } if (!result && matches.size() > 1) { matches.remove(matches.size() - 1); Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java?rev=1230625&view=auto ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java (added) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/AllTests.java Thu Jan 12 16:32:47 2012 @@ -0,0 +1,12 @@ +package org.apache.uima.textmarker; + +import org.junit.runner.RunWith; +import org.junit.runners.Suite; +import org.junit.runners.Suite.SuiteClasses; + +@RunWith(Suite.class) +@SuiteClasses({ DynamicAnchoringTest.class, DynamicAnchoringTest2.class, FilteringTest.class, + QuantifierTest1.class, QuantifierTest2.class, RuleInferenceTest.class }) +public class AllTests { + +} Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java?rev=1230625&view=auto ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java (added) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/java/org/apache/uima/textmarker/FilteringTest.java Thu Jan 12 16:32:47 2012 @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.uima.textmarker; + +import static org.junit.Assert.assertEquals; + +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.FSIterator; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.cas.text.AnnotationIndex; +import org.junit.Test; + +public class FilteringTest { + + @Test + public void test() { + String name = this.getClass().getSimpleName(); + String namespace = this.getClass().getPackage().getName().replaceAll("\\.", "/"); + CAS cas = null; + try { + cas = TextMarkerTestUtils.process(namespace + "/" + name + ".tm", namespace + "/" + name + + ".txt", 50); + } catch (Exception e) { + e.printStackTrace(); + assert (false); + } + Type t = null; + AnnotationIndex<AnnotationFS> ai = null; + FSIterator<AnnotationFS> iterator = null; + + t = TextMarkerTestUtils.getTestType(cas, 1); + ai = cas.getAnnotationIndex(t); + assertEquals(0, ai.size()); + + t = TextMarkerTestUtils.getTestType(cas, 2); + ai = cas.getAnnotationIndex(t); + iterator = ai.iterator(); + assertEquals(2, ai.size()); + assertEquals("Peter, Jochen", iterator.next().getCoveredText()); + assertEquals("Jochen, Flo", iterator.next().getCoveredText()); + + t = TextMarkerTestUtils.getTestType(cas, 3); + ai = cas.getAnnotationIndex(t); + iterator = ai.iterator(); + assertEquals(0, ai.size()); + + t = TextMarkerTestUtils.getTestType(cas, 4); + ai = cas.getAnnotationIndex(t); + iterator = ai.iterator(); + assertEquals(2, ai.size()); + assertEquals(", Jochen, ", iterator.next().getCoveredText()); + assertEquals(", ", iterator.next().getCoveredText()); + + t = TextMarkerTestUtils.getTestType(cas, 5); + ai = cas.getAnnotationIndex(t); + iterator = ai.iterator(); + assertEquals(4, ai.size()); + assertEquals("Peter, Jochen, Flo", iterator.next().getCoveredText()); + assertEquals("Flo und", iterator.next().getCoveredText()); + assertEquals("und Georg", iterator.next().getCoveredText()); + assertEquals("Georg.", iterator.next().getCoveredText()); + + + t = TextMarkerTestUtils.getTestType(cas, 15); + ai = cas.getAnnotationIndex(t); + iterator = ai.iterator(); + assertEquals(1, ai.size()); + assertEquals("Peter, Jochen", iterator.next().getCoveredText()); + + t = TextMarkerTestUtils.getTestType(cas, 16); + ai = cas.getAnnotationIndex(t); + iterator = ai.iterator(); + assertEquals(1, ai.size()); + assertEquals("Georg.", iterator.next().getCoveredText()); + + t = TextMarkerTestUtils.getTestType(cas, 17); + ai = cas.getAnnotationIndex(t); + iterator = ai.iterator(); + assertEquals(1, ai.size()); + assertEquals("Flo und Georg.", iterator.next().getCoveredText()); + + if (cas != null) { + cas.release(); + } + + } +} Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm?rev=1230625&view=auto ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm (added) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.tm Thu Jan 12 16:32:47 2012 @@ -0,0 +1,28 @@ +PACKAGE org.apache.uima; + +DECLARE T1,T2,T3,T4,T5,T6,T7,T8,T9,T10,T11,T12,T13,T14,T15,T16,T17,T18,T19,T20,T21,T22,T23,T24,T25; + + +Document{-> RETAINTYPE(SPACE)}; + +W W{-> MARK(T1)}; + +W PM SPACE W{-> MARK(T2,1,2,3,4)}; + +Document{-> FILTERTYPE(W)}; + +W{-> MARK(T3)}; + +(PM SPACE)+{-> MARK(T4)}; + +Document{-> RETAINTYPE(W)}; + +Document{-> FILTERTYPE(T4)}; + +W ANY{-> MARK(T5,1,2)}; + +Document{-> FILTERTYPE, RETAINTYPE}; + +(((CW PM) CW){-> MARK(T15)} COMMA) + (CW SW (CW PERIOD){-> MARK(T16)}) + {-> MARK(T17)}; \ No newline at end of file Added: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt?rev=1230625&view=auto ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt (added) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-engine/src/test/resources/org/apache/uima/textmarker/FilteringTest.txt Thu Jan 12 16:32:47 2012 @@ -0,0 +1 @@ +Peter, Jochen, Flo und Georg. Modified: uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java URL: http://svn.apache.org/viewvc/uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java?rev=1230625&r1=1230624&r2=1230625&view=diff ============================================================================== --- uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java (original) +++ uima/sandbox/trunk/TextMarker/uimaj-ep-textmarker-ide/src/main/java/org/apache/uima/textmarker/ide/validator/TextMarkerTypeChecker.java Thu Jan 12 16:32:47 2012 @@ -28,6 +28,7 @@ import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.Stack; +import java.util.TreeSet; import org.apache.uima.UIMAFramework; import org.apache.uima.resource.ResourceManager; @@ -485,7 +486,7 @@ public class TextMarkerTypeChecker imple System.err.println("ERROR: Failed to get BasicTypeSystem!! " + this.toString()); } if (types == null) { - types = new HashSet<String>(); + types = new TreeSet<String>(); } // traverse: @@ -511,7 +512,7 @@ public class TextMarkerTypeChecker imple } public Set<String> getShortTypeNames(Set<String> types) { - Set<String> result = new HashSet<String>(); + Set<String> result = new TreeSet<String>(); for (String string : types) { String[] nameSpace = string.split("[.]"); result.add(nameSpace[nameSpace.length - 1]); @@ -539,7 +540,7 @@ public class TextMarkerTypeChecker imple fileExtended += "/"; } fileExtended = fileExtended.substring(0, fileExtended.length() - 1) + ".xml"; - Set<String> types = new HashSet<String>(); + Set<String> types = new TreeSet<String>(); for (IFolder folder : folders) { types.addAll(getTypes(folder, fileExtended)); }