Author: pkluegl Date: Sun Jun 2 15:46:35 2013 New Revision: 1488731 URL: http://svn.apache.org/r1488731 Log: UIMA-2777 - fixed some inference flaws - optimized whisk
Removed: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/tools/TestSetGenerator.java Modified: uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/PlusReluctant.java uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/QuestionReluctant.java uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/StarReluctant.java uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerMultiSlotRule.java uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/WhiskRuleItem.java uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/token/WhiskRuleItem.java uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/ui/TextRulerView.java Modified: uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java (original) +++ uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/WildCardRuleElement.java Sun Jun 2 15:46:35 2013 @@ -132,8 +132,12 @@ public class WildCardRuleElement extends doMatch(coveredByWildCard, extendedMatch, extendedContainerMatch, annotation == null, stream, crowd); if (extendedMatch.matched()) { - cre.continueMatch(after, endAnchor, extendedMatch, ruleApply, extendedContainerMatch, - sideStepOrigin, cre, stream, crowd); + if (endAnchor == null) { + cre.startMatch(extendedMatch, ruleApply, extendedContainerMatch, cre, stream, crowd); + } else { + cre.continueMatch(after, endAnchor, extendedMatch, ruleApply, extendedContainerMatch, + sideStepOrigin, cre, stream, crowd); + } List<RuleElementMatch> nextList = extendedContainerMatch.getInnerMatches().get(cre); boolean matched = hasMatched(nextList); if (!matched) { @@ -238,8 +242,13 @@ public class WildCardRuleElement extends doMatch(coveredByWildCard, extendedMatch, extendedContainerMatch, annotation == null, stream, crowd); if (extendedMatch.matched()) { - nextElement.continueMatch(after, endAnchor, extendedMatch, ruleApply, - extendedContainerMatch, sideStepOrigin, nextElement, stream, crowd); + if (endAnchor == null) { + nextElement.startMatch(extendedMatch, ruleApply, extendedContainerMatch, nextElement, + stream, crowd); + } else { + nextElement.continueMatch(after, endAnchor, extendedMatch, ruleApply, + extendedContainerMatch, sideStepOrigin, nextElement, stream, crowd); + } List<RuleElementMatch> nextList = extendedContainerMatch.getInnerMatches().get(nextElement); if (nextList == null || nextList.isEmpty() || !nextList.get(nextList.size() - 1).matched()) { moveOn(after, iterator); @@ -281,8 +290,8 @@ public class WildCardRuleElement extends return iterator; } - private FSIterator<AnnotationFS> getIteratorOfType(boolean after, Type type, AnnotationFS annotation, - RutaStream stream) { + private FSIterator<AnnotationFS> getIteratorOfType(boolean after, Type type, + AnnotationFS annotation, RutaStream stream) { CAS cas = stream.getCas(); FSIterator<AnnotationFS> result = null; if (stream.getDocumentAnnotation().equals(cas.getDocumentAnnotation())) { @@ -346,8 +355,13 @@ public class WildCardRuleElement extends doMatch(coveredByWildCard, extendedMatch, extendedContainerMatch, annotation == null, stream, crowd); if (extendedMatch.matched()) { - nextElement.continueMatch(after, endAnchor, extendedMatch, ruleApply, - extendedContainerMatch, sideStepOrigin, nextElement, stream, crowd); + if (endAnchor == null) { + nextElement.startMatch(extendedMatch, ruleApply, extendedContainerMatch, nextElement, + stream, crowd); + } else { + nextElement.continueMatch(after, endAnchor, extendedMatch, ruleApply, + extendedContainerMatch, sideStepOrigin, nextElement, stream, crowd); + } List<RuleElementMatch> nextList = extendedContainerMatch.getInnerMatches().get(nextElement); if (nextList == null || nextList.isEmpty()) { pointer = getNextPointer(after, anchor); @@ -430,15 +444,15 @@ public class WildCardRuleElement extends RutaBasic beginAnchor = stream.getBeginAnchor(begin); RutaBasic endAnchor = stream.getEndAnchor(end); - if(beginAnchor != null && !stream.isVisible(beginAnchor)) { - beginAnchor = stream.getBasicNextTo(false, beginAnchor); + if (beginAnchor != null && !stream.isVisible(beginAnchor)) { + beginAnchor = stream.getBasicNextTo(false, beginAnchor); begin = beginAnchor.getBegin(); } - if(endAnchor != null && !stream.isVisible(endAnchor)) { - endAnchor = stream.getBasicNextTo(true, endAnchor); + if (endAnchor != null && !stream.isVisible(endAnchor)) { + endAnchor = stream.getBasicNextTo(true, endAnchor); end = endAnchor.getEnd(); } - + AnnotationFS afs = cas.createAnnotation(type, begin, end); return afs; Modified: uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/PlusReluctant.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/PlusReluctant.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/PlusReluctant.java (original) +++ uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/PlusReluctant.java Sun Jun 2 15:46:35 2013 @@ -72,7 +72,7 @@ public class PlusReluctant implements Ru nextElement.continueMatch(after, annotation, extendedMatch, null, extendedContainerMatch, null, nextElement, stream, crowd); List<RuleElementMatch> nextList = extendedContainerMatch.getInnerMatches().get(nextElement); - return nextList == null || nextList.isEmpty(); + return nextList == null || nextList.isEmpty() || !nextList.get(nextList.size() - 1).matched(); } public boolean isOptional(RutaBlock parent, RutaStream stream) { Modified: uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/QuestionReluctant.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/QuestionReluctant.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/QuestionReluctant.java (original) +++ uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/QuestionReluctant.java Sun Jun 2 15:46:35 2013 @@ -67,7 +67,7 @@ public class QuestionReluctant implement nextElement.continueMatch(after, annotation, extendedMatch, null, extendedContainerMatch, null, nextElement, stream, crowd); List<RuleElementMatch> nextList = extendedContainerMatch.getInnerMatches().get(nextElement); - return nextList == null || nextList.isEmpty(); + return nextList == null || nextList.isEmpty() || !nextList.get(nextList.size() - 1).matched(); } public boolean isOptional(RutaBlock parent, RutaStream stream) { Modified: uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/StarReluctant.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/StarReluctant.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/StarReluctant.java (original) +++ uima/sandbox/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/rule/quantifier/StarReluctant.java Sun Jun 2 15:46:35 2013 @@ -50,7 +50,7 @@ public class StarReluctant implements Ru nextElement.continueMatch(after, annotation, extendedMatch, null, extendedContainerMatch, null, nextElement, stream, crowd); List<RuleElementMatch> nextList = extendedContainerMatch.getInnerMatches().get(nextElement); - return nextList == null || nextList.isEmpty(); + return nextList == null || nextList.isEmpty() || !nextList.get(nextList.size() - 1).matched(); } public boolean isOptional(RutaBlock parent, RutaStream stream) { Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerMultiSlotRule.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerMultiSlotRule.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerMultiSlotRule.java (original) +++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/core/TextRulerMultiSlotRule.java Sun Jun 2 15:46:35 2013 @@ -53,7 +53,7 @@ public class TextRulerMultiSlotRule exte } protected String getInterslotWildCard() { - return "ALL*? "; + return "# "; } @Override Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java (original) +++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/Whisk.java Sun Jun 2 15:46:35 2013 @@ -635,6 +635,7 @@ public class Whisk extends TextRulerBasi // create base 1 and base 2: WhiskRule base1 = rule.copy(); // slot filler rule TextRulerSlotPattern slotPattern = base1.getPatterns().get(slotIndex); + // questionable restriction: for (int i = 0; i < inside.size(); i++) if (i == 0 || (i == inside.size() - 1)) slotPattern.fillerPattern.add(inside.get(i).copy()); @@ -644,18 +645,60 @@ public class Whisk extends TextRulerBasi List<WhiskRuleItem> beforeList = getTermsBefore(inside.get(0), example); List<WhiskRuleItem> afterList = getTermsAfter(inside.get(inside.size() - 1), example); + beforeList.add(null); + afterList.add(null); Collection<WhiskRule> tempRules = new HashSet<WhiskRule>(); + // workaround for better rules: + // only inner begin + for (WhiskRuleItem eachBefore : beforeList) { + for (WhiskRuleItem eachAfter : afterList) { + WhiskRule copy = rule.copy(); + TextRulerSlotPattern textRulerSlotPattern = copy.getPatterns().get(slotIndex); + if(eachBefore != null) { + textRulerSlotPattern.preFillerPattern.add(eachBefore); + } + textRulerSlotPattern.fillerPattern.add(inside.get(0).copy()); + textRulerSlotPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem(0)); + if(eachAfter != null) { + textRulerSlotPattern.postFillerPattern.add(eachAfter); + } + tempRules.add(copy); + } + } + // onnly inner end + for (WhiskRuleItem eachBefore : beforeList) { + for (WhiskRuleItem eachAfter : afterList) { + WhiskRule copy = rule.copy(); + TextRulerSlotPattern textRulerSlotPattern = copy.getPatterns().get(slotIndex); + if(eachBefore != null) { + textRulerSlotPattern.preFillerPattern.add(eachBefore); + } + textRulerSlotPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem(0)); + textRulerSlotPattern.fillerPattern.add(inside.get(inside.size()-1).copy()); + if(eachAfter != null) { + textRulerSlotPattern.postFillerPattern.add(eachAfter); + } + tempRules.add(copy); + } + } + + + if (!beforeList.isEmpty()) { if (!afterList.isEmpty()) { for (WhiskRuleItem eachBefore : beforeList) { for (WhiskRuleItem eachAfter : afterList) { WhiskRule copy = rule.copy(); TextRulerSlotPattern textRulerSlotPattern = copy.getPatterns().get(slotIndex); - textRulerSlotPattern.preFillerPattern.add(eachBefore); + if(eachBefore != null ) { + textRulerSlotPattern.preFillerPattern.add(eachBefore); + } textRulerSlotPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem(getElementIndex( copy, inside.get(0)))); - textRulerSlotPattern.postFillerPattern.add(eachAfter); + if(eachAfter != null) { + textRulerSlotPattern.postFillerPattern.add(eachAfter); + } tempRules.add(copy); } } @@ -665,7 +708,9 @@ public class Whisk extends TextRulerBasi TextRulerSlotPattern textRulerSlotPattern = copy.getPatterns().get(slotIndex); textRulerSlotPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem(getElementIndex( copy, inside.get(0)))); - textRulerSlotPattern.preFillerPattern.add(eachBefore); + if(eachBefore != null) { + textRulerSlotPattern.preFillerPattern.add(eachBefore); + } tempRules.add(copy); } } @@ -675,7 +720,9 @@ public class Whisk extends TextRulerBasi TextRulerSlotPattern textRulerSlotPattern = copy.getPatterns().get(slotIndex); textRulerSlotPattern.fillerPattern.add(WhiskRuleItem.newWildCardItem(getElementIndex( copy, inside.get(0)))); - textRulerSlotPattern.postFillerPattern.add(eachAfter); + if(eachAfter!= null) { + textRulerSlotPattern.postFillerPattern.add(eachAfter); + } tempRules.add(copy); } } @@ -693,24 +740,35 @@ public class Whisk extends TextRulerBasi } } WhiskRule base2 = (WhiskRule) best; - - TextRulerToolkit.log("base1: " + base1.getRuleString()); - TextRulerToolkit.log("base2: " + base2.getRuleString()); List<TextRulerRule> testRules = new ArrayList<TextRulerRule>(); - testRules.add(base1); - testRules.add(base2); + if (base1 != null) { + TextRulerToolkit.log("base1: " + base1.getRuleString()); + testRules.add(base1); + } + if (base2 != null) { + TextRulerToolkit.log("base2: " + base2.getRuleString()); + testRules.add(base2); + } testRulesIfNotCached(testRules); - if (shouldAbort()) + if (shouldAbort()) { return null; - TextRulerToolkit.log("\tbase1: " + base1.getCoveringStatistics() + " --> laplacian = " - + base1.getLaplacian()); - TextRulerToolkit.log("\tbase2: " + base2.getCoveringStatistics() + " --> laplacian = " - + base2.getLaplacian()); - if (base2.getCoveringStatistics().getCoveredPositivesCount() > base1.getCoveringStatistics() - .getCoveredPositivesCount()) - result.add(base2); - else + } + if (base1 != null && base2 == null) { + TextRulerToolkit.log("\tbase1: " + base1.getCoveringStatistics() + " --> laplacian = " + + base1.getLaplacian()); result.add(base1); + } else { + TextRulerToolkit.log("\tbase1: " + base1.getCoveringStatistics() + " --> laplacian = " + + base1.getLaplacian()); + TextRulerToolkit.log("\tbase2: " + base2.getCoveringStatistics() + " --> laplacian = " + + base2.getLaplacian()); + if (base2.getCoveringStatistics().getCoveredPositivesCount() > base1 + .getCoveringStatistics().getCoveredPositivesCount()) { + result.add(base2); + } else { + result.add(base1); + } + } } TextRulerRule best = null; for (TextRulerRule each : result) { @@ -891,7 +949,6 @@ public class Whisk extends TextRulerBasi String key = r.getRuleString(); if (cachedTestedRuleStatistics.containsKey(key)) { r.setCoveringStatistics(cachedTestedRuleStatistics.get(key).copy()); - TextRulerToolkit.log("CACHE HIT !"); } else rulesToTest.add(r); } Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/WhiskRuleItem.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/WhiskRuleItem.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/WhiskRuleItem.java (original) +++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/generic/WhiskRuleItem.java Sun Jun 2 15:46:35 2013 @@ -195,8 +195,12 @@ public class WhiskRuleItem implements Te result += ", " + (numberInRule + 1) + ", " + (numberInRule + patternSize); result += ")}"; } - if (isStarWildCard) + if (isStarWildCard) { anchor += "*?"; + if(anchor.equals("ALL*?")) { + anchor = "#"; + } + } return anchor + result; } Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/token/WhiskRuleItem.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/token/WhiskRuleItem.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/token/WhiskRuleItem.java (original) +++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/whisk/token/WhiskRuleItem.java Sun Jun 2 15:46:35 2013 @@ -195,8 +195,12 @@ public class WhiskRuleItem implements Te result += ", " + (numberInRule + 1) + ", " + (numberInRule + patternSize); result += ")}"; } - if (isStarWildCard) + if (isStarWildCard) { anchor += "*?"; + if(anchor.equals("ALL*?")) { + anchor = "#"; + } + } return anchor + result; } Modified: uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/ui/TextRulerView.java URL: http://svn.apache.org/viewvc/uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/ui/TextRulerView.java?rev=1488731&r1=1488730&r2=1488731&view=diff ============================================================================== --- uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/ui/TextRulerView.java (original) +++ uima/sandbox/ruta/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/ui/TextRulerView.java Sun Jun 2 15:46:35 2013 @@ -267,67 +267,4 @@ public class TextRulerView extends ViewP viewContent.saveState(memento); } - // BATCH_STUFF - // private static String preprocessorTMFile; - // private static String[] slotNames; - // private static Set<String> filters; - // private static Map<String, Map<String, Object>> algParams; - // private static boolean skipPreprocessing; - // - // int foldNumber = 0; - // int slotNumber = -1; - // - // private static final int kFoldSize = 1; - // - // public void batchNext() - // { - // slotNumber++; - // if (slotNumber > slotNames.length-1) - // { - // slotNumber = 0; - // foldNumber ++; - // if (foldNumber > (kFoldSize-1)) - // { - // TextRulerToolkit.log("DONE WITH ALL STUFF, YAAAA!"); - // return; // stop - // } - // } - // - // // if (foldNumber == 2 && (slotNumber == 0 || slotNumber==1)) // SKIP for - // now! - // // { - // // algorithmDidEnd(null); - // // return; - // // } - // - // TextRulerToolkit.log("******* ******* ******* ******* NEW BATCH TASK:"); - // TextRulerToolkit.log("Fold: "+foldNumber); - // TextRulerToolkit.log("Slot: "+slotNumber); - // TextRulerToolkit.log("******* ******* ******* *******"); - // - // // String inFolder = - // "/Users/tobi/Documents/UniLaptop/Diplomarbeit/TestDataSets/withPosTags/Subset100/10fold/"+foldNumber+"/training/withtags"; - // // String inFolder = - // "/Users/tobi/Documents/UniLaptop/Diplomarbeit/TestDataSets/withPosTags/halfhalf/"+foldNumber+"/training/withtags"; - // String inFolder = - // "/Users/tobi/Documents/UniLaptop/Diplomarbeit/TestDataSets/withPosTags/9010_middle/"+foldNumber+"/training/withtags"; - // String[] slots = new String[slotNames.length]; - // int otherI = 1; - // for (int i=0; i<slotNames.length; i++) - // { - // if (i==slotNumber) - // slots[0] = slotNames[i]; - // else - // { - // slots[otherI] = slotNames[i]; - // otherI++; - // } - // - // } - // for (String s : slots) - // TextRulerToolkit.log("slot: "+s); - // TextRulerToolkit.log(inFolder); - // TextRulerController.start(inFolder, preprocessorTMFile, slots, filters, - // this, algParams, skipPreprocessing); - // } }