Modified: uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java URL: http://svn.apache.org/viewvc/uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java?rev=1869967&r1=1869966&r2=1869967&view=diff ============================================================================== --- uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java (original) +++ uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/kep/KEPLearner.java Mon Nov 18 12:19:31 2019 @@ -28,6 +28,8 @@ import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; +import java.util.logging.Level; +import java.util.logging.Logger; import org.apache.uima.cas.CAS; import org.apache.uima.cas.ConstraintFactory; @@ -142,7 +144,8 @@ public class KEPLearner extends TextRule removeBadRules(); long estimatedTime = (System.nanoTime() - startTime) / 1000000000; - System.out.println(estimatedTime + " seconds needed to learn all rules"); + Logger.getLogger(getClass().getName()).log(Level.INFO, + estimatedTime + " seconds needed to learn all rules"); sendStatusUpdateToDelegate("Done", TextRulerLearnerState.ML_DONE, true); } @@ -193,15 +196,13 @@ public class KEPLearner extends TextRule exampleMap.put(annotation.getType().getName(), list); } } - double aLength = (double) (annotation.getEnd() - annotation.getBegin()); - lengthMap.put( - annotation.getType().getName(), - lengthMap.get(annotation.getType().getName()) == null ? aLength : lengthMap - .get(annotation.getType().getName()) + aLength); - countMap.put( - annotation.getType().getName(), - countMap.get(annotation.getType().getName()) == null ? 1 : countMap.get(annotation - .getType().getName()) + 1); + double aLength = annotation.getEnd() - annotation.getBegin(); + lengthMap.put(annotation.getType().getName(), + lengthMap.get(annotation.getType().getName()) == null ? aLength + : lengthMap.get(annotation.getType().getName()) + aLength); + countMap.put(annotation.getType().getName(), + countMap.get(annotation.getType().getName()) == null ? 1 + : countMap.get(annotation.getType().getName()) + 1); } } @@ -213,16 +214,17 @@ public class KEPLearner extends TextRule } double exLength = 0; for (TextRulerExample ex : exampleDocuments.getAllPositiveExamples()) { - exLength += (double) (ex.getAnnotation().getEnd() - ex.getAnnotation().getBegin()); + exLength += ex.getAnnotation().getEnd() - ex.getAnnotation().getBegin(); } double bestRatio = 0; Type bestType = null; for (Type type : result) { - if ((exLength / lengthMap.get(type.getName()) > bestRatio && countMap.get(type.getName()) <= exampleDocuments - .getAllPositiveExamples().size()) + if ((exLength / lengthMap.get(type.getName()) > bestRatio + && countMap.get(type.getName()) <= exampleDocuments.getAllPositiveExamples().size()) || (exLength / lengthMap.get(type.getName()) == bestRatio - && countMap.get(type.getName()) > countMap.get(bestType.getName()) && countMap - .get(type.getName()) <= exampleDocuments.getAllPositiveExamples().size())) { + && countMap.get(type.getName()) > countMap.get(bestType.getName()) + && countMap.get(type.getName()) <= exampleDocuments.getAllPositiveExamples() + .size())) { bestType = type; bestRatio = exLength / lengthMap.get(type.getName()); } @@ -235,8 +237,8 @@ public class KEPLearner extends TextRule private void learnRules(TextRulerTarget target) { List<KEPRule> ruleList = this.ruleLists.get(target.getSingleSlotTypeName()); - List<TextRulerExample> coveredExamples = this.coveredExamples.get(target - .getSingleSlotTypeName()); + List<TextRulerExample> coveredExamples = this.coveredExamples + .get(target.getSingleSlotTypeName()); List<TextRulerExample> positiveExamples = this.exampleDocuments.getAllPositiveExamples(); for (TextRulerExample e : positiveExamples) { @@ -261,9 +263,8 @@ public class KEPLearner extends TextRule private List<KEPRule> makeInFillerRulesForExample(TextRulerExample e) { sendStatusUpdateToDelegate( - "Searching for Infiller Rules for " - + e.getTarget().getSingleSlotTypeName() - .substring(e.getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1), + "Searching for Infiller Rules for " + e.getTarget().getSingleSlotTypeName() + .substring(e.getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1), TextRulerLearnerState.ML_RUNNING, false); Collection<KEPRule> rules = new HashSet<KEPRule>(); @@ -281,10 +282,9 @@ public class KEPLearner extends TextRule testRulesOnDocumentSet(result, exampleDocuments); sendStatusUpdateToDelegate( - "Searching for Infiller Rules for " - + e.getTarget().getSingleSlotTypeName() - .substring(e.getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1) - + " done", TextRulerLearnerState.ML_RUNNING, true); + "Searching for Infiller Rules for " + e.getTarget().getSingleSlotTypeName().substring( + e.getTarget().getSingleSlotTypeName().lastIndexOf(".") + 1) + " done", + TextRulerLearnerState.ML_RUNNING, true); return new ArrayList<KEPRule>(rules); } @@ -302,8 +302,8 @@ public class KEPLearner extends TextRule Collection<KEPRule> expandedRules = new HashSet<KEPRule>(); if (rules.isEmpty()) { - List<AnnotationFS> seeds = getAnnotationsStartingAt(e.getDocumentCAS(), e.getAnnotation() - .getBegin(), e.getAnnotation().getEnd()); + List<AnnotationFS> seeds = getAnnotationsStartingAt(e.getDocumentCAS(), + e.getAnnotation().getBegin(), e.getAnnotation().getEnd()); if (seeds.isEmpty()) { expanding = false; } @@ -351,9 +351,8 @@ public class KEPLearner extends TextRule private List<KEPRule> getCandidateClassificationRules(TextRulerTarget target) { sendStatusUpdateToDelegate( - "Searching for Candidate Classification Rules for " - + target.getSingleSlotTypeName().substring( - target.getSingleSlotTypeName().lastIndexOf(".") + 1), + "Searching for Candidate Classification Rules for " + target.getSingleSlotTypeName() + .substring(target.getSingleSlotTypeName().lastIndexOf(".") + 1), TextRulerLearnerState.ML_RUNNING, false); List<KEPRule> result = new ArrayList<KEPRule>(); List<Type> types = getTokensInNExamples(exampleDocuments.getAllPositiveExamples(), @@ -366,9 +365,8 @@ public class KEPLearner extends TextRule // result = getBestAndOptimalRules(result); result = addConditions(result, target); sendStatusUpdateToDelegate( - "Searching for Candidate Classification Rules for " - + target.getSingleSlotTypeName().substring( - target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done", + "Searching for Candidate Classification Rules for " + target.getSingleSlotTypeName() + .substring(target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done", TextRulerLearnerState.ML_RUNNING, true); return result; } @@ -385,8 +383,8 @@ public class KEPLearner extends TextRule for (Type type : containedTypes) { if (!type.getName().equals(ruleItem.getType().getName()) && !ruleItem.containsAndCondition(type)) { - result.add(new KEPRule(this, target).addInFillerItem(ruleItem.copy().addAndCondition( - new KEPRuleItemCondition(type, Condition.CONTAINS, false)))); + result.add(new KEPRule(this, target).addInFillerItem(ruleItem.copy() + .addAndCondition(new KEPRuleItemCondition(type, Condition.CONTAINS, false)))); } } } @@ -413,9 +411,8 @@ public class KEPLearner extends TextRule result.addAll(addConditions(toRefine, target)); } sendStatusUpdateToDelegate( - "Adding conditions to rules for " - + target.getSingleSlotTypeName().substring( - target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done", + "Adding conditions to rules for " + target.getSingleSlotTypeName() + .substring(target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done", TextRulerLearnerState.ML_RUNNING, true); return result; } @@ -424,13 +421,9 @@ public class KEPLearner extends TextRule if (!baseRules.isEmpty() && !shouldAbort()) { sendStatusUpdateToDelegate( "Adding postfillers to rules for " - + baseRules - .get(0) - .getTarget() - .getSingleSlotTypeName() - .substring( - baseRules.get(0).getTarget().getSingleSlotTypeName() - .lastIndexOf(".") + 1), + + baseRules.get(0).getTarget().getSingleSlotTypeName() + .substring(baseRules.get(0).getTarget().getSingleSlotTypeName() + .lastIndexOf(".") + 1), TextRulerLearnerState.ML_RUNNING, true); } else { return new ArrayList<KEPRule>(); @@ -445,12 +438,12 @@ public class KEPLearner extends TextRule if (lastItem != null) { end = lastItem.getEnd(); } - List<AnnotationFS> annotations = getAnnotationsStartingAt(e.getDocumentCAS(), end, e - .getDocumentCAS().getDocumentText().length()); + List<AnnotationFS> annotations = getAnnotationsStartingAt(e.getDocumentCAS(), end, + e.getDocumentCAS().getDocumentText().length()); boolean blockBoundaryHit = false; for (AnnotationFS annotationFS : annotations) { - if (annotationFS.getType().equals( - blocks.get(baseRules.get(0).getTarget().getSingleSlotTypeName()))) { + if (annotationFS.getType() + .equals(blocks.get(baseRules.get(0).getTarget().getSingleSlotTypeName()))) { blockBoundaryHit = true; break; } @@ -488,13 +481,10 @@ public class KEPLearner extends TextRule resultList.addAll(makePreFillers(resultList, changed)); sendStatusUpdateToDelegate( "Adding postfillers to rules for " - + baseRules - .get(0) - .getTarget() - .getSingleSlotTypeName() - .substring( - baseRules.get(0).getTarget().getSingleSlotTypeName() - .lastIndexOf(".") + 1) + " done", + + baseRules.get(0).getTarget().getSingleSlotTypeName() + .substring(baseRules.get(0).getTarget().getSingleSlotTypeName() + .lastIndexOf(".") + 1) + + " done", TextRulerLearnerState.ML_RUNNING, true); return resultList; } @@ -503,13 +493,9 @@ public class KEPLearner extends TextRule if (!baseRules.isEmpty() && !shouldAbort()) { sendStatusUpdateToDelegate( "Adding prefillers to rules for " - + baseRules - .get(0) - .getTarget() - .getSingleSlotTypeName() - .substring( - baseRules.get(0).getTarget().getSingleSlotTypeName() - .lastIndexOf(".") + 1), + + baseRules.get(0).getTarget().getSingleSlotTypeName() + .substring(baseRules.get(0).getTarget().getSingleSlotTypeName() + .lastIndexOf(".") + 1), TextRulerLearnerState.ML_RUNNING, true); } else { return new ArrayList<KEPRule>(); @@ -527,8 +513,8 @@ public class KEPLearner extends TextRule List<AnnotationFS> annotations = getAnnotationsEndingAt(begin, e.getDocumentCAS()); boolean blockBoundaryHit = false; for (AnnotationFS annotationFS : annotations) { - if (annotationFS.getType().equals( - blocks.get(baseRules.get(0).getTarget().getSingleSlotTypeName()))) { + if (annotationFS.getType() + .equals(blocks.get(baseRules.get(0).getTarget().getSingleSlotTypeName()))) { blockBoundaryHit = true; break; } @@ -566,13 +552,10 @@ public class KEPLearner extends TextRule sendStatusUpdateToDelegate( "Adding prefillers to rules for " - + baseRules - .get(0) - .getTarget() - .getSingleSlotTypeName() - .substring( - baseRules.get(0).getTarget().getSingleSlotTypeName() - .lastIndexOf(".") + 1) + " done", + + baseRules.get(0).getTarget().getSingleSlotTypeName() + .substring(baseRules.get(0).getTarget().getSingleSlotTypeName() + .lastIndexOf(".") + 1) + + " done", TextRulerLearnerState.ML_RUNNING, true); return resultList; } @@ -584,8 +567,8 @@ public class KEPLearner extends TextRule && hasPerfectRules.get(slotNamesWithBoundaries[3 * i + 2])) { List<KEPRule> list = new ArrayList<KEPRule>(); for (KEPRule kepRule : ruleLists.get(slotNames[i])) { - List<TextRulerExample> exList = new ArrayList<TextRulerExample>(kepRule - .getCoveringStatistics().getCoveredNegativeExamples()); + List<TextRulerExample> exList = new ArrayList<TextRulerExample>( + kepRule.getCoveringStatistics().getCoveredNegativeExamples()); exList.removeAll(getCorrectedExamples(slotNames[i])); if (exList.size() == 0) { list.add(kepRule); @@ -596,8 +579,8 @@ public class KEPLearner extends TextRule if (!hasPerfectRules.get(slotNamesWithBoundaries[3 * i + 1])) { List<KEPRule> list = new ArrayList<KEPRule>(); for (KEPRule kepRule : ruleLists.get(slotNamesWithBoundaries[3 * i + 1])) { - List<TextRulerExample> exList = new ArrayList<TextRulerExample>(kepRule - .getCoveringStatistics().getCoveredNegativeExamples()); + List<TextRulerExample> exList = new ArrayList<TextRulerExample>( + kepRule.getCoveringStatistics().getCoveredNegativeExamples()); exList.removeAll(getCorrectedExamples(slotNamesWithBoundaries[3 * i + 1])); if (exList.size() == 0) { list.add(kepRule); @@ -607,8 +590,8 @@ public class KEPLearner extends TextRule if (!hasPerfectRules.get(slotNamesWithBoundaries[3 * i + 2])) { List<KEPRule> list = new ArrayList<KEPRule>(); for (KEPRule kepRule : ruleLists.get(slotNamesWithBoundaries[3 * i + 2])) { - List<TextRulerExample> exList = new ArrayList<TextRulerExample>(kepRule - .getCoveringStatistics().getCoveredNegativeExamples()); + List<TextRulerExample> exList = new ArrayList<TextRulerExample>( + kepRule.getCoveringStatistics().getCoveredNegativeExamples()); exList.removeAll(getCorrectedExamples(slotNamesWithBoundaries[3 * i + 2])); if (exList.size() == 0) { list.add(kepRule); @@ -621,9 +604,8 @@ public class KEPLearner extends TextRule private List<KEPRule> makeRemovalRules(TextRulerTarget target) { sendStatusUpdateToDelegate( - "Searching for Removal Rules for " - + target.getSingleSlotTypeName().substring( - target.getSingleSlotTypeName().lastIndexOf(".") + 1), + "Searching for Removal Rules for " + target.getSingleSlotTypeName() + .substring(target.getSingleSlotTypeName().lastIndexOf(".") + 1), TextRulerLearnerState.ML_RUNNING, false); if (!hasFalsePositives(target.getSingleSlotTypeName())) return new ArrayList<KEPRule>(); @@ -638,18 +620,18 @@ public class KEPLearner extends TextRule if (!containedTypes.isEmpty()) { KEPRuleItem containsRuleItem = new KEPRuleItem(targetType); for (Type type : containedTypes) { - result.add(new KEPRule(this, target).addInFillerItem( - containsRuleItem.copy().addAndCondition( - new KEPRuleItemCondition(type, Condition.CONTAINS, true))) + result.add(new KEPRule(this, target) + .addInFillerItem(containsRuleItem.copy() + .addAndCondition(new KEPRuleItemCondition(type, Condition.CONTAINS, true))) .setCorrectionRule(true)); } } if (!notContainedTypes.isEmpty()) { KEPRuleItem notContainsRuleItem = new KEPRuleItem(targetType); for (Type type : notContainedTypes) { - result.add(new KEPRule(this, target).addInFillerItem( - notContainsRuleItem.copy().addAndCondition( - new KEPRuleItemCondition(type, Condition.CONTAINS, false))) + result.add(new KEPRule(this, target) + .addInFillerItem(notContainsRuleItem.copy() + .addAndCondition(new KEPRuleItemCondition(type, Condition.CONTAINS, false))) .setCorrectionRule(true)); } } @@ -667,14 +649,14 @@ public class KEPLearner extends TextRule } result.removeAll(toRemove); if (!toMerge.isEmpty()) { - result.add(new KEPRule(this, target).addInFillerItem( - new KEPRuleItem(targetType).addConditions(toMerge)).setCorrectionRule(true)); + result.add(new KEPRule(this, target) + .addInFillerItem(new KEPRuleItem(targetType).addConditions(toMerge)) + .setCorrectionRule(true)); testCorrectionRules(target); } sendStatusUpdateToDelegate( - "Searching for Removal Rules for " - + target.getSingleSlotTypeName().substring( - target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done", + "Searching for Removal Rules for " + target.getSingleSlotTypeName() + .substring(target.getSingleSlotTypeName().lastIndexOf(".") + 1) + " done", TextRulerLearnerState.ML_RUNNING, true); return result; } @@ -688,16 +670,16 @@ public class KEPLearner extends TextRule private List<AnnotationFS> getAnnotationsEndingAt(int end, CAS cas) { List<AnnotationFS> result = new ArrayList<AnnotationFS>(); - FSIterator<AnnotationFS> it = cas.getAnnotationIndex( - cas.getTypeSystem().getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME)).iterator(); + FSIterator<AnnotationFS> it = cas + .getAnnotationIndex(cas.getTypeSystem().getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME)) + .iterator(); while (it.isValid() && it.get().getBegin() < end) { it.moveToNext(); } do it.moveToPrevious(); - while (it.isValid() - && (it.get().getBegin() >= end || filterSetWithSlotNames.contains(it.get().getType() - .getName()))); + while (it.isValid() && (it.get().getBegin() >= end + || filterSetWithSlotNames.contains(it.get().getType().getName()))); if (!it.isValid()) return result; end = it.get().getEnd(); @@ -753,11 +735,11 @@ public class KEPLearner extends TextRule List<KEPRule> tmpList = new ArrayList<KEPRule>(); List<TextRulerExample> coveredExamples = new ArrayList<TextRulerExample>(); List<TextRulerExample> positiveExamples = exampleDocuments.getAllPositiveExamples(); - List<TextRulerExample> correctedExamples = getCorrectedExamples(rules.get(0).getTarget() - .getSingleSlotTypeName()); + List<TextRulerExample> correctedExamples = getCorrectedExamples( + rules.get(0).getTarget().getSingleSlotTypeName()); for (KEPRule rule : rules) { - List<TextRulerExample> uncorrectedExamples = new ArrayList<TextRulerExample>(rule - .getCoveringStatistics().getCoveredNegativeExamples()); + List<TextRulerExample> uncorrectedExamples = new ArrayList<TextRulerExample>( + rule.getCoveringStatistics().getCoveredNegativeExamples()); uncorrectedExamples.removeAll(correctedExamples); if (uncorrectedExamples.size() == 0 && rule.getCoveringStatistics().getCoveredPositivesCount() > 0) @@ -774,8 +756,8 @@ public class KEPLearner extends TextRule List<KEPRule> bestRules = getBestRules(rules); while (!coveredExamples.containsAll(positiveExamples) && !bestRules.isEmpty()) { KEPRule bestRule = bestRules.get(0); - if (!coveredExamples.containsAll(bestRule.getCoveringStatistics() - .getCoveredPositiveExamples())) { + if (!coveredExamples + .containsAll(bestRule.getCoveringStatistics().getCoveredPositiveExamples())) { coveredExamples.removeAll(bestRule.getCoveringStatistics().getCoveredPositiveExamples()); coveredExamples.addAll(bestRule.getCoveringStatistics().getCoveredPositiveExamples()); tmpList.add(bestRule); @@ -790,6 +772,7 @@ public class KEPLearner extends TextRule if (rules.isEmpty()) return new ArrayList<KEPRule>(); final class AComparator implements Comparator<KEPRule> { + @Override public int compare(KEPRule r1, KEPRule r2) { if (r1.getCoveringStatistics().getCoveredPositivesCount() < r2.getCoveringStatistics() .getCoveredPositivesCount()) @@ -803,8 +786,9 @@ public class KEPLearner extends TextRule else if (r1.getCoveringStatistics().getCoveredNegativesCount() < r2.getCoveringStatistics() .getCoveredNegativesCount()) return -1; - else if (r1.getPreFiller().size() + r1.getInFiller().size() + r1.getPostFiller().size() < r2 - .getPreFiller().size() + r2.getInFiller().size() + r2.getPostFiller().size()) + else if (r1.getPreFiller().size() + r1.getInFiller().size() + + r1.getPostFiller().size() < r2.getPreFiller().size() + r2.getInFiller().size() + + r2.getPostFiller().size()) return -1; return 0; } @@ -818,9 +802,9 @@ public class KEPLearner extends TextRule KEPRule rule = rules.get(i); if ((3 * rule.getCoveringStatistics().getCoveredPositivesCount() >= rule .getCoveringStatistics().getCoveredNegativesCount()) - && (rule.getCoveringStatistics().getCoveredPositivesCount() >= positiveExamples - .size() || !coveredExamples.containsAll(rule.getCoveringStatistics() - .getCoveredPositiveExamples()))) { + && (rule.getCoveringStatistics().getCoveredPositivesCount() >= positiveExamples.size() + || !coveredExamples.containsAll( + rule.getCoveringStatistics().getCoveredPositiveExamples()))) { result.add(rule); coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples()); @@ -831,8 +815,8 @@ public class KEPLearner extends TextRule for (int i = 0; i < rules.size(); i++) { KEPRule rule = rules.get(i); if (rule.getCoveringStatistics().getCoveredPositivesCount() >= positiveExamples.size() - || !coveredExamples.containsAll(rule.getCoveringStatistics() - .getCoveredPositiveExamples())) { + || !coveredExamples + .containsAll(rule.getCoveringStatistics().getCoveredPositiveExamples())) { result.add(rule); coveredExamples.addAll(rule.getCoveringStatistics().getCoveredPositiveExamples()); @@ -865,10 +849,10 @@ public class KEPLearner extends TextRule example.getAnnotation().getBegin(), example.getAnnotation().getEnd(), filterSetWithSlotNames, null)) { if (!filterSetWithSlotNames.contains(a.getType().getName())) - if (((!countOnlyCoveringTokens) && (a.getBegin() >= example.getAnnotation().getBegin() && a - .getEnd() <= example.getAnnotation().getEnd())) - || (a.getBegin() == example.getAnnotation().getBegin() && a.getEnd() == example - .getAnnotation().getEnd())) { + if (((!countOnlyCoveringTokens) && (a.getBegin() >= example.getAnnotation().getBegin() + && a.getEnd() <= example.getAnnotation().getEnd())) + || (a.getBegin() == example.getAnnotation().getBegin() + && a.getEnd() == example.getAnnotation().getEnd())) { List<TextRulerExample> list = countMap.get(a.getType().getName()); if (list == null) { list = new ArrayList<TextRulerExample>(); @@ -906,6 +890,7 @@ public class KEPLearner extends TextRule return result; } + @Override public String getResultString() { StringBuffer ruleStrings = new StringBuffer(); if (slotNamesWithBoundaries == null || slotNamesWithBoundaries.length == 0) @@ -917,13 +902,12 @@ public class KEPLearner extends TextRule if (blockType != null && !(i > 0 && blocks.get(slotNamesWithBoundaries[i - 1]) != null && blocks .get(slotNamesWithBoundaries[i - 1]).getName().equals(blockType.getName()))) { - ruleStrings.append("BLOCK(" + blockType.getShortName() + ") " + blockType.getShortName() - + "{} { \n"); + ruleStrings.append( + "BLOCK(" + blockType.getShortName() + ") " + blockType.getShortName() + "{} { \n"); } if (ruleList == null || ruleList.isEmpty()) { - if (blockType != null - && !(i < slotNamesWithBoundaries.length - 1 - && blocks.get(slotNamesWithBoundaries[i + 1]) != null && blocks + if (blockType != null && !(i < slotNamesWithBoundaries.length - 1 + && blocks.get(slotNamesWithBoundaries[i + 1]) != null && blocks .get(slotNamesWithBoundaries[i + 1]).getName().equals(blockType.getName()))) ruleStrings.append("} \n"); continue; @@ -933,10 +917,9 @@ public class KEPLearner extends TextRule ruleStrings.append((blockType != null ? "\t" : "") + rule.getRuleString() + "\t// " + rule.getCoveringStatistics() + "\n"); } - if (blockType != null - && !(i < slotNamesWithBoundaries.length - 1 - && blocks.get(slotNamesWithBoundaries[i + 1]) != null && blocks - .get(slotNamesWithBoundaries[i + 1]).getName().equals(blockType.getName()))) + if (blockType != null && !(i < slotNamesWithBoundaries.length - 1 + && blocks.get(slotNamesWithBoundaries[i + 1]) != null + && blocks.get(slotNamesWithBoundaries[i + 1]).getName().equals(blockType.getName()))) ruleStrings.append("}"); ruleStrings.append("\n"); } @@ -951,11 +934,11 @@ public class KEPLearner extends TextRule for (KEPRule rule : ruleList) { if (slotNamesWithBoundaries[i].contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION) || slotNamesWithBoundaries[i].contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION)) { - boundaryCorrectors.append(rule.getRuleString() + "\t// " + rule.getCoveringStatistics() - + "\n"); + boundaryCorrectors + .append(rule.getRuleString() + "\t// " + rule.getCoveringStatistics() + "\n"); } else { - wholeSlotCorrectors.append(rule.getRuleString() + "\t// " + rule.getCoveringStatistics() - + "\n"); + wholeSlotCorrectors + .append(rule.getRuleString() + "\t// " + rule.getCoveringStatistics() + "\n"); } } } @@ -969,8 +952,8 @@ public class KEPLearner extends TextRule result.append("// " + slotName + " RULES \n"); Type blockType = blocks.get(slotName); if (blockType != null) { - result.append("BLOCK(" + blockType.getShortName() + ") " + blockType.getShortName() - + "{} { \n"); + result.append( + "BLOCK(" + blockType.getShortName() + ") " + blockType.getShortName() + "{} { \n"); } List<KEPRule> ruleList = this.ruleLists.get(slotName); if (ruleList != null && !ruleList.isEmpty()) { @@ -995,12 +978,12 @@ public class KEPLearner extends TextRule + blockType.getShortName() + "{} { \n"); } String shortName = slotName.substring(slotName.lastIndexOf(".") + 1); - String str = shortName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION + "{->MARKONCE(" - + shortName + ",1,3)} ANY*? " + shortName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION - + ";" + "\n"; + String str = shortName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION + "{->MARKONCE(" + shortName + + ",1,3)} ANY*? " + shortName + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION + ";" + + "\n"; str += shortName + TextRulerToolkit.LEFT_BOUNDARY_EXTENSION + "{IS(" + shortName - + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION + ")->MARKONCE(" + shortName + ")} " - + ";" + "\n"; + + TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION + ")->MARKONCE(" + shortName + ")} " + ";" + + "\n"; result.append(str); if (blockType != null) { result.append("}"); @@ -1108,8 +1091,8 @@ public class KEPLearner extends TextRule private void prepareCASWithBoundaries(CAS cas) { for (String slotName : slotNames) - if (!(slotName.contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION) || slotName - .contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION))) + if (!(slotName.contains(TextRulerToolkit.LEFT_BOUNDARY_EXTENSION) + || slotName.contains(TextRulerToolkit.RIGHT_BOUNDARY_EXTENSION))) TextRulerExampleDocument.createBoundaryAnnotationsForCas(cas, slotName, filterSet); } @@ -1130,6 +1113,7 @@ public class KEPLearner extends TextRule return true; } + @Override public void setParameters(Map<String, Object> params) { if (TextRulerToolkit.DEBUG) saveParametersToTempFolder(params); @@ -1163,6 +1147,7 @@ public class KEPLearner extends TextRule private final FSTypeConstraint c = constraint; + @Override public boolean match(FeatureStructure fs) { return !c.match(fs); } @@ -1179,6 +1164,7 @@ public class KEPLearner extends TextRule this.cas = cas; } + @Override public int compare(KEPRule o1, KEPRule o2) { ArrayList<TextRulerRuleItem> items1 = o1.getInFiller(); items1.addAll(o1.getPostFiller());
Modified: uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/rapier/RapierPatternSegment.java URL: http://svn.apache.org/viewvc/uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/rapier/RapierPatternSegment.java?rev=1869967&r1=1869966&r2=1869967&view=diff ============================================================================== --- uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/rapier/RapierPatternSegment.java (original) +++ uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/rapier/RapierPatternSegment.java Mon Nov 18 12:19:31 2019 @@ -32,13 +32,12 @@ public class RapierPatternSegment { public void debugOutput() { TextRulerToolkit.log("\n-------------\nShorterList: "); - for (TextRulerRuleItem t : shorterPattern) - System.out.print(t.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + " "); - TextRulerToolkit.log(""); - - System.out.print("LongerList: "); - for (TextRulerRuleItem t : longerPattern) - System.out.print(t.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + " "); - TextRulerToolkit.log(""); + for (TextRulerRuleItem t : shorterPattern) { + TextRulerToolkit.log(t.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + " "); + } + TextRulerToolkit.log("LongerList: "); + for (TextRulerRuleItem t : longerPattern) { + TextRulerToolkit.log(t.getStringForRuleString(null, null, 0, 1, 0, 1, 0) + " "); + } } } Modified: uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java URL: http://svn.apache.org/viewvc/uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java?rev=1869967&r1=1869966&r2=1869967&view=diff ============================================================================== --- uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java (original) +++ uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/RankedList.java Mon Nov 18 12:19:31 2019 @@ -46,11 +46,7 @@ public class RankedList extends ArrayLis addAll(c); } - /** - * Adds the item to the list and increases its ranking by one. - * - * @return true, if the item was newly inserted into the list. - */ + @Override public boolean add(Condition e) { boolean result; double value; @@ -71,10 +67,6 @@ public class RankedList extends ArrayLis return result; } - /** - * Adds the item to the list and increases its ranking by the given number. - * - */ public void add(double index, Condition e) { if (ranking.containsKey(e)) { Double rank = ranking.get(e); @@ -86,20 +78,13 @@ public class RankedList extends ArrayLis } } - /** - * Adds the items to the ranked list and increases their ranking by their value in the given list. - * - */ public void addAll(RankedList list) { for (Condition each : list) { add(list.rankingOf(each), each); } } - /** - * Adds the items to the list and increases their ranking by one. - * - */ + @Override public boolean addAll(Collection<? extends Condition> c) { for (Condition each : c) { add(each); @@ -107,10 +92,6 @@ public class RankedList extends ArrayLis return true; } - /** - * Adds the items to the list and increases their ranking by the given number. - * - */ public boolean addAll(double index, Collection<? extends Condition> c) { for (Condition each : c) { add(index, each); @@ -118,6 +99,7 @@ public class RankedList extends ArrayLis return true; } + @Override public Condition remove(int index) { Condition element = super.get(index); if (element != null) { @@ -130,6 +112,7 @@ public class RankedList extends ArrayLis return null; } + @Override public boolean remove(Object o) { if (size() > 0) { if (contains(o) && ranking.containsKey(o)) { @@ -141,23 +124,28 @@ public class RankedList extends ArrayLis return false; } + @Override public List<Condition> subList(int start, int end) { return super.subList(start, end); } + @Override public boolean contains(Object o) { return super.contains(o); } + @Override public void clear() { super.clear(); ranking.clear(); } + @Override public int size() { return super.size(); } + @Override public RankedList clone() { RankedList clone = new RankedList(idf); for (Condition element : subList(0, size())) { @@ -166,16 +154,11 @@ public class RankedList extends ArrayLis return clone; } + @Override public Condition get(int i) { return super.get(i); } - /** - * Returns the ranking of the given object. - * - * @param each - * @return ranking - */ public double rankingOf(Condition each) { if (contains(each)) { return ranking.get(each).doubleValue(); @@ -187,9 +170,6 @@ public class RankedList extends ArrayLis return ranking; } - /** - * Sorts the elements of the list in order of its ranking. - */ public void sort() { List<Condition> newList = new ArrayList<Condition>(); for (int i = 0; i < size(); i++) { @@ -206,12 +186,6 @@ public class RankedList extends ArrayLis super.addAll(newList); } - /** - * Unites the lists and adds their values for common elements. - * - * @param list - * A list of the same type. - */ public RankedList unite(RankedList list) { RankedList clone = clone(); for (Condition element : list.subList(0, list.size())) { @@ -221,13 +195,6 @@ public class RankedList extends ArrayLis return clone; } - /** - * Removes all elements that are not part of both lists. Also adds the values of elements that - * occur in both lists. - * - * @param list - * A list of the same type. - */ public RankedList cut(RankedList list) { RankedList clone = clone(); for (Condition element : subList(0, size())) { @@ -241,13 +208,6 @@ public class RankedList extends ArrayLis return clone; } - /** - * Removes all elements of the given list that occur in this list. Also adds the values of - * elements that occur in both lists. - * - * @param list - * A list of the same type. - */ public RankedList subtract(RankedList list) { RankedList clone = clone(); for (Condition element : subList(0, size())) { @@ -275,10 +235,6 @@ public class RankedList extends ArrayLis return super.set(index, element); } - /** - * Adds the element to the list and assigns the ranking of the element located at the index. If - * the element already exists, it is only assigned the ranking value. - */ @Override public void add(int index, Condition element) { if (size() >= index) { Modified: uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java URL: http://svn.apache.org/viewvc/uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java?rev=1869967&r1=1869966&r2=1869967&view=diff ============================================================================== --- uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java (original) +++ uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/ShiftAllRule.java Mon Nov 18 12:19:31 2019 @@ -75,8 +75,9 @@ public class ShiftAllRule extends Shifti after = "{" + after + "}"; // this part will delete the original annotation - String unmark = errorBoundaryItem + (frontItemInBorders ? "" : " ANY") +"{STARTSWITH(" + annotation.getType().getShortName() - + ") -> UNMARK(" + annotation.getType().getShortName() + ", true)}"; + String unmark = errorBoundaryItem + (frontItemInBorders ? "" : " ANY") + "{STARTSWITH(" + + annotation.getType().getShortName() + ") -> UNMARK(" + + annotation.getType().getShortName() + ", true)}"; if (shiftToLeft) compileShiftToLeft(frontItemInBorders, rearItemInBorders, before, after, action, unmark); @@ -100,8 +101,7 @@ public class ShiftAllRule extends Shifti + rearBoundaryItem + ")}"; // like ANY*? but faster // ruleString += "#{-CONTAINS(" + errorBoundaryItem + ")} " + unmark + " #{-CONTAINS(" // + rearBoundaryItem + ")}"; // like ANY*? but faster - - + // Check, if the rear item should be included and mark all tokens between the brackets as // the // new annotation. @@ -128,8 +128,6 @@ public class ShiftAllRule extends Shifti ruleString += "ANY*{-PARTOF(" + rearBoundaryItem + ")}"; // like ANY*? but faster // ruleString += "#{-CONTAINS(" + rearBoundaryItem + ")}"; // like ANY*? but faster - - // Check, if the rear item should be included and mark all tokens between the brackets as // the // new annotation. @@ -194,11 +192,6 @@ public class ShiftAllRule extends Shifti return false; } - /** - * Sets the boundary item that marks the beginning of the original annotation. - * - * @param errorBoundaryItem - */ public void setErrorBoundaryItem(TrabalRuleItem errorBoundaryItem) { this.errorBoundaryItem = errorBoundaryItem; } Modified: uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalLearner.java URL: http://svn.apache.org/viewvc/uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalLearner.java?rev=1869967&r1=1869966&r2=1869967&view=diff ============================================================================== --- uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalLearner.java (original) +++ uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/trabal/TrabalLearner.java Mon Nov 18 12:19:31 2019 @@ -139,9 +139,7 @@ public class TrabalLearner extends TextR this.additionalFolderPath = additionalFolderPath; } - /** - * Main method. Starts the algorithm. - */ + @Override protected void doRun() { try { getAnalysisEngine(); @@ -155,7 +153,8 @@ public class TrabalLearner extends TextR sendStatusUpdateToDelegate("Comparing documents...", TextRulerLearnerState.ML_RUNNING, true); for (int i = 0; i < algorithmIterations; i++) { actualResult += "BLOCK(Iteration_" + (i + 1) + ") Document{} {\n"; - sendStatusUpdateToDelegate("Comparing documents...", TextRulerLearnerState.ML_RUNNING, true); + sendStatusUpdateToDelegate("Comparing documents...", TextRulerLearnerState.ML_RUNNING, + true); idf = createIDF(); Map<String, List<AnnotationError>> errorGrps = createErrorGroups(); List<TrabalRule> rules = runAlgorithm(errorGrps); @@ -241,14 +240,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Compares the training data to the additional data and creates a mapping of the contained - * errors, where the key is a combination of the error type and the names of the concerned - * annotations, and the values are lists of errors that can be corrected through the same type of - * rule. - * - * @return map of errors - */ private Map<String, List<AnnotationError>> createErrorGroups() { errors = createErrorList(); Collections.sort(errors); @@ -300,9 +291,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Creates a list of annotation pairs and the fitting type of transformation action. - */ private List<AnnotationError> createErrorList() { positiveExamples = new HashMap<String, RankedList>(); List<AnnotationError> result = new ArrayList<AnnotationError>(); @@ -341,8 +329,9 @@ public class TrabalLearner extends TextR if (shouldAbort()) break; exampleIndex++; - sendStatusUpdateToDelegate("Comparing documents " + (i + 1) + " of " + goldStandard.size() - + ": example " + exampleIndex + " of " + gold.size(), + sendStatusUpdateToDelegate( + "Comparing documents " + (i + 1) + " of " + goldStandard.size() + ": example " + + exampleIndex + " of " + gold.size(), TextRulerLearnerState.ML_RUNNING, false); a = iterator.next(); docIterator = docs.iterator(); @@ -508,13 +497,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Is called by doRun(). Creates and tests rules for a given list of error groups created by - * createErrorGroups(). - * - * @param errorGrps - * @return The best found list of correction rules. - */ private List<TrabalRule> runAlgorithm(Map<String, List<AnnotationError>> errorGrps) { removeBasics(); inducedRules.clear(); @@ -534,8 +516,8 @@ public class TrabalLearner extends TextR if (basicRules.size() > maxNumberOfBasicRules) { basicRules = basicRules.subList(0, maxNumberOfBasicRules); } - sendStatusUpdateToDelegate("Testing basic rules: " + status, - TextRulerLearnerState.ML_RUNNING, false); + sendStatusUpdateToDelegate("Testing basic rules: " + status, TextRulerLearnerState.ML_RUNNING, + false); basicRules = testTrabalRulesOnDocumentSet(basicRules, exampleDocuments, additionalDocuments, "basic rules (" + i + " of " + errorGrps.size() + ")"); if (basicRules.size() > 0) { @@ -543,8 +525,8 @@ public class TrabalLearner extends TextR bestRulesForStatus.add(basicRules.get(0)); } result = actualResult + getRuleStrings(bestRulesForStatus); - sendStatusUpdateToDelegate("Testing basic rules: " + status, - TextRulerLearnerState.ML_RUNNING, true); + sendStatusUpdateToDelegate("Testing basic rules: " + status, TextRulerLearnerState.ML_RUNNING, + true); List<TrabalRule> learntRules = new ArrayList<TrabalRule>(); for (TrabalRule rule : basicRules) { if (rule.getCoveringStatistics().getCoveredPositivesCount() > 0 @@ -638,12 +620,6 @@ public class TrabalLearner extends TextR return learntRules; } - /** - * Chooses the best final rules from the results of runAlgorithm(). - * - * @param rules - * @return A non redundant list of correction rules. - */ private List<TrabalRule> getBest(List<TrabalRule> rules) { List<TrabalRule> result = new ArrayList<TrabalRule>(); Collections.sort(rules, enhancedComparator); @@ -713,12 +689,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Is called from getBest(). Applies the chosen rules to the additional documents, so the learner - * can ignore the already corrected errors in the next interation. - * - * @param rule - */ private void updateDocumentData(TrabalRule rule) { try { sendStatusUpdateToDelegate("Writing rules...", TextRulerLearnerState.ML_RUNNING, false); @@ -737,12 +707,6 @@ public class TrabalLearner extends TextR } } - /** - * Creates basic rules for the given types of errors. - * - * @param errors - * @return list of basic rules - */ public List<TrabalRule> createBasicRules(List<AnnotationError> errors) { List<TrabalRule> result = new ArrayList<TrabalRule>(); for (AnnotationError each : errors) { @@ -766,12 +730,6 @@ public class TrabalLearner extends TextR return getBestBasicRule(result); } - /** - * Chooses the rules which are expected to be the best. - * - * @param rules - * @return list of best basic rules - */ private List<TrabalRule> getBestBasicRule(List<TrabalRule> rules) { if (rules.size() < maxNumberOfBasicRules) return rules; @@ -821,12 +779,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Creates basic rules for shifting type errors. - * - * @param error - * @return list of shifting rules - */ private List<TrabalRule> createShiftingRules(AnnotationError each) { List<TrabalRule> result = new ArrayList<TrabalRule>(); TextRulerExample error = each.getError(); @@ -848,15 +800,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Creates basic rules for shifting errors, where both borders are shiftet to the same side or the - * size of the annotation is reduced. - * - * @param error - * The error example - * @param truth - * The truth example - */ private List<TrabalRule> createShiftAllRules(TextRulerExample error, TextRulerExample truth) { List<TrabalRule> result = new ArrayList<TrabalRule>(); TextRulerExampleDocument document = error.getDocument(); @@ -907,13 +850,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Creates basic rules for expansion type errors. - * - * @param error - * @param truth - * @return list of expansion rules - */ private List<TrabalRule> createExpansionRules(TextRulerExample error, TextRulerExample truth) { List<TrabalRule> result = new ArrayList<TrabalRule>(); TextRulerExampleDocument document = error.getDocument(); @@ -939,17 +875,11 @@ public class TrabalLearner extends TextR return result; } - /** - * Creates basic rules for annotation type errors. - * - * @param error - * @return list of annotation rules - */ private List<TrabalRule> createAnnotationRules(AnnotationError each) { List<TrabalRule> result = new ArrayList<TrabalRule>(); TextRulerExample truth = each.getTruth(); - TextRulerExampleDocument document = additionalDocuments.getDocuments().get( - exampleDocuments.getDocuments().indexOf(truth.getDocument())); + TextRulerExampleDocument document = additionalDocuments.getDocuments() + .get(exampleDocuments.getDocuments().indexOf(truth.getDocument())); CAS cas = document.getCAS(); List<TrabalRuleItem> truthLeftBorder = getBorderOfExample(truth, document, cas, true); List<TrabalRuleItem> truthRightBorder = getBorderOfExample(truth, document, cas, false); @@ -967,12 +897,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Creates basic rules for deletion type errors. - * - * @param error - * @return list of deletion rules - */ private List<TrabalRule> createDeletionRules(AnnotationError each) { List<TrabalRule> result = new ArrayList<TrabalRule>(); TextRulerExample error = each.getError(); @@ -982,12 +906,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Creates basic rules for correction type errors. - * - * @param error - * @return list of correction rules - */ private List<TrabalRule> createCorrectionRules(AnnotationError each) { List<TrabalRule> result = new ArrayList<TrabalRule>(); TextRulerExample error = each.getError(); @@ -998,16 +916,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Examines the border of an example annotation and returns all annotations before and on that - * border if looking at the left border or returns all annotations on and after the example if - * looking at the right border. - * - * @param example - * The example that should be examined - * @param examineLeftBorder - * set true for the left border or false for the right border to be examined - */ private List<TrabalRuleItem> getBorderOfExample(TextRulerExample example, TextRulerExampleDocument document, CAS cas, boolean examineLeftBorder) { List<TrabalRuleItem> result = new ArrayList<TrabalRuleItem>(); @@ -1055,12 +963,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Creates conditions for a given rule. - * - * @param rules - * @return ranked list of conditions - */ public RankedList createConditions(List<TrabalRule> rules) { if (rules.size() == 0) return new RankedList(idf); @@ -1075,12 +977,12 @@ public class TrabalLearner extends TextR error.addAll(createConditions(rule.getAnnotation())); truth.addAll(createConditions(rule.getTargetAnnotation())); } - result = error.subtract(truth.unite(getPositiveExamplesFor(rules.get(0) - .getTargetAnnotation().getType()))); + result = error.subtract( + truth.unite(getPositiveExamplesFor(rules.get(0).getTargetAnnotation().getType()))); setNegative(result); - result.addAll(truth.cut( - getPositiveExamplesFor(rules.get(0).getTargetAnnotation().getType())).subtract( - error)); + result.addAll( + truth.cut(getPositiveExamplesFor(rules.get(0).getTargetAnnotation().getType())) + .subtract(error)); return result; case CORRECTION: for (TrabalRule rule : rules) { @@ -1090,9 +992,9 @@ public class TrabalLearner extends TextR truth.unite(getPositiveExamplesFor(rules.get(0).getTargetAnnotation().getType()))); setNegative(result); if (getPositiveExamplesFor(rules.get(0).getTargetAnnotation().getType()).size() > 0) { - result.addAll(truth.cut( - getPositiveExamplesFor(rules.get(0).getTargetAnnotation().getType())).subtract( - getPositiveExamplesFor(rules.get(0).getAnnotation().getType()))); + result.addAll(truth + .cut(getPositiveExamplesFor(rules.get(0).getTargetAnnotation().getType())) + .subtract(getPositiveExamplesFor(rules.get(0).getAnnotation().getType()))); } else { result.addAll(truth); } @@ -1112,8 +1014,8 @@ public class TrabalLearner extends TextR } result = getPositiveExamplesFor(rules.get(0).getAnnotation().getType()).subtract(error); setNegative(result); - result.addAll(createConditions(rules.get(0).getAnnotation()).subtract( - getPositiveExamplesFor(rules.get(0).getAnnotation().getType()))); + result.addAll(createConditions(rules.get(0).getAnnotation()) + .subtract(getPositiveExamplesFor(rules.get(0).getAnnotation().getType()))); return result; } return null; @@ -1136,19 +1038,11 @@ public class TrabalLearner extends TextR } } - /** - * Creates conditions for a rule given by its boundaries and annotation type. - * - * @param frontBoundary - * @param rearBoundary - * @param truth - * @return ranked list of conditions - */ private RankedList createConditions(TrabalRuleItem frontBoundary, TrabalRuleItem rearBoundary, TrabalAnnotation truth) { RankedList result = new RankedList(idf); - TextRulerExampleDocument doc = additionalDocuments.getDocuments().get( - exampleDocuments.getDocuments().indexOf(truth.getDocument())); + TextRulerExampleDocument doc = additionalDocuments.getDocuments() + .get(exampleDocuments.getDocuments().indexOf(truth.getDocument())); CAS cas = doc.getCAS(); for (TrabalRuleItem item : getTermsBefore(frontBoundary, doc)) { result.add(new Condition(ConditionType.AFTER, item)); @@ -1166,8 +1060,9 @@ public class TrabalLearner extends TextR result.add(new Condition(ConditionType.STARTSWITH, item)); } } else { - for (List<TrabalRuleItem> list : getFirstTermsWithinBounds(frontBoundary.getAnnotation() - .getBegin(), rearBoundary.getAnnotation().getEnd(), doc, cas, 1)) { + for (List<TrabalRuleItem> list : getFirstTermsWithinBounds( + frontBoundary.getAnnotation().getBegin(), rearBoundary.getAnnotation().getEnd(), doc, + cas, 1)) { result.add(new Condition(ConditionType.STARTSWITH, list.get(0))); } } @@ -1176,20 +1071,15 @@ public class TrabalLearner extends TextR result.add(new Condition(ConditionType.ENDSWITH, item)); } } else { - for (List<TrabalRuleItem> list : getLastTermsWithinBounds(rearBoundary.getAnnotation() - .getBegin(), frontBoundary.getAnnotation().getEnd(), doc, cas, 1)) { + for (List<TrabalRuleItem> list : getLastTermsWithinBounds( + rearBoundary.getAnnotation().getBegin(), frontBoundary.getAnnotation().getEnd(), doc, + cas, 1)) { result.add(new Condition(ConditionType.ENDSWITH, list.get(0))); } } return result; } - /** - * Creates conditions for a rule, given by its annotation. - * - * @param error - * @return ranked list of conditions - */ private RankedList createConditions(TrabalAnnotation annotation) { RankedList result = new RankedList(idf); TrabalRuleItem ruleItem = new TrabalRuleItem(annotation); @@ -1221,17 +1111,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Returns all items directly preceding the given example rule item. - * - * @param ruleItem - * Examined rule item - * @param document - * Example document - * @param cas - * The CAS - * @return List of rule items that precede the given example - */ private List<TrabalRuleItem> getTermsBefore(TrabalRuleItem ruleItem, TextRulerExampleDocument document) { List<TrabalRuleItem> result = new ArrayList<TrabalRuleItem>(); @@ -1256,8 +1135,8 @@ public class TrabalLearner extends TextR nextEnd = a.getEnd(); } if (a.getEnd() >= nextEnd && a.getEnd() <= begin) { - TrabalRuleItem term = new TrabalRuleItem(new TrabalAnnotation(a, document, - enableFeatures)); + TrabalRuleItem term = new TrabalRuleItem( + new TrabalAnnotation(a, document, enableFeatures)); result.add(term); } } @@ -1267,17 +1146,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Returns all items directly following the given example rule item. - * - * @param ruleItem - * Examined rule item - * @param document - * Example document - * @param cas - * The CAS - * @return List of rule items that follow the given example - */ private List<TrabalRuleItem> getTermsAfter(TrabalRuleItem ruleItem, TextRulerExampleDocument document) { List<TrabalRuleItem> result = new ArrayList<TrabalRuleItem>(); @@ -1297,8 +1165,8 @@ public class TrabalLearner extends TextR nextBegin = a.getBegin(); } if (a.getBegin() <= nextBegin && a.getBegin() >= end) { - TrabalRuleItem term = new TrabalRuleItem(new TrabalAnnotation(a, document, - enableFeatures)); + TrabalRuleItem term = new TrabalRuleItem( + new TrabalAnnotation(a, document, enableFeatures)); result.add(term); } } @@ -1308,17 +1176,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Gets the left border within the bounds, that means a list of elements that start with the - * border and those that end right before it. - * - * @param startPos - * @param endPos - * @param document - * @param cas - * @param numberOfSideItems - * @return first term - */ private List<List<TrabalRuleItem>> getFirstTermsWithinBounds(int startPos, int endPos, TextRulerExampleDocument document, CAS cas, int numberOfSideItems) { List<List<TrabalRuleItem>> preItems = new ArrayList<List<TrabalRuleItem>>(); @@ -1348,7 +1205,8 @@ public class TrabalLearner extends TextR for (AnnotationFS each : startAs) { List<TrabalRuleItem> startList = new ArrayList<TrabalRuleItem>(); - TrabalRuleItem term = new TrabalRuleItem(new TrabalAnnotation(each, document, enableFeatures)); + TrabalRuleItem term = new TrabalRuleItem( + new TrabalAnnotation(each, document, enableFeatures)); startList.add(term); preItems.add(startList); } @@ -1356,17 +1214,6 @@ public class TrabalLearner extends TextR return preItems; } - /** - * Gets the right border within the bounds, that means a list of elements that end with the border - * and those that start right behind it. - * - * @param startPos - * @param endPos - * @param document - * @param cas - * @param numberOfSideItems - * @return last term - */ private List<List<TrabalRuleItem>> getLastTermsWithinBounds(int startPos, int endPos, TextRulerExampleDocument document, CAS cas, int numberOfSideItems) { List<List<TrabalRuleItem>> postItems = new ArrayList<List<TrabalRuleItem>>(); @@ -1397,7 +1244,8 @@ public class TrabalLearner extends TextR for (AnnotationFS each : endAs) { List<TrabalRuleItem> endList = new ArrayList<TrabalRuleItem>(); - TrabalRuleItem term = new TrabalRuleItem(new TrabalAnnotation(each, document, enableFeatures)); + TrabalRuleItem term = new TrabalRuleItem( + new TrabalAnnotation(each, document, enableFeatures)); endList.add(term); postItems.add(endList); } @@ -1405,17 +1253,6 @@ public class TrabalLearner extends TextR return postItems; } - /** - * Returns a list of rule elements that precede the given elements. - * - * @param lists - * @param till - * @param document - * @param cas - * @param index - * @param maxNumberOfItems - * @return updated list of items - */ private List<List<TrabalRuleItem>> addPreceding(List<List<TrabalRuleItem>> lists, int till, TextRulerExampleDocument document, CAS cas, int index, int maxNumberOfItems) { if (index >= maxNumberOfItems) { @@ -1449,17 +1286,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Returns a list of rule elements that follow on the given elements. - * - * @param lists - * @param till - * @param document - * @param cas - * @param index - * @param maxNumberOfItems - * @return updated list of items - */ private List<List<TrabalRuleItem>> addFollowing(List<List<TrabalRuleItem>> lists, int till, TextRulerExampleDocument document, CAS cas, int index, int maxNumberOfItems) { if (index >= maxNumberOfItems) { @@ -1493,9 +1319,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Returns a list of all items, consuming the given RuleItem. - */ private List<TrabalRuleItem> getConsumingTerms(TrabalRuleItem ruleItem, TextRulerExampleDocument document) { CAS cas = document.getCAS(); @@ -1516,15 +1339,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Returns a list of rule items within the given bounds. - * - * @param begin - * @param end - * @param doc - * @param cas - * @return list of terms - */ private List<TrabalRuleItem> getSingleTermsWithinBounds(int begin, int end, TextRulerExampleDocument doc, CAS cas) { Set<TrabalRuleItem> result = new HashSet<TrabalRuleItem>(); @@ -1544,16 +1358,6 @@ public class TrabalLearner extends TextR return new ArrayList<TrabalRuleItem>(result); } - /** - * Applies a set of rules to the additional data and compares it to the training data to rate - * them. - * - * @param rules - * @param documents - * @param additionalDocuments - * @param ruleSet - * @return list of rules - */ public List<TrabalRule> testTrabalRulesOnDocumentSet(List<TrabalRule> rules, final TextRulerExampleDocumentSet documents, final TextRulerExampleDocumentSet additionalDocuments, String ruleSet) { @@ -1575,16 +1379,15 @@ public class TrabalLearner extends TextR counter++; String ruleString = rule.getRuleString(); String ruleInfo = getRuleInfo(rule); - System.out.println("testing: " + ruleString); if (inducedRules.containsKey(ruleString)) { rule.setCoveringStatistics(inducedRules.get(ruleString)); - System.out.println("skipped with " + inducedRules.get(ruleString)); } else { for (int i = 0; i < goldDocs.size(); i++) { TextRulerExampleDocument goldDoc = goldDocs.get(i); TextRulerExampleDocument additionalDoc = additionalDocs.get(i); - sendStatusUpdateToDelegate("Testing " + ruleSet + ruleInfo + " on document " + (i + 1) - + " of " + goldDocs.size() + " : rule " + counter + " of " + rules.size(), + sendStatusUpdateToDelegate( + "Testing " + ruleSet + ruleInfo + " on document " + (i + 1) + " of " + + goldDocs.size() + " : rule " + counter + " of " + rules.size(), TextRulerLearnerState.ML_RUNNING, false); TextRulerStatisticsCollector sumC = new TextRulerStatisticsCollector(); prepareTestCas(theTestCAS, goldDoc, additionalDoc); @@ -1597,7 +1400,6 @@ public class TrabalLearner extends TextR pnorm = 1; } if (n / pnorm > maxErrorRate) { - System.out.println("stopped:" + sumC); break; } @@ -1636,13 +1438,6 @@ public class TrabalLearner extends TextR return ruleInfo; } - /** - * Resets the test CAS and adds the annotations of the additional data. - * - * @param testCas - * @param goldDoc - * @param additionalDoc - */ private void prepareTestCas(CAS testCas, TextRulerExampleDocument goldDoc, TextRulerExampleDocument additionalDoc) { testCas.reset(); @@ -1651,7 +1446,7 @@ public class TrabalLearner extends TextR testCas.setDocumentText(goldCas.getDocumentText()); CasCopier.copyCas(additionalCas, testCas, testCas.getDocumentText() == null); - + // for (AnnotationFS fs : additionalCas.getAnnotationIndex()) { // Type t = testCas.getTypeSystem().getType(fs.getType().getName()); // if (t != null) { @@ -1664,16 +1459,6 @@ public class TrabalLearner extends TextR // } } - /** - * Applies a rule to an element of the additional data and compares the result to the - * corresponding element of the training data. - * - * @param rule - * @param goldDoc - * @param additionalDoc - * @param c - * @param testCas - */ private void testRuleOnDocument(final TrabalRule rule, final TextRulerExampleDocument goldDoc, final TextRulerExampleDocument additionalDoc, final TextRulerStatisticsCollector c, CAS testCas) { @@ -1691,44 +1476,36 @@ public class TrabalLearner extends TextR TextRulerToolkit.writeCAStoXMIFile(testCas, tempDirectory() + "testCasProcessed.xmi"); } if (rule.getAnnotation() != null && rule.getTargetAnnotation() != null) { - compareOriginalDocumentWithTestCAS(goldDoc, additionalDoc, testCas, new TextRulerTarget( - rule.getAnnotation().getType().getName(), this), c, false); + compareOriginalDocumentWithTestCAS(goldDoc, additionalDoc, testCas, + new TextRulerTarget(rule.getAnnotation().getType().getName(), this), c, false); if (rule.getAnnotation().getType() != rule.getTargetAnnotation().getType()) { - compareOriginalDocumentWithTestCAS(goldDoc, additionalDoc, testCas, new TextRulerTarget( - rule.getTargetAnnotation().getType().getName(), this), c, false); + compareOriginalDocumentWithTestCAS(goldDoc, additionalDoc, testCas, + new TextRulerTarget(rule.getTargetAnnotation().getType().getName(), this), c, + false); } } else if (rule.getTargetAnnotation() != null) { - compareOriginalDocumentWithTestCAS(goldDoc, additionalDoc, testCas, new TextRulerTarget( - rule.getTargetAnnotation().getType().getName(), this), c, false); + compareOriginalDocumentWithTestCAS(goldDoc, additionalDoc, testCas, + new TextRulerTarget(rule.getTargetAnnotation().getType().getName(), this), c, + false); } else { - compareOriginalDocumentWithTestCAS(goldDoc, additionalDoc, testCas, new TextRulerTarget( - rule.getAnnotation().getType().getName(), this), c, false); + compareOriginalDocumentWithTestCAS(goldDoc, additionalDoc, testCas, + new TextRulerTarget(rule.getAnnotation().getType().getName(), this), c, false); } } catch (Exception e) { e.printStackTrace(); } } - /** - * Compares two documents to rate the results of an applied rule. - * - * @param goldDoc - * @param additionalDoc - * @param testCas - * @param target - * @param c - * @param collectNegativeExamples - */ private void compareOriginalDocumentWithTestCAS(TextRulerExampleDocument goldDoc, TextRulerExampleDocument additionalDoc, CAS testCas, TextRulerTarget target, TextRulerStatisticsCollector c, boolean collectNegativeExamples) { List<TextRulerExample> goldPositives = goldDoc.createSlotInstancesForCAS(goldDoc.getCAS(), target, true); - List<TextRulerExample> additionalPositives = additionalDoc.createSlotInstancesForCAS( - additionalDoc.getCAS(), target, true); - List<TextRulerExample> testPositives = goldDoc - .createSlotInstancesForCAS(testCas, target, false); + List<TextRulerExample> additionalPositives = additionalDoc + .createSlotInstancesForCAS(additionalDoc.getCAS(), target, true); + List<TextRulerExample> testPositives = goldDoc.createSlotInstancesForCAS(testCas, target, + false); List<TextRulerExample> baseFP = new ArrayList<TextRulerExample>(); for (TextRulerExample e : additionalPositives) { @@ -1740,8 +1517,8 @@ public class TrabalLearner extends TextR } List<TextRulerExample> baseFN = new ArrayList<TextRulerExample>(); for (TextRulerExample e : goldPositives) { - TextRulerExample example = TextRulerToolkit.exampleListContainsAnnotation( - additionalPositives, e.getAnnotation()); + TextRulerExample example = TextRulerToolkit.exampleListContainsAnnotation(additionalPositives, + e.getAnnotation()); if (example == null) { baseFN.add(e); } @@ -1775,8 +1552,8 @@ public class TrabalLearner extends TextR for (TextRulerExample e : baseFN) { TextRulerExample example = TextRulerToolkit.exampleListContainsAnnotation(testFN, e.getAnnotation()); - TextRulerExample coveredExample = TextRulerToolkit.exampleListContainsAnnotation( - goldPositives, e.getAnnotation()); + TextRulerExample coveredExample = TextRulerToolkit + .exampleListContainsAnnotation(goldPositives, e.getAnnotation()); if (example == null) { c.addCoveredPositive(coveredExample); } @@ -1804,22 +1581,10 @@ public class TrabalLearner extends TextR } } - /** - * Removes duplicate rules by their hash value. - * - * @param rules - * @return updated list - */ public static List<TrabalRule> removeDuplicateRules(List<TrabalRule> rules) { return new ArrayList<TrabalRule>(new HashSet<TrabalRule>(rules)); } - /** - * Returns a string, containing the rules of the given list. - * - * @param rules - * @return string representation of given rules - */ public static String getRuleStrings(List<TrabalRule> rules) { String result = ""; for (TrabalRule r : rules) { @@ -1828,12 +1593,6 @@ public class TrabalLearner extends TextR return result; } - /** - * Tests, if the given type is a slot type. - * - * @param type - * @return true, if given type is a slot type - */ public boolean isSlotType(Type type) { for (String slot : slotNames) { if (slot.equals(type.getName())) { @@ -1843,47 +1602,16 @@ public class TrabalLearner extends TextR return false; } + @Override public boolean collectNegativeCoveredInstancesWhenTesting() { return false; } + @Override public String getResultString() { return getFileHeaderString(true) + result; } - // // @Override - // public AnalysisEngine getAnalysisEngine() { - // if (ae == null) { - // String descriptorFile = TextRulerToolkit.getEngineDescriptorFromTMSourceFile(new Path( - // preprocessorTMFile)); - // sendStatusUpdateToDelegate("loading AE...", TextRulerLearnerState.ML_INITIALIZING, false); - // ae = TextRulerToolkit.loadAnalysisEngine(descriptorFile); - // - // // set filters to NO filtering so that we can add it manually with - // // the FILTERTYPE expression! - // String tempRulesFileName = getTempRulesFileName(); - // IPath path = new Path(tempRulesFileName); - // ae.setConfigParameterValue(RutaEngine.MAIN_SCRIPT, path.removeFileExtension() - // .lastSegment()); - // String portableString = path.removeLastSegments(1).toPortableString(); - // ae.setConfigParameterValue(RutaEngine.SCRIPT_PATHS, new String[] { portableString }); - // ae.setConfigParameterValue(RutaEngine.ADDITIONAL_SCRIPTS, new String[0]); - // ae.setConfigParameterValue(RutaEngine.RELOAD_SCRIPT, true); - // ae.setConfigParameterValue(RutaEngine.REMOVE_BASICS, true); - // - // try { - // ae.reconfigure(); - // } catch (ResourceConfigurationException e) { - // TextRulerPlugin.error(e); - // return null; - // } - // } - // return ae; - // } - - /** - * Return the set of additional documents and initializes it, if needed. - */ public TextRulerExampleDocumentSet getAdditionalDocuments() { if (additionalDocuments == null) { if (!StringUtils.isBlank(additionalFolderPath)) { @@ -1897,7 +1625,7 @@ public class TrabalLearner extends TextR return enableFeatures; } - // @Override + @Override public void setParameters(Map<String, Object> params) { if (TextRulerToolkit.DEBUG) saveParametersToTempFolder(params); @@ -1915,12 +1643,6 @@ public class TrabalLearner extends TextR enableFeatures = (Boolean) params.get(ENABLE_FEATURES_KEY); } - /** - * Creates a file in the *.csv format, containing TraBaL errors (error type, file name, false - * annotation and true annotation). - * - * @throws Exception - */ public void getErrorsAsCSV(String filePath) throws Exception { String result = ""; if (exampleDocuments == null) { @@ -1958,11 +1680,8 @@ public class TrabalLearner extends TextR } } - /** - * Compares trabal rules, disregarding the error rate, so basic rules with maximum number of - * covered positives are chosen. - */ protected Comparator<TrabalRule> basicComparator = new Comparator<TrabalRule>() { + @Override public int compare(TrabalRule o1, TrabalRule o2) { // coveredPositives if (o1.getCoveringStatistics().getCoveredPositivesCount() > o2.getCoveringStatistics() @@ -1988,19 +1707,17 @@ public class TrabalLearner extends TextR } }; - /** - * Compares trabal rules, starting with the error rate. - */ protected Comparator<TrabalRule> enhancedComparator = new Comparator<TrabalRule>() { + @Override public int compare(TrabalRule o1, TrabalRule o2) { // positives - negatives - if (o1.getCoveringStatistics().getCoveredPositivesCount() - - o1.getCoveringStatistics().getCoveredNegativesCount() > o2.getCoveringStatistics() - .getCoveredPositivesCount() - o2.getCoveringStatistics().getCoveredNegativesCount()) + if (o1.getCoveringStatistics().getCoveredPositivesCount() - o1.getCoveringStatistics() + .getCoveredNegativesCount() > o2.getCoveringStatistics().getCoveredPositivesCount() + - o2.getCoveringStatistics().getCoveredNegativesCount()) return -1; - if (o1.getCoveringStatistics().getCoveredPositivesCount() - - o1.getCoveringStatistics().getCoveredNegativesCount() < o2.getCoveringStatistics() - .getCoveredPositivesCount() - o2.getCoveringStatistics().getCoveredNegativesCount()) + if (o1.getCoveringStatistics().getCoveredPositivesCount() - o1.getCoveringStatistics() + .getCoveredNegativesCount() < o2.getCoveringStatistics().getCoveredPositivesCount() + - o2.getCoveringStatistics().getCoveredNegativesCount()) return 1; // coveredPositives if (o1.getCoveringStatistics().getCoveredPositivesCount() > o2.getCoveringStatistics() Modified: uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/wien/Wien.java URL: http://svn.apache.org/viewvc/uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/wien/Wien.java?rev=1869967&r1=1869966&r2=1869967&view=diff ============================================================================== --- uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/wien/Wien.java (original) +++ uima/uv3/ruta-v3/trunk/ruta-ep-textruler/src/main/java/org/apache/uima/ruta/textruler/learner/wien/Wien.java Mon Nov 18 12:19:31 2019 @@ -100,8 +100,8 @@ public class Wien extends TextRulerBasic TextRulerLearnerState.ML_RUNNING, false); if (!findRightPatterns()) allOk = false; - sendStatusUpdateToDelegate("Searching for left patterns...", - TextRulerLearnerState.ML_RUNNING, false); + sendStatusUpdateToDelegate("Searching for left patterns...", TextRulerLearnerState.ML_RUNNING, + false); if (!findLeftPatterns()) allOk = false; sendStatusUpdateToDelegate("Searching for head, tail and left1 patterns...", @@ -181,7 +181,6 @@ public class Wien extends TextRulerBasic boolean allFound = true; for (int k = 0; k < slotNames.length; k++) { List<TextRulerRulePattern> rightContexts = getRightContextForSlot(doc, k); - System.out.println(rightContexts.get(0)); int shortest = Integer.MAX_VALUE; for (TextRulerRulePattern p : rightContexts) shortest = p.size() < shortest ? p.size() : shortest; @@ -386,13 +385,13 @@ public class Wien extends TextRulerBasic } else { CAS cas = doc.getCAS(); TextRulerExample firstExample = doc.getPositiveExamples().get(0); - TextRulerExample lastExample = doc.getPositiveExamples().get( - doc.getPositiveExamples().size() - 1); + TextRulerExample lastExample = doc.getPositiveExamples() + .get(doc.getPositiveExamples().size() - 1); TypeSystem ts = cas.getTypeSystem(); Type tokenType = ts.getType(TextRulerToolkit.RUTA_ALL_TYPE_NAME); List<AnnotationFS> headTokens = TextRulerToolkit.getAnnotationsBeforePosition(cas, - firstExample.getAnnotations()[0].getBegin(), 0, TextRulerToolkit - .getFilterSetWithSlotNames(slotNames, filterSet), tokenType); + firstExample.getAnnotations()[0].getBegin(), 0, + TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); TextRulerAnnotation[] lastExampleAnnotations = lastExample.getAnnotations(); List<AnnotationFS> tailTokens = TextRulerToolkit.getAnnotationsAfterPosition(cas, lastExampleAnnotations[lastExampleAnnotations.length - 1].getEnd(), 0, @@ -424,9 +423,9 @@ public class Wien extends TextRulerBasic TextRulerAnnotation[] exampleAnnotations2 = examples.get(i + 1).getAnnotations(); TextRulerAnnotation lastOf1 = exampleAnnotations1[exampleAnnotations1.length - 1]; TextRulerAnnotation firstOf2 = exampleAnnotations2[0]; - List<AnnotationFS> theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, lastOf1 - .getEnd(), firstOf2.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames( - slotNames, filterSet), tokenType); + List<AnnotationFS> theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, + lastOf1.getEnd(), firstOf2.getBegin(), + TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); TextRulerRulePattern thePattern = new TextRulerRulePattern(); for (AnnotationFS afs : theTokens) thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc))); @@ -470,8 +469,8 @@ public class Wien extends TextRulerBasic TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); else theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, slotAnnotation.getEnd(), - nextSlotAnnotation.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames( - slotNames, filterSet), tokenType); + nextSlotAnnotation.getBegin(), + TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); TextRulerRulePattern thePattern = new TextRulerRulePattern(); for (AnnotationFS afs : theTokens) thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc))); @@ -510,12 +509,12 @@ public class Wien extends TextRulerBasic List<AnnotationFS> theTokens; if (prevSlotAnnotation == null) - theTokens = TextRulerToolkit.getAnnotationsBeforePosition(cas, slotAnnotation.getBegin(), - 0, TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); + theTokens = TextRulerToolkit.getAnnotationsBeforePosition(cas, slotAnnotation.getBegin(), 0, + TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); else theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, prevSlotAnnotation.getEnd(), - slotAnnotation.getBegin(), TextRulerToolkit.getFilterSetWithSlotNames(slotNames, - filterSet), tokenType); + slotAnnotation.getBegin(), + TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); TextRulerRulePattern thePattern = new TextRulerRulePattern(); for (AnnotationFS afs : theTokens) thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc), true)); @@ -535,8 +534,8 @@ public class Wien extends TextRulerBasic for (TextRulerExample e : examples) { TextRulerAnnotation slotAnnotation = e.getAnnotations()[slotIndex]; List<AnnotationFS> theTokens = TextRulerToolkit.getAnnotationsWithinBounds(cas, - slotAnnotation.getBegin(), slotAnnotation.getEnd(), TextRulerToolkit - .getFilterSetWithSlotNames(slotNames, filterSet), tokenType); + slotAnnotation.getBegin(), slotAnnotation.getEnd(), + TextRulerToolkit.getFilterSetWithSlotNames(slotNames, filterSet), tokenType); TextRulerRulePattern thePattern = new TextRulerRulePattern(); for (AnnotationFS afs : theTokens) thePattern.add(new WienRuleItem(new TextRulerAnnotation(afs, doc))); @@ -620,8 +619,8 @@ public class Wien extends TextRulerBasic ((WienRuleItem) it).getWordConstraint().setGeneralizeLinkMarkUp(true); int l1Pos = restForL1.find(l1); if (l1Pos < 0 || l1Pos != restForL1.size() - l1.size()) { - TextRulerToolkit.logIf(logReasons, "REASON 1\n\tl1 \t" + l1 + "\n\trestforl1\t" - + restForL1); + TextRulerToolkit.logIf(logReasons, + "REASON 1\n\tl1 \t" + l1 + "\n\trestforl1\t" + restForL1); return constraint3ReturnType.C3_L1CandidateSuffixError; } @@ -690,6 +689,7 @@ public class Wien extends TextRulerBasic return constraint3ReturnType.C3_SUCCESS; } + @Override public String getResultString() { if (theRule == null) return "<no results yet>"; @@ -711,11 +711,8 @@ public class Wien extends TextRulerBasic result += ")};\n\n"; result += "BLOCK(findData) wien_content {\n" - + "\t// find out if tail is before the next occurence of l1\n" - + "\t" - + theRule.getRuleString() - + "\n" - + "\tDocument{->ASSIGN(wien_redo, false)};\n" + + "\t// find out if tail is before the next occurence of l1\n" + "\t" + + theRule.getRuleString() + "\n" + "\tDocument{->ASSIGN(wien_redo, false)};\n" + "\twien_tail{PARTOF(wien_rulemark)->UNMARK(wien_tail), ASSIGN(wien_redo, true)}; // remove tail marks that are no longer relevant for us after the last rule !\n" + "\tDocument{IF(wien_redo)->CALL(filename.findData)};\n" + "}\n"; @@ -725,6 +722,7 @@ public class Wien extends TextRulerBasic return result; } + @Override public void setParameters(Map<String, Object> params) { }
