Revision: 18454 http://sourceforge.net/p/gate/code/18454 Author: ian_roberts Date: 2014-11-13 21:10:30 +0000 (Thu, 13 Nov 2014) Log Message: ----------- Working on consensus building tools. Classification jobs first because they're easy.
/*
 * gate/trunk/plugins/Crowd_Sourcing/src/gate/crowdsource/classification/
 *   MajorityVoteClassificationConsensus.java
 *
 * The commit that added this file also changed the plugin's Eclipse
 * .classpath so that the JRE container points at JavaSE-1.7 instead of the
 * workspace default; this class uses Java 7 multi-catch.
 * svn properties on this file: svn:keywords=Id, svn:eol-style=native.
 */
package gate.crowdsource.classification;

import gate.Annotation;
import gate.AnnotationSet;
import gate.Factory;
import gate.FeatureMap;
import gate.Utils;
import gate.creole.AbstractLanguageAnalyser;
import gate.creole.ExecutionException;
import gate.creole.ExecutionInterruptedException;
import gate.creole.metadata.CreoleParameter;
import gate.creole.metadata.CreoleResource;
import gate.creole.metadata.Optional;
import gate.creole.metadata.RunTime;
import gate.crowdsource.CrowdFlowerConstants;

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

/**
 * Builds a consensus from crowd classification judgments by majority vote.
 * <p>
 * For every original entity annotation, the result annotations that are
 * coextensive with it are treated as the crowd's judgments. The value of the
 * answer feature is tallied across those judgments. If exactly one answer
 * reaches {@link #getMinimumAgreement() minimumAgreement}, a consensus
 * annotation carrying that answer is created in the consensus set. Otherwise
 * the entity is disputed and is handled according to
 * {@link #getNoAgreementAction() noAgreementAction}.
 */
@CreoleResource(name = "Majority-vote consensus builder (classification)", comment = "Process results of a crowd annotation task to find "
        + "where annotators agree and disagree.", helpURL = "http://gate.ac.uk/userguide/sec:crowd:classification")
public class MajorityVoteClassificationConsensus
                                                extends
                                                  AbstractLanguageAnalyser {

  private static final long serialVersionUID = -6741876068621064245L;

  /**
   * Defines the various actions that can be taken for units where there
   * is insufficient agreement between crowd annotators.
   */
  public static enum Action {
    /**
     * Move the disputed result annotations into one set so they can be
     * resolved locally in GATE Developer.
     */
    resolveLocally,

    /**
     * Prepare a new crowd annotation task for disputed entities,
     * offering just the options that were selected by the first round
     * annotators.
     */
    reAnnotateByCrowd
  }

  /** Name of the feature on entity annotations that holds the answer options. */
  private static final String OPTIONS_FEATURE_NAME = "options";

  private String resultASName;

  private String resultAnnotationType;

  private String answerFeatureName;

  private String originalEntityASName;

  private String entityAnnotationType;

  private String consensusASName;

  private String disputeASName;

  private Integer minimumAgreement;

  private Action noAgreementAction;

  /**
   * @return the name of the annotation set holding the crowd judgments
   */
  public String getResultASName() {
    return resultASName;
  }

  /**
   * @param resultASName name of the annotation set holding the crowd
   *          judgments
   */
  @Optional
  @RunTime
  @CreoleParameter(comment = "Annotation set containing the annotations representing crowd judgments", defaultValue = "crowdResults")
  public void setResultASName(String resultASName) {
    this.resultASName = resultASName;
  }

  /**
   * @return the type of the annotations that represent crowd judgments
   */
  public String getResultAnnotationType() {
    return resultAnnotationType;
  }

  /**
   * @param resultAnnotationType type of the annotations that represent
   *          crowd judgments; also used as the type of the annotations
   *          this resource creates for consensus and for locally
   *          resolved disputes
   */
  @RunTime
  @CreoleParameter(comment = "Type of the annotations representing crowd judgments", defaultValue = "Mention")
  public void setResultAnnotationType(String resultAnnotationType) {
    this.resultAnnotationType = resultAnnotationType;
  }

  /**
   * @return the name of the feature that holds the selected answer
   */
  public String getAnswerFeatureName() {
    return answerFeatureName;
  }

  /**
   * @param answerFeatureName name of the feature on result annotations
   *          that holds the selected answer
   */
  @RunTime
  @CreoleParameter(comment = "The feature on result annotations giving the selected answer", defaultValue = "answer")
  public void setAnswerFeatureName(String answerFeatureName) {
    this.answerFeatureName = answerFeatureName;
  }

  /**
   * @return the name of the annotation set holding the original entities
   */
  public String getOriginalEntityASName() {
    return originalEntityASName;
  }

  /**
   * @param originalEntityASName name of the annotation set holding the
   *          original entity annotations
   */
  @Optional
  @RunTime
  @CreoleParameter(comment = "Annotation set containing the original entity annotations that were imported to form CrowdFlower units")
  public void setOriginalEntityASName(String originalEntityASName) {
    this.originalEntityASName = originalEntityASName;
  }

  /**
   * @return the type of the original entity annotations
   */
  public String getEntityAnnotationType() {
    return entityAnnotationType;
  }

  /**
   * @param entityAnnotationType type of the original entity annotations;
   *          also used as the type of the annotations created for the
   *          {@link Action#reAnnotateByCrowd} action
   */
  @RunTime
  @CreoleParameter(comment = "Type of the original entity annotations that were imported to form CrowdFlower units.", defaultValue = "Mention")
  public void setEntityAnnotationType(String entityAnnotationType) {
    this.entityAnnotationType = entityAnnotationType;
  }

  /**
   * @return the name of the annotation set receiving consensus annotations
   */
  public String getConsensusASName() {
    return consensusASName;
  }

  /**
   * @param consensusASName name of the annotation set that receives the
   *          consensus annotations
   */
  @Optional
  @RunTime
  @CreoleParameter(comment = "Annotation set into which consensus annotations (which meet the minimum agreement threshold) should be placed", defaultValue = "crowdConsensus")
  public void setConsensusASName(String consensusASName) {
    this.consensusASName = consensusASName;
  }

  /**
   * @return the name of the annotation set receiving disputed annotations
   */
  public String getDisputeASName() {
    return disputeASName;
  }

  /**
   * @param disputeASName name of the annotation set that receives the
   *          annotations created for disputed entities
   */
  @Optional
  @RunTime
  @CreoleParameter(comment = "Annotation set in which disputed annotations should be created. "
          + "Exactly what form these annotations take depends on the noAgreementAction.", defaultValue = "crowdDisputed")
  public void setDisputeASName(String disputeASName) {
    this.disputeASName = disputeASName;
  }

  /**
   * @return the number of identical answers required for consensus
   */
  public Integer getMinimumAgreement() {
    return minimumAgreement;
  }

  /**
   * @param minimumAgreement number of judgments that must give the same
   *          answer for that answer to count as agreed
   */
  @RunTime
  @CreoleParameter(comment = "Minimum number of annotators who must agree for the entity to be approved and moved into the consensus set.")
  public void setMinimumAgreement(Integer minimumAgreement) {
    this.minimumAgreement = minimumAgreement;
  }

  /**
   * @return the action applied to entities without an agreed answer
   */
  public Action getNoAgreementAction() {
    return noAgreementAction;
  }

  /**
   * @param noAgreementAction the action applied to entities without an
   *          agreed answer
   */
  @RunTime
  @CreoleParameter(comment = "The action to take on annotations which do not meet the minimum agreement threshold.", defaultValue = "resolveLocally")
  public void setNoAgreementAction(Action noAgreementAction) {
    this.noAgreementAction = noAgreementAction;
  }

  /**
   * Processes every original entity annotation of the current document:
   * tallies the answers of its coextensive judgments and either records
   * the agreed answer in the consensus set or applies the configured
   * {@link Action} to the dispute set.
   *
   * @throws ExecutionInterruptedException if an interrupt was requested
   *           before or during processing
   * @throws ExecutionException if there is no document, if
   *           {@code minimumAgreement} has not been set, or if a
   *           replacement "options" container cannot be instantiated
   */
  @Override
  public void execute() throws ExecutionException {
    checkInterrupted();
    if(getDocument() == null) {
      throw new ExecutionException("No document to process");
    }
    if(minimumAgreement == null) {
      // fail with a clear message rather than an NPE on auto-unboxing
      throw new ExecutionException(
              "The minimumAgreement parameter must be set");
    }
    final int threshold = minimumAgreement.intValue();

    AnnotationSet allEntities =
            getDocument().getAnnotations(originalEntityASName).get(
                    entityAnnotationType);
    AnnotationSet allResults =
            getDocument().getAnnotations(resultASName)
                    .get(resultAnnotationType);
    AnnotationSet consensusAS = getDocument().getAnnotations(consensusASName);
    AnnotationSet disputeAS = getDocument().getAnnotations(disputeASName);

    for(Annotation origEntity : allEntities) {
      checkInterrupted();
      AnnotationSet judgments =
              Utils.getCoextensiveAnnotations(allResults, origEntity);
      Map<String, Integer> answerCounts = tallyAnswers(judgments);
      String agreedAnswer = findAgreedAnswer(answerCounts, threshold);

      if(agreedAnswer != null) {
        // exactly one answer is over threshold, we have a winner
        Utils.addAnn(consensusAS, origEntity, resultAnnotationType,
                Utils.featureMap(answerFeatureName, agreedAnswer));
      } else if(noAgreementAction == Action.resolveLocally) {
        copyJudgments(judgments, disputeAS);
      } else if(noAgreementAction == Action.reAnnotateByCrowd) {
        createReannotationUnit(origEntity, answerCounts, disputeAS);
      }
    }
  }

  /**
   * Throws if an interrupt has been requested. The flag is cleared before
   * throwing so that the request is consumed by this run; leaving it set
   * would make each later call to {@link #execute()} fail at its first
   * check as well.
   */
  private void checkInterrupted() throws ExecutionInterruptedException {
    if(isInterrupted()) {
      interrupted = false;
      throw new ExecutionInterruptedException();
    }
  }

  /**
   * Counts how often each answer occurs among the given judgments.
   * Judgments without a value for the answer feature are ignored.
   */
  private Map<String, Integer> tallyAnswers(AnnotationSet judgments) {
    Map<String, Integer> answerCounts = new HashMap<String, Integer>();
    for(Annotation judgment : judgments) {
      String answer = (String)judgment.getFeatures().get(answerFeatureName);
      if(answer != null) {
        Integer count = answerCounts.get(answer);
        answerCounts.put(answer, count == null ? 1 : count + 1);
      }
    }
    return answerCounts;
  }

  /**
   * Returns the single answer whose count reaches the threshold, or
   * {@code null} if no answer does or if more than one does (the latter
   * is only possible when the threshold is at most half the number of
   * judgments).
   */
  private static String findAgreedAnswer(Map<String, Integer> answerCounts,
          int threshold) {
    String agreedAnswer = null;
    for(Map.Entry<String, Integer> tally : answerCounts.entrySet()) {
      if(tally.getValue() >= threshold) {
        if(agreedAnswer != null) {
          // a second answer also met the threshold - disputed
          return null;
        }
        agreedAnswer = tally.getKey();
      }
    }
    return agreedAnswer;
  }

  /**
   * Copies every judgment, with a copy of its features, into the dispute
   * set as an annotation of the result annotation type.
   */
  private void copyJudgments(AnnotationSet judgments, AnnotationSet disputeAS) {
    for(Annotation judgment : judgments) {
      FeatureMap fm = Factory.newFeatureMap();
      fm.putAll(judgment.getFeatures());
      Utils.addAnn(disputeAS, judgment, resultAnnotationType, fm);
    }
  }

  /**
   * Creates, in the dispute set, a copy of the original entity annotation
   * whose "options" feature is restricted to the options that were
   * actually chosen in the judgments. The unit ID feature is not copied.
   */
  private void createReannotationUnit(Annotation origEntity,
          Map<String, Integer> answerCounts, AnnotationSet disputeAS)
          throws ExecutionException {
    FeatureMap fm = Factory.newFeatureMap();
    for(Object k : origEntity.getFeatures().keySet()) {
      if(!CrowdFlowerConstants.UNIT_ID_FEATURE_NAME.equals(k)
              && !OPTIONS_FEATURE_NAME.equals(k)) {
        fm.put(k, origEntity.getFeatures().get(k));
      }
    }

    Object newOptions =
            filterOptions(origEntity.getFeatures().get(OPTIONS_FEATURE_NAME),
                    answerCounts);
    if(newOptions != null) {
      fm.put(OPTIONS_FEATURE_NAME, newOptions);
    }
    Utils.addAnn(disputeAS, origEntity, entityAnnotationType, fm);
  }

  /**
   * Builds a replacement "options" structure of the same concrete class
   * as the original, containing only the options whose string form was
   * given as an answer. Collections are filtered by element, maps by key;
   * {@code null} elements and keys are skipped.
   *
   * @return the filtered structure, or {@code null} if the original is
   *         neither a {@link Collection} nor a {@link Map}
   * @throws ExecutionException if the original's class cannot be
   *           instantiated through its no-argument constructor
   */
  // unchecked: the reflectively created raw container is assigned to a
  // parameterized type; it starts empty and only this method fills it
  @SuppressWarnings("unchecked")
  private static Object filterOptions(Object origOptions,
          Map<String, Integer> answerCounts) throws ExecutionException {
    try {
      if(origOptions instanceof Collection<?>) {
        Collection<String> newOptions =
                origOptions.getClass().asSubclass(Collection.class)
                        .newInstance();
        for(Object origOption : (Collection<?>)origOptions) {
          if(origOption == null) {
            continue;
          }
          String strOption = origOption.toString();
          if(answerCounts.containsKey(strOption)) {
            newOptions.add(strOption);
          }
        }
        return newOptions;
      } else if(origOptions instanceof Map<?, ?>) {
        Map<String, Object> newOptions =
                origOptions.getClass().asSubclass(Map.class).newInstance();
        for(Map.Entry<?, ?> origOption : ((Map<?, ?>)origOptions).entrySet()) {
          if(origOption.getKey() == null) {
            continue;
          }
          String strOption = origOption.getKey().toString();
          if(answerCounts.containsKey(strOption)) {
            newOptions.put(strOption, origOption.getValue());
          }
        }
        return newOptions;
      }
      return null;
    } catch(InstantiationException | IllegalAccessException e) {
      // keep the cause so the reason for the failure is not lost
      throw new ExecutionException("Couldn't create new options of type "
              + origOptions.getClass().getName(), e);
    }
  }

}