Revision: 19035 http://sourceforge.net/p/gate/code/19035 Author: markagreenwood Date: 2015-12-14 12:05:27 +0000 (Mon, 14 Dec 2015) Log Message: ----------- checking in my speedup changes before I lose track of them -- the speed improvement can be huge on long documents which is nice
Modified Paths: -------------- gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy Modified: gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy =================================================================== --- gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy 2015-12-14 02:33:14 UTC (rev 19034) +++ gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy 2015-12-14 12:05:27 UTC (rev 19035) @@ -7,17 +7,22 @@ List<Annotation> mwList = new ArrayList<Annotation>(inputAS.get("MultiWord")); Collections.sort(mwList, new OffsetComparator()); -// the OffsetComparator only looks at the start offset for (int i=0 ; i < mwList.size() - 1 ; i++) { Annotation mwi = mwList.get(i); for (int j=i+1 ; j < mwList.size() ; j++) { Annotation mwj = mwList.get(j); + if (mwj.getStartNode().getOffset() > mwi.getStartNode().getOffset()) { + //if we've moved past the start offset of the outer annotation then + //because the annotations are sorted we know we'll never find a matching + //one so we can safely stop looking. + break; + } + if (mwj.getStartNode().getOffset().equals(mwi.getStartNode().getOffset()) && mwj.getEndNode().getOffset().equals(mwi.getEndNode().getOffset()) ) { inputAS.remove(mwi); - break; } } } @@ -42,9 +47,13 @@ exclusionTypes.add("Number"); AnnotationSet candidates = inputAS.get(termTypes); + +AnnotationSet excluded = inputAS.get(exclusionTypes); +AnnotationSet strongStop = inputAS.get("StrongStop"); + for (Annotation candidate : candidates) { // delete unwanted term candidates - if (! gate.Utils.getCoveringAnnotations(inputAS, candidate).get(exclusionTypes).isEmpty()) { + if (! gate.Utils.getCoveringAnnotations(excluded, candidate).isEmpty()) { FeatureMap newf = Factory.newFeatureMap(); newf.putAll(candidate.getFeatures()); String newType = "deleted_NE_" + candidate.getType(); @@ -52,7 +61,7 @@ inputAS.remove(candidate); } - else if (! 
gate.Utils.getContainedAnnotations(inputAS, candidate, "StrongStop").isEmpty()) { + else if (! gate.Utils.getContainedAnnotations(strongStop, candidate).isEmpty()) { FeatureMap newf = Factory.newFeatureMap(); newf.putAll(candidate.getFeatures()); String newType = "deleted_SS_" + candidate.getType(); This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs