Revision: 19035
          http://sourceforge.net/p/gate/code/19035
Author:   markagreenwood
Date:     2015-12-14 12:05:27 +0000 (Mon, 14 Dec 2015)
Log Message:
-----------
checking in my speedup changes before I lose track of them -- the speed 
improvement can be huge on long documents, which is nice

Modified Paths:
--------------
    gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy

Modified: 
gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy
===================================================================
--- gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy  
2015-12-14 02:33:14 UTC (rev 19034)
+++ gate/trunk/plugins/TermRaider/resources/groovy/DeduplicateMultiWord.groovy  
2015-12-14 12:05:27 UTC (rev 19035)
@@ -7,17 +7,22 @@
 List<Annotation> mwList = new ArrayList<Annotation>(inputAS.get("MultiWord"));
 Collections.sort(mwList, new OffsetComparator());
 
-// the OffsetComparator only looks at the start offset
 for (int i=0 ; i < mwList.size() - 1 ; i++) {
   Annotation mwi = mwList.get(i);
   
   for (int j=i+1 ; j < mwList.size() ; j++) {
     Annotation mwj = mwList.get(j);
     
+    if (mwj.getStartNode().getOffset() > mwi.getStartNode().getOffset()) {
+       //if we've moved past the start offset of the outer annotation then
+       //because the annotations are sorted we know we'll never find a matching
+       //one so we can safely stop looking.
+       break;
+    }
+
     if (mwj.getStartNode().getOffset().equals(mwi.getStartNode().getOffset())
         && mwj.getEndNode().getOffset().equals(mwi.getEndNode().getOffset()) ) 
{
       inputAS.remove(mwi);
-      break;
     }
   }
 }
@@ -42,9 +47,13 @@
 exclusionTypes.add("Number");
 
 AnnotationSet candidates = inputAS.get(termTypes);
+
+AnnotationSet excluded = inputAS.get(exclusionTypes);
+AnnotationSet strongStop = inputAS.get("StrongStop");
+
 for (Annotation candidate : candidates) {
   // delete unwanted term candidates
-  if (! gate.Utils.getCoveringAnnotations(inputAS, 
candidate).get(exclusionTypes).isEmpty()) {
+  if (! gate.Utils.getCoveringAnnotations(excluded, candidate).isEmpty()) {
     FeatureMap newf = Factory.newFeatureMap();
     newf.putAll(candidate.getFeatures());
     String newType = "deleted_NE_" + candidate.getType();
@@ -52,7 +61,7 @@
     inputAS.remove(candidate);  
   }
   
-  else if (! gate.Utils.getContainedAnnotations(inputAS, candidate, 
"StrongStop").isEmpty()) {
+  else if (! gate.Utils.getContainedAnnotations(strongStop, 
candidate).isEmpty()) {
     FeatureMap newf = Factory.newFeatureMap();
     newf.putAll(candidate.getFeatures());
     String newType = "deleted_SS_" + candidate.getType();

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to