Pei helped me track down a performance issue I'd noticed in the dictionary annotator, and I have filed it here: https://issues.apache.org/jira/browse/CTAKES-143

I implemented a quick-and-dirty proof-of-concept fix and noticed a dramatic performance improvement. I attached the patch to the issue, but it involves changing an interface (it currently does not try to fix the other implementing classes, so it is obviously not ready for prime time), so I wanted to solicit feedback from the list first in case anyone with better knowledge of that module has better engineering ideas than what I came up with.

Thanks,

--
Tim Miller, PhD
Postdoctoral Research Fellow
Children's Hospital Informatics Program
Children's Hospital Boston and Harvard Medical School
617-919-1223

Index: src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java
===================================================================
--- src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java	(revision 1431674)
+++ src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java	(working copy)
@@ -115,10 +115,8 @@
 				while (windowItr.hasNext()) {
 
 					Annotation window = (Annotation) windowItr.next();
-					List lookupTokensInWindow = constrainToWindow(
-							window,
-							lInit.getLookupTokenIterator(jcas));
-
+					List lookupTokensInWindow = lInit.getLookupTokenIterator(jcas, window);
+											
 					Map ctxMap = lInit.getContextMap(
 							jcas,
 							window.getBegin(),
Index: src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java
===================================================================
--- src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java	(revision 1431674)
+++ src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java	(working copy)
@@ -140,13 +140,12 @@
 		return iv_exclusionTagSet.contains(tag.toLowerCase());
 	}
 
-	public Iterator getLookupTokenIterator(JCas jcas)
+	public List getLookupTokenIterator(JCas jcas, Annotation covering)
 			throws AnnotatorInitializationException
 	{
 		List ltList = new ArrayList();
 
-		JFSIndexRepository indexes = jcas.getJFSIndexRepository();
-		Iterator btaItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
+		Iterator btaItr = org.uimafit.util.JCasUtil.selectCovered(jcas, BaseToken.class, covering).iterator();
 		while (btaItr.hasNext())
 		{
 			BaseToken bta = (BaseToken) btaItr.next();
@@ -184,7 +183,7 @@
 				ltList.add(lt);
 			}
 		}
-		return ltList.iterator();
+		return ltList;
 	}
 
 	public Iterator getLookupWindowIterator(JCas jcas)
Index: src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java
===================================================================
--- src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java	(revision 1431674)
+++ src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java	(working copy)
@@ -19,12 +19,14 @@
 package org.apache.ctakes.dictionary.lookup.ae;
 
 import java.util.Iterator;
+import java.util.List;
 import java.util.Map;
 
 import org.apache.ctakes.dictionary.lookup.DictionaryEngine;
 import org.apache.ctakes.dictionary.lookup.algorithms.LookupAlgorithm;
 import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
 import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
 
 
 /**
@@ -50,7 +52,7 @@
      * @return Iterator over LookupToken objects.
      * @throws AnnotatorInitializationException
      */
-    public Iterator getLookupTokenIterator(JCas jcas)
+    public List getLookupTokenIterator(JCas jcas, Annotation covering)
             throws AnnotatorInitializationException;
 
     /**

Reply via email to