Pei helped me track down an issue with performance I'd noticed in the
dictionary annotator, and I have filed the issue here:
https://issues.apache.org/jira/browse/CTAKES-143
I implemented a quick-and-dirty proof-of-concept fix and noticed a
dramatic performance improvement. I attached the patch to the issue,
but it involves changing an interface (the patch currently does not try
to fix the other implementing classes, so it is obviously not ready for
prime time), so I wanted to solicit feedback from the list first in case
anyone with better knowledge of that module has better engineering ideas
than what I came up with.
Thanks,
--
Tim Miller, PhD
Postdoctoral Research Fellow
Children's Hospital Informatics Program
Children's Hospital Boston and Harvard Medical School
617-919-1223
Index: src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java
===================================================================
--- src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java (revision 1431674)
+++ src/main/java/org/apache/ctakes/dictionary/lookup/ae/DictionaryLookupAnnotator.java (working copy)
@@ -115,10 +115,8 @@
while (windowItr.hasNext()) {
Annotation window = (Annotation) windowItr.next();
- List lookupTokensInWindow = constrainToWindow(
- window,
- lInit.getLookupTokenIterator(jcas));
-
+ List lookupTokensInWindow = lInit.getLookupTokenIterator(jcas, window);
+
Map ctxMap = lInit.getContextMap(
jcas,
window.getBegin(),
Index: src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java
===================================================================
--- src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java (revision 1431674)
+++ src/main/java/org/apache/ctakes/dictionary/lookup/ae/FirstTokenPermLookupInitializerImpl.java (working copy)
@@ -140,13 +140,12 @@
return iv_exclusionTagSet.contains(tag.toLowerCase());
}
- public Iterator getLookupTokenIterator(JCas jcas)
+ public List getLookupTokenIterator(JCas jcas, Annotation covering)
throws AnnotatorInitializationException
{
List ltList = new ArrayList();
- JFSIndexRepository indexes = jcas.getJFSIndexRepository();
- Iterator btaItr = indexes.getAnnotationIndex(BaseToken.type).iterator();
+ Iterator btaItr = org.uimafit.util.JCasUtil.selectCovered(jcas, BaseToken.class, covering).iterator();
while (btaItr.hasNext())
{
BaseToken bta = (BaseToken) btaItr.next();
@@ -184,7 +183,7 @@
ltList.add(lt);
}
}
- return ltList.iterator();
+ return ltList;
}
public Iterator getLookupWindowIterator(JCas jcas)
Index: src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java
===================================================================
--- src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java (revision 1431674)
+++ src/main/java/org/apache/ctakes/dictionary/lookup/ae/LookupInitializer.java (working copy)
@@ -19,12 +19,14 @@
package org.apache.ctakes.dictionary.lookup.ae;
import java.util.Iterator;
+import java.util.List;
import java.util.Map;
import org.apache.ctakes.dictionary.lookup.DictionaryEngine;
import org.apache.ctakes.dictionary.lookup.algorithms.LookupAlgorithm;
import org.apache.uima.analysis_engine.annotator.AnnotatorInitializationException;
import org.apache.uima.jcas.JCas;
+import org.apache.uima.jcas.tcas.Annotation;
/**
@@ -50,7 +52,7 @@
* @return Iterator over LookupToken objects.
* @throws AnnotatorInitializationException
*/
- public Iterator getLookupTokenIterator(JCas jcas)
+ public List getLookupTokenIterator(JCas jcas, Annotation covering)
throws AnnotatorInitializationException;
/**