Author: rec
Date: Tue Aug 9 10:51:04 2016
New Revision: 1755595
URL: http://svn.apache.org/viewvc?rev=1755595&view=rev
Log:
[UIMA-5046] Faster indexCovered implementation
- Added new implementation (typically approx. twice as fast as previous)
- Code currently still contains a lot of debugging lines (inactive) and an
alternative purging method (inactive) to be removed later
- Unit test improved to report performance also for indexing and index lookups
- Increased number of iterations in randomized unit test to check new
implementation more thoroughly
Modified:
uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/util/CasUtil.java
uima/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/util/JCasUtilTest.java
Modified:
uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/util/CasUtil.java
URL:
http://svn.apache.org/viewvc/uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/util/CasUtil.java?rev=1755595&r1=1755594&r2=1755595&view=diff
==============================================================================
---
uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/util/CasUtil.java
(original)
+++
uima/uimafit/trunk/uimafit-core/src/main/java/org/apache/uima/fit/util/CasUtil.java
Tue Aug 9 10:51:04 2016
@@ -26,15 +26,16 @@ import static java.util.Collections.empt
import static java.util.Collections.unmodifiableList;
import static java.util.Collections.unmodifiableMap;
+import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
+import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
-
import org.apache.uima.cas.ArrayFS;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASRuntimeException;
@@ -130,7 +131,13 @@ public final class CasUtil {
}
final Type type = aCas.getTypeSystem().getType(typeName);
if (type == null) {
- throw new IllegalArgumentException("Undeclared type [" + aTypename +
"]");
+ StringBuilder sb = new StringBuilder();
+ Iterator<Type> i = aCas.getTypeSystem().getTypeIterator();
+ while (i.hasNext()) {
+ sb.append(i.next().getName()).append('\n');
+ }
+ throw new IllegalArgumentException("Undeclared type [" + aTypename + "].
Available types: "
+ + sb);
}
return type;
}
@@ -772,16 +779,159 @@ public final class CasUtil {
}
}
};
- for (AnnotationFS s : select(cas, type)) {
- for (AnnotationFS u : selectCovered(cas, coveredType, s)) {
- Collection<AnnotationFS> c = index.get(s);
- if (c == EMPTY_LIST) {
- c = new LinkedList<AnnotationFS>();
- index.put(s, c);
+
+ // Get the covering and covered annotations
+ Collection<AnnotationFS> collType = select(cas, type);
+ Collection<AnnotationFS> collCoveredType = select(cas, coveredType);
+
+ // Convert them into array for faster access
+ AnnotationFS[] typeArray = collType.toArray(new
AnnotationFS[collType.size()]);
+ AnnotationFS[] coveredTypeArray = collCoveredType
+ .toArray(new AnnotationFS[collCoveredType.size()]);
+
+ // Keeps currently "open" annotations in a sorted order
+ Deque<AnnotationFS> memory = new ArrayDeque<>();
+ Deque<AnnotationFS> memory2 = new ArrayDeque<>();
+
+ // Array cursors
+ int o = 0;
+ int i = 0;
+
+ // Whether to log debug information
+ boolean debug = false;
+
+ while ((o < typeArray.length || !memory.isEmpty()) && i <
coveredTypeArray.length) {
+ // Always make sure the memory contains at least one covering annotation
to compare against
+ if (memory.isEmpty()) {
+ memory.push(typeArray[o]);
+ if (debug) {
+ System.out.printf("NEXT OUTER%n");
+ }
+ o++;
+ }
+ AnnotationFS bottom = memory.peek();
+
+ // Fast-forward over annotations which cannot be covered by the bottom
element because they
+ // start earlier.
+ AnnotationFS iFS = coveredTypeArray[i];
+ while (i < coveredTypeArray.length-1 && iFS.getBegin() <
bottom.getBegin()) {
+ if (debug) {
+ System.out.printf("Skipping inner: %d %d%n", iFS.getBegin(),
iFS.getEnd());
}
- c.add(u);
+ i++;
+ iFS = coveredTypeArray[i];
+ }
+
+ // Cache begin/end of current covered annotation for faster access
+ int iFSbegin = iFS.getBegin();
+ int iFSend = iFS.getEnd();
+
+ // If there is any chance that the covering annotations in the memory
contains the current
+ // covered ...
+ if (bottom.getBegin() <= iFS.getBegin()) {
+ // ... collect as many potentially covering annotations as possible
for the current covered
+ // annotation
+ while (o < typeArray.length && typeArray[o].getBegin() <= iFSbegin) {
+ memory.push(typeArray[o]);
+ o++;
+ }
+
+ if (debug) {
+ StringBuilder sb2 = new StringBuilder();
+ for (AnnotationFS f : memory) {
+ sb2.append(f.getBegin()).append("-").append(f.getEnd()).append("
");
+ }
+ System.out.printf("OUTER 1: %s%n", sb2);
+ System.out.printf("INNER : %d-%d%n", iFSbegin, iFSend);
+ }
+
+ // Record covered annotations
+ for (AnnotationFS covering : memory) {
+ if (covering.getBegin() <= iFSbegin && iFS.getEnd() <=
covering.getEnd()) {
+ Collection<AnnotationFS> c = index.get(covering);
+ if (c == EMPTY_LIST) {
+ c = new LinkedList<AnnotationFS>();
+ index.put(covering, c);
+ }
+ c.add(iFS);
+ if (debug) {
+ System.out.printf("-- %d-%d covers %d-%d%n",
covering.getBegin(), covering.getEnd(),
+ iFSbegin, iFSend);
+ }
+ }
+ else {
+ if (debug) {
+ System.out.printf("-- %d-%d NOT covers %d-%d%n",
covering.getBegin(),
+ covering.getEnd(), iFSbegin, iFSend);
+ }
+ }
+ }
+
+ if (debug) {
+ System.out.printf("NEXT INNER 1%n");
+ }
+ i++;
+ }
+ else {
+ if (debug) {
+ StringBuilder sb = new StringBuilder();
+ for (AnnotationFS f : memory) {
+ sb.append(f.getBegin()).append("-").append(f.getEnd()).append(" ");
+ }
+ System.out.printf("OUTER 2: %s%n", sb);
+ System.out.printf("INNER : %d-%d%n", iFSbegin, iFS.getEnd());
+ System.out.printf("HOPELESS!%n");
+
+ System.out.printf("NEXT INNER 2%n");
+ }
+ i++;
+ }
+
+ // Purge covering annotations from memory that cannot match anymore
given the currently
+ int purgeImpl = 0;
+ // exampled covered annotation
+ if (purgeImpl == 0) {
+ // Alternative implementation: re-uses memory
+ Iterator<AnnotationFS> purgeIterator = memory.iterator();
+ while (purgeIterator.hasNext()) {
+ AnnotationFS purgeCandidate = purgeIterator.next();
+ if (purgeCandidate.getEnd() < iFS.getBegin()) {
+ if (debug) {
+ System.out.printf("Dropping: %d-%d%n", purgeCandidate.getBegin(),
+ purgeCandidate.getEnd());
+ }
+ purgeIterator.remove();
+ }
+ }
+ }
+
+ // Alternative implementation: uses more memory but faster?
+ if (purgeImpl == 1) {
+ memory2.clear();
+ for (AnnotationFS purgeCandidate : memory) {
+ if (purgeCandidate.getEnd() < iFS.getBegin()) {
+ if (debug) {
+ System.out.printf("Dropping: %d-%d%n", purgeCandidate.getBegin(),
+ purgeCandidate.getEnd());
+ }
+ }
+ else {
+ memory2.add(purgeCandidate);
+ }
+ }
+
+ // Swap
+ Deque<AnnotationFS> buf = memory;
+ memory = memory2;
+ memory2 = buf;
+ }
+
+ if (debug) {
+ System.out.printf("outer: %d/%d inner: %d/%d%n", o, typeArray.length,
i,
+ coveredTypeArray.length);
}
}
+
return unmodifiableMap(index);
}
Modified:
uima/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/util/JCasUtilTest.java
URL:
http://svn.apache.org/viewvc/uima/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/util/JCasUtilTest.java?rev=1755595&r1=1755594&r2=1755595&view=diff
==============================================================================
---
uima/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/util/JCasUtilTest.java
(original)
+++
uima/uimafit/trunk/uimafit-core/src/test/java/org/apache/uima/fit/util/JCasUtilTest.java
Tue Aug 9 10:51:04 2016
@@ -125,7 +125,7 @@ public class JCasUtilTest extends Compon
@Test
public void testSelectCoverRandom() throws Exception {
- final int ITERATIONS = 10;
+ final int ITERATIONS = 25;
for (int i = 0; i < ITERATIONS; i++) {
CAS cas = jCas.getCas();
@@ -134,24 +134,41 @@ public class JCasUtilTest extends Compon
JCas jcas = cas.getJCas();
long timeNaive = 0;
long timeOptimized = 0;
- for (Token t : select(jcas, Token.class)) {
+
+ // Prepare the index
+ long timeIndexed = System.currentTimeMillis();
+ Map<Sentence, Collection<Token>> index = indexCovered(jcas,
Sentence.class, Token.class);
+ timeIndexed = System.currentTimeMillis() - timeIndexed;
+
+ for (Sentence t : select(jcas, Sentence.class)) {
long ti = System.currentTimeMillis();
// The naive approach is assumed to be correct
- List<Sentence> stem1 = selectCovered(jcas, Sentence.class,
t.getBegin(), t.getEnd());
+ List<Token> expected = selectCovered(jcas, Token.class, t.getBegin(),
t.getEnd());
timeNaive += System.currentTimeMillis() - ti;
+ // Record time for optimized selectCovered
ti = System.currentTimeMillis();
- List<Sentence> stem2 = selectCovered(jcas, Sentence.class, t);
+ List<Token> actual1 = selectCovered(jcas, Token.class, t);
timeOptimized += System.currentTimeMillis() - ti;
- Collection<Sentence> stem3 = indexCovered(jcas, Token.class,
Sentence.class).get(t);
+ // Record index lookup time
+ ti = System.currentTimeMillis();
+ Collection<Token> actual2 = index.get(t);
+ timeIndexed += System.currentTimeMillis() - ti;
- check(jcas, t, stem1, stem2);
- check(jcas, t, stem1, stem3);
+ check(jcas, t, expected, actual1);
+ check(jcas, t, expected, actual2);
+
+ // System.out.printf("%n--- OK ---------------%n%n");
}
- System.out.format("Speed up factor %.2f [naive:%d optimized:%d
diff:%d]\n",
- (double) timeNaive / (double) timeOptimized, timeNaive,
timeOptimized, timeNaive
- - timeOptimized);
+ System.out.printf(
+ "%3d Optimized: speed up factor %3.2f [naive:%4d optimized:%4d
(diff:%4d)]%n", i,
+ (double) timeNaive / (double) timeOptimized, timeNaive,
timeOptimized,
+ timeNaive - timeOptimized);
+ System.out.printf(
+ "%3d Indexed: speed up factor %3.2f [naive:%4d indexed :%4d
(diff:%4d)]%n%n", i,
+ (double) timeNaive / (double) timeIndexed, timeNaive,
timeIndexed,
+ timeNaive - timeIndexed);
}
}
@@ -274,7 +291,7 @@ public class JCasUtilTest extends Compon
return t;
}
- private void check(JCas jcas, Token t, Collection<? extends Annotation> a1,
+ private void check(JCas jcas, Annotation t, Collection<? extends Annotation>
a1,
Collection<? extends Annotation> a2) {
// List<Annotation> annos = new ArrayList<Annotation>();
// FSIterator fs = jcas.getAnnotationIndex().iterator();