[
https://issues.apache.org/jira/browse/UIMA-4049?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Richard Eckart de Castilho updated UIMA-4049:
---------------------------------------------
Description:
When annotations are removed from indexes, sometimes they come back... the
following test case shows how an annotation is removed but still present when
iterating over the index later.
{code}
@Test
public void testForZombies() throws Exception
{
// No zombie here
int[] offsets1 = { 0, 4, 5, 11, 12, 21, 22, 25, 26, 29, 30, 35, 36, 40,
41, 50, 51, 60, 61,
64, 64, 65 };
testForZombies("Dies flößte Friedrich II. für seine neue Eroberung
Besorgnis ein.", offsets1);
// Zombie hiding in here
int[] offsets2 = { 0, 3, 4, 7, 8, 13, 14, 18, 19, 22, 23, 33, 34, 35 };
testForZombies("Ich bin Franz III. von Hammerfels !", offsets2);
}
public void testForZombies(String aText, int[] aOffsets) throws Exception
{
// Init some dictionaries we ues
Set<String> names = new HashSet<String>();
names.add("Friedrich");
names.add("Franz");
Set<String> suffix = new HashSet<String>();
suffix.add("II.");
suffix.add("III.");
// Set up type system
TypeSystemDescription tsd = new TypeSystemDescription_impl();
tsd.addType("Token", "", CAS.TYPE_NAME_ANNOTATION);
// Create CAS
CAS jcas = CasCreationUtils.createCas(tsd, null, null);
jcas.setDocumentText(aText);
Type tokenType = jcas.getTypeSystem().getType("Token");
Feature beginFeature = tokenType.getFeatureByBaseName("begin");
// Create tokens in CAS
for (int i = 0; i < aOffsets.length; i += 2) {
jcas.addFsToIndexes(jcas.createAnnotation(tokenType, aOffsets[i],
aOffsets[i+1]));
}
// List the tokens in the CAS
for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
System.out.printf("Starting with %s%n", token.getCoveredText());
}
// Merge some tokens, in particular "Franz" "III." -> "Franz III." and
"Friedrich" "II."
// into "Friedrich II."
AnnotationFS previous = null;
List<AnnotationFS> toDelete = new ArrayList<>();
for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
if (previous != null && names.contains(previous.getCoveredText())
&& suffix.contains(token.getCoveredText())) {
token.setIntValue(beginFeature, previous.getBegin());
toDelete.add(previous);
}
previous = token;
}
// Remove the no longer necessary tokens ("Friedrich" and "Franz"),
since we expanded the
// following tokens "III." and "II." to include their text
Set<String> removedWords = new HashSet<String>();
for (AnnotationFS token : toDelete) {
System.out.printf("Removing %s%n", token.getCoveredText());
removedWords.add(token.getCoveredText());
jcas.removeFsFromIndexes(token);
}
// Check if the tokens that we wanted to remove are really gone
for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
System.out.printf("Remaining %s%n", token.getCoveredText());
if (removedWords.contains(token.getCoveredText())) {
org.junit.Assert.fail("I saw a zombie!!!");
}
}
}
{code}
was:When annotations are removed from indexes, sometimes they come back...
the attached test case shows how an annotation is removed but still present
when iterating over the index later.
> The curious case of the zombie annotation
> -----------------------------------------
>
> Key: UIMA-4049
> URL: https://issues.apache.org/jira/browse/UIMA-4049
> Project: UIMA
> Issue Type: Bug
> Components: Core Java Framework
> Reporter: Richard Eckart de Castilho
> Assignee: Marshall Schor
>
> When annotations are removed from indexes, sometimes they come back... the
> following test case shows how an annotation is removed but still present when
> iterating over the index later.
> {code}
> @Test
> public void testForZombies() throws Exception
> {
> // No zombie here
> int[] offsets1 = { 0, 4, 5, 11, 12, 21, 22, 25, 26, 29, 30, 35, 36,
> 40, 41, 50, 51, 60, 61,
> 64, 64, 65 };
> testForZombies("Dies flößte Friedrich II. für seine neue Eroberung Besorgnis ein.",
> offsets1);
>
> // Zombie hiding in here
> int[] offsets2 = { 0, 3, 4, 7, 8, 13, 14, 18, 19, 22, 23, 33, 34, 35
> };
> testForZombies("Ich bin Franz III. von Hammerfels !", offsets2);
> }
> public void testForZombies(String aText, int[] aOffsets) throws Exception
> {
> // Init some dictionaries we use
> Set<String> names = new HashSet<String>();
> names.add("Friedrich");
> names.add("Franz");
> Set<String> suffix = new HashSet<String>();
> suffix.add("II.");
> suffix.add("III.");
> // Set up type system
> TypeSystemDescription tsd = new TypeSystemDescription_impl();
> tsd.addType("Token", "", CAS.TYPE_NAME_ANNOTATION);
>
> // Create CAS
> CAS jcas = CasCreationUtils.createCas(tsd, null, null);
> jcas.setDocumentText(aText);
>
> Type tokenType = jcas.getTypeSystem().getType("Token");
> Feature beginFeature = tokenType.getFeatureByBaseName("begin");
>
> // Create tokens in CAS
> for (int i = 0; i < aOffsets.length; i += 2) {
> jcas.addFsToIndexes(jcas.createAnnotation(tokenType, aOffsets[i],
> aOffsets[i+1]));
> }
>
> // List the tokens in the CAS
> for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
> System.out.printf("Starting with %s%n", token.getCoveredText());
> }
> // Merge some tokens, in particular "Franz" "III." -> "Franz III."
> // and "Friedrich" "II." into "Friedrich II."
> AnnotationFS previous = null;
> List<AnnotationFS> toDelete = new ArrayList<>();
> for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
> if (previous != null && names.contains(previous.getCoveredText())
> && suffix.contains(token.getCoveredText())) {
> token.setIntValue(beginFeature, previous.getBegin());
> toDelete.add(previous);
> }
> previous = token;
> }
> // Remove the no longer necessary tokens ("Friedrich" and "Franz"),
> // since we expanded the following tokens "III." and "II." to include
> // their text
> Set<String> removedWords = new HashSet<String>();
> for (AnnotationFS token : toDelete) {
> System.out.printf("Removing %s%n", token.getCoveredText());
> removedWords.add(token.getCoveredText());
> jcas.removeFsFromIndexes(token);
> }
> // Check if the tokens that we wanted to remove are really gone
> for (AnnotationFS token : jcas.getAnnotationIndex(tokenType)) {
> System.out.printf("Remaining %s%n", token.getCoveredText());
> if (removedWords.contains(token.getCoveredText())) {
> org.junit.Assert.fail("I saw a zombie!!!");
> }
> }
> }
> {code}
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)