Hi all,
I ran a simple aggregate analysis engine on two pure-text corpora,
performing preprocessing operations such as tokenization, lemmatization,
POS-tagging and so on.
The second step is applying a RUTA script to the resulting .xmi files.
The RUTA script contains rules of the following form:
(Token.partOfSpeech == "Det"
NominalPhrase{-> MARK(Cause)}
Token.lemma == "bloquer"
Token.partOfSpeech == "Det"
NominalPhrase{-> MARK(Effect)}){-> MARK(Causality)};
Everything works fine for the first corpus, but processing the second one fails with the exception shown below.
As a UIMA newcomer, I have trouble understanding the situation.
Could someone provide some insight into this issue?
The full stack trace is available at the end of this message. Please feel
free to ask for more information.
Thank you,
Kevin.
oct. 07, 2015 2:08:02 PM
org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl
callAnalysisComponentProcess(417)
GRAVE: Exception occurred
org.apache.uima.analysis_engine.AnalysisEngineProcessException:
Annotator processing failed.
at org.apache.uima.ruta.engine.RutaEngine.process(RutaEngine.java:547)
at
org.apache.uima.analysis_component.JCasAnnotator_ImplBase.process(JCasAnnotator_ImplBase.java:48)
at
org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.callAnalysisComponentProcess(PrimitiveAnalysisEngine_impl.java:385)
at
org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.processAndOutputNewCASes(PrimitiveAnalysisEngine_impl.java:308)
at
org.apache.uima.analysis_engine.impl.AnalysisEngineImplBase.process(AnalysisEngineImplBase.java:269)
at
org.apache.uima.ruta.ide.launching.RutaLauncher.processFile(RutaLauncher.java:169)
at
org.apache.uima.ruta.ide.launching.RutaLauncher.main(RutaLauncher.java:130)
Caused by: java.lang.StringIndexOutOfBoundsException: String index out
of range: 50275
at java.lang.String.substring(String.java:1950)
at
org.apache.uima.jcas.tcas.Annotation.getCoveredText(Annotation.java:122)
at
org.apache.uima.ruta.expression.feature.FeatureMatchExpression.checkFeatureValue(FeatureMatchExpression.java:121)
at
org.apache.uima.ruta.expression.feature.FeatureMatchExpression.checkFeatureValue(FeatureMatchExpression.java:84)
at
org.apache.uima.ruta.rule.RutaTypeMatcher.checkFeature(RutaTypeMatcher.java:227)
at
org.apache.uima.ruta.rule.RutaTypeMatcher.match(RutaTypeMatcher.java:196)
at
org.apache.uima.ruta.rule.RutaRuleElement.doMatch(RutaRuleElement.java:368)
at
org.apache.uima.ruta.rule.RutaRuleElement.startMatch(RutaRuleElement.java:73)
at
org.apache.uima.ruta.rule.ComposedRuleElement.startMatch(ComposedRuleElement.java:84)
at
org.apache.uima.ruta.rule.ComposedRuleElement.startMatch(ComposedRuleElement.java:74)
at
org.apache.uima.ruta.rule.ComposedRuleElement.startMatch(ComposedRuleElement.java:74)
at org.apache.uima.ruta.rule.RutaRule.apply(RutaRule.java:47)
at org.apache.uima.ruta.rule.RutaRule.apply(RutaRule.java:40)
at org.apache.uima.ruta.rule.RutaRule.apply(RutaRule.java:29)
at org.apache.uima.ruta.RutaScriptBlock.apply(RutaScriptBlock.java:63)
at org.apache.uima.ruta.RutaModule.apply(RutaModule.java:48)
at org.apache.uima.ruta.engine.RutaEngine.process(RutaEngine.java:545)
... 6 more
Exception in thread "main"
org.apache.uima.analysis_engine.AnalysisEngineProcessException:
Annotator processing failed.
at org.apache.uima.ruta.engine.RutaEngine.process(RutaEngine.java:547)
at
org.apache.uima.analysis_component.JCasAnnotator_ImplBase.process(JCasAnnotator_ImplBase.java:48)
at
org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.callAnalysisComponentProcess(PrimitiveAnalysisEngine_impl.java:385)
at
org.apache.uima.analysis_engine.impl.PrimitiveAnalysisEngine_impl.processAndOutputNewCASes(PrimitiveAnalysisEngine_impl.java:308)
at
org.apache.uima.analysis_engine.impl.AnalysisEngineImplBase.process(AnalysisEngineImplBase.java:269)
at
org.apache.uima.ruta.ide.launching.RutaLauncher.processFile(RutaLauncher.java:169)
at
org.apache.uima.ruta.ide.launching.RutaLauncher.main(RutaLauncher.java:130)
Caused by: java.lang.StringIndexOutOfBoundsException: String index out
of range: 50275
at java.lang.String.substring(String.java:1950)
at
org.apache.uima.jcas.tcas.Annotation.getCoveredText(Annotation.java:122)
at
org.apache.uima.ruta.expression.feature.FeatureMatchExpression.checkFeatureValue(FeatureMatchExpression.java:121)
at
org.apache.uima.ruta.expression.feature.FeatureMatchExpression.checkFeatureValue(FeatureMatchExpression.java:84)
at
org.apache.uima.ruta.rule.RutaTypeMatcher.checkFeature(RutaTypeMatcher.java:227)
at
org.apache.uima.ruta.rule.RutaTypeMatcher.match(RutaTypeMatcher.java:196)
at
org.apache.uima.ruta.rule.RutaRuleElement.doMatch(RutaRuleElement.java:368)
at
org.apache.uima.ruta.rule.RutaRuleElement.startMatch(RutaRuleElement.java:73)
at
org.apache.uima.ruta.rule.ComposedRuleElement.startMatch(ComposedRuleElement.java:84)
at
org.apache.uima.ruta.rule.ComposedRuleElement.startMatch(ComposedRuleElement.java:74)
at
org.apache.uima.ruta.rule.ComposedRuleElement.startMatch(ComposedRuleElement.java:74)
at org.apache.uima.ruta.rule.RutaRule.apply(RutaRule.java:47)
at org.apache.uima.ruta.rule.RutaRule.apply(RutaRule.java:40)
at org.apache.uima.ruta.rule.RutaRule.apply(RutaRule.java:29)
at org.apache.uima.ruta.RutaScriptBlock.apply(RutaScriptBlock.java:63)
at org.apache.uima.ruta.RutaModule.apply(RutaModule.java:48)
at org.apache.uima.ruta.engine.RutaEngine.process(RutaEngine.java:545)
... 6 more