Thanks, Peter. It's just a single class, so I've pasted the code here and also sent it to you directly.
============================================================================
import static
org.apache.uima.fit.factory.AnalysisEngineFactory.createEngineDescription;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.net.URISyntaxException;
import org.antlr.runtime.RecognitionException;
import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.fit.factory.JCasFactory;
import org.apache.uima.fit.util.CasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.ruta.descriptor.RutaBuildOptions;
import org.apache.uima.ruta.descriptor.RutaDescriptorFactory;
import org.apache.uima.ruta.descriptor.RutaDescriptorInformation;
import org.apache.uima.ruta.engine.RutaEngine;
import org.junit.Test;
public class RutaAnnotatorTest {
// We try to create a Detection because an Attribute (" hello")
contains a
// detected value ("hello")
@Test
public void testSpaceProblem() throws UIMAException, IOException,
RecognitionException, URISyntaxException {
// Prepare data
String str = "attr: hello";
String rutaRule = "PACKAGE ruta;\n" //
+ "DECLARE Detection;\n" //
+ "DECLARE DetectedValue;\n" //
+ "DECLARE Attribute;\n" //
+ "BOOLEAN located;\n" //
+ "BLOCK(doc) Document{} {\n" //
+ " Document{ -> located = false};\n" //
+ " a1:Attribute{} -> {" //
+ " d1:DetectedValue{ -> located = true};" //
+ " };" //
+ " Document{located -> CREATE(Detection)};\n"//
+ "}\n"; //
// Prepare CAS
RutaDescriptorFactory factory = new RutaDescriptorFactory();
RutaDescriptorInformation descriptorInformation =
factory.parseDescriptorInformation(rutaRule);
RutaBuildOptions options = new RutaBuildOptions();
TypeSystemDescription typeSystemDescription =
factory.createTypeSystemDescription("", descriptorInformation,
options, null);
JCas cas = JCasFactory.createJCas(typeSystemDescription);
cas.setDocumentText(str);
Type attrType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.Attribute");
AnnotationFS attr = cas.getCas().createAnnotation(attrType, 4, 10);
cas.addFsToIndexes(attr);
Type detectedValueType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.DetectedValue");
AnnotationFS detectedValue =
cas.getCas().createAnnotation(detectedValueType, 5, 10);
cas.addFsToIndexes(detectedValue);
// Execute Ruta
AnalysisEngineDescription ruta =
createEngineDescription(RutaEngine.class, RutaEngine.PARAM_RULES, rutaRule);
AnalysisEngine pipe = UIMAFramework.produceAnalysisEngine(ruta);
pipe.process(cas);
// Validate result
Type detectionType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.Detection");
assertEquals(1, CasUtil.select(cas.getCas(), detectionType).size());
}
// We try to create a Detection because an Attribute ("\" hello\"")
contains a
// detected value ("llo")
@Test
public void testQuoteProblem() throws UIMAException, IOException,
RecognitionException, URISyntaxException {
// Prepare data
String str = "attr: \" hello\"";
String rutaRule = "PACKAGE ruta;\n" //
+ "DECLARE Detection;\n" //
+ "DECLARE DetectedValue;\n" //
+ "DECLARE Attribute;\n" //
+ "BOOLEAN located;\n" //
+ "BLOCK(doc) Document{} {\n" //
+ " Document{ -> located = false};\n" //
+ " a1:Attribute{} -> {" //
+ " d1:DetectedValue{ -> located = true};" //
+ " };" //
+ " Document{located -> CREATE(Detection)};\n"//
+ "}\n"; //
// Prepare CAS
RutaDescriptorFactory factory = new RutaDescriptorFactory();
RutaDescriptorInformation descriptorInformation =
factory.parseDescriptorInformation(rutaRule);
RutaBuildOptions options = new RutaBuildOptions();
TypeSystemDescription typeSystemDescription =
factory.createTypeSystemDescription("", descriptorInformation,
options, null);
JCas cas = JCasFactory.createJCas(typeSystemDescription);
cas.setDocumentText(str);
Type attrType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.Attribute");
AnnotationFS attr = cas.getCas().createAnnotation(attrType, 5, 12);
cas.addFsToIndexes(attr);
Type detectedValueType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.DetectedValue");
AnnotationFS detectedValue =
cas.getCas().createAnnotation(detectedValueType, 9, 12);
cas.addFsToIndexes(detectedValue);
// Execute Ruta
AnalysisEngineDescription ruta =
createEngineDescription(RutaEngine.class, RutaEngine.PARAM_RULES, rutaRule);
AnalysisEngine pipe = UIMAFramework.produceAnalysisEngine(ruta);
pipe.process(cas);
// Validate result
Type detectionType = CasUtil.getAnnotationType(cas.getCas(),
"ruta.Anonymous.Detection");
assertEquals(1, CasUtil.select(cas.getCas(), detectionType).size());
}
}
============================================================================
2017-07-03 20:50 GMT+02:00 Peter Klügl <[email protected]>:
> Hi,
>
>
> I think this mailing list does not allow mail attachments, at least I do
> not see any.
>
>
> Can you upload the tests anywhere and post the links here? Or you can send
> the test directly to me. Or you can create a Jira issue and attach them
> there: https://issues.apache.org/jira/browse/UIMA-5474?jql=project%20%3D%20UIMA%20AND%20component%20%3D%20Ruta
>
>
> Best,
>
>
> Peter
>
>
>
> Am 03.07.2017 um 14:47 schrieb Josep María Formentí Serra:
>
>> Hi,
>>
>> We've experienced some problems applying rules to texts that contain
>> spaces or special characters: in such texts the rules are not applied
>> properly.
>>
>> As an example of these problems I attach 2 tests; these tests use a
>> simplification of the kind of rules that we are using in our project.
>>
>> Best,
>> JM
>>
>
>
--
------------------------------------------------------------------- --- --
- - -
*Grupo AIA* - *www.aia.es <http://www.aia.es> *
Josep Mª Formentí Serra <[email protected]>
*[email protected] <[email protected]>*Dpto. Servicios Financieros y
Seguros
ESADECREAPOLIS, Sant Cugat, Barcelona
Telf.: +34 93 504 49 00 Fax.: +34 93 580 21 88
------------------------------------------------------------------- --- --
- - -
The information transmitted is intended only for the person or entity to
which it is addressed and may contain confidential and/or privileged
material. Any review, retransmission, dissemination or other use of, or
taking of any action in reliance upon, this information by persons or
entities other than the intended recipient is prohibited. If you received
this in error, please contact the sender and delete the material from any
computer.