Author: pkluegl Date: Tue Nov 4 12:32:11 2014 New Revision: 1636562 URL: http://svn.apache.org/r1636562 Log: UIMA-4085 - fixed and added test
Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java (with props) uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt (with props) Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java?rev=1636562&r1=1636561&r2=1636562&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java (original) +++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java Tue Nov 4 12:32:11 2014 @@ -36,6 +36,8 @@ public class PlainTextAnnotator extends public static final String TYPE_LINE = "org.apache.uima.ruta.type.Line"; public static final String TYPE_WSLINE = "org.apache.uima.ruta.type.WSLine"; + + public static final String TYPE_EMPTYLINE = "org.apache.uima.ruta.type.EmptyLine"; public static final String TYPE_PARAGRAPH = "org.apache.uima.ruta.type.Paragraph"; @@ -46,6 +48,7 @@ public class PlainTextAnnotator extends BufferedReader br = new BufferedReader(new StringReader(documentText)); Type lineType = cas.getTypeSystem().getType(TYPE_LINE); Type wsLineType = cas.getTypeSystem().getType(TYPE_WSLINE); + Type emptyLineType = cas.getTypeSystem().getType(TYPE_EMPTYLINE); Type paragraphType = cas.getTypeSystem().getType(TYPE_PARAGRAPH); int offsetTillNow = 0; @@ -70,7 +73,13 @@ public class PlainTextAnnotator extends paragraphBegin = offsetTillNow; } - if (wsLine && !emptyLine) { + if (wsLine && emptyLine) { + // do not create annotation with length 0 + // instead append the line break to the annotation + AnnotationFS newEmptyLineFS = cas.createAnnotation(emptyLineType, offsetTillNow, offsetTillNow + + nlLength); + cas.addFsToIndexes(newEmptyLineFS); + } else if (wsLine && !emptyLine) { AnnotationFS newWSLineFS = cas.createAnnotation(wsLineType, offsetTillNow, offsetTillNow + eachLine.length()); cas.addFsToIndexes(newWSLineFS); @@ -89,6 +98,10 @@ public class PlainTextAnnotator extends AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin, offsetAfterLine); cas.addFsToIndexes(newParaFS); + } else if (offsetAfterLine == documentText.length()) { + AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin, + offsetAfterLine); + cas.addFsToIndexes(newParaFS); } if (wsLine) { lastWasEmpty = true; Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml?rev=1636562&r1=1636561&r2=1636562&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml (original) +++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml Tue Nov 4 12:32:11 2014 @@ -1,4 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> + <!-- Licensed to the Apache Software Foundation (ASF) under one or more contributor license agreements. See the NOTICE file @@ -17,7 +18,6 @@ specific language governing permissions and limitations under the License. --> - <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier"> <name>PlainTextTypeSystem</name> <description/> @@ -37,6 +37,11 @@ <typeDescription> <name>org.apache.uima.ruta.type.WSLine</name> <description/> + <supertypeName>org.apache.uima.ruta.type.EmptyLine</supertypeName> + </typeDescription> + <typeDescription> + <name>org.apache.uima.ruta.type.EmptyLine</name> + <description/> <supertypeName>org.apache.uima.ruta.type.AnyLine</supertypeName> </typeDescription> <typeDescription> Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java?rev=1636562&view=auto ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java (added) +++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java Tue Nov 4 12:32:11 2014 @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.uima.ruta.engine; + +import static org.junit.Assert.assertEquals; + +import java.io.File; +import java.net.URL; + +import org.apache.uima.UIMAFramework; +import org.apache.uima.analysis_engine.AnalysisEngine; +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.cas.text.AnnotationIndex; +import org.apache.uima.resource.ResourceSpecifier; +import org.apache.uima.util.FileUtils; +import org.apache.uima.util.XMLInputSource; +import org.junit.Test; + +public class PlainTextAnnotatorTest { + + @Test + public void test() throws Exception { + String namespace = this.getClass().getPackage().getName().replaceAll("\\.", "/"); + String name = namespace + "/" + "PlainTextAnnotatorTest.txt"; + URL textURL = PlainTextAnnotatorTest.class.getClassLoader().getResource(name); + File textFile = new File(textURL.toURI()); + String text = FileUtils.file2String(textFile, "UTF-8"); + URL url = PlainTextAnnotator.class.getClassLoader().getResource("PlainTextAnnotator.xml"); + if (url == null) { + url = HtmlAnnotator.class.getClassLoader().getResource( + "org/apache/uima/ruta/engine/PlainTextAnnotator.xml"); + } + XMLInputSource in = new XMLInputSource(url); + ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); + AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier); + CAS cas = ae.newCAS(); + AnnotationIndex<AnnotationFS> ai = null; + + cas.setDocumentText(text); + ae.process(cas); + + ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.AnyLine")); + assertEquals(18, ai.size()); + + ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.Line")); + assertEquals(10, ai.size()); + + ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.EmptyLine")); + assertEquals(8, ai.size()); + + ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.WSLine")); + assertEquals(4, ai.size()); + + ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.Paragraph")); + assertEquals(4, ai.size()); + + ae.destroy(); + cas.release(); + } +} Propchange: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java ------------------------------------------------------------------------------ svn:eol-style = native Added: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt?rev=1636562&view=auto ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt (added) +++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt Tue Nov 4 12:32:11 2014 @@ -0,0 +1,18 @@ +1 some text +2 some text +3 some text + + + + +8 some text +9 some text +10 some text + + +13 some text +14 some text +15 some text + + +18 end \ No newline at end of file Propchange: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt ------------------------------------------------------------------------------ svn:eol-style = native