Author: pkluegl
Date: Tue Nov  4 12:32:11 2014
New Revision: 1636562

URL: http://svn.apache.org/r1636562
Log:
UIMA-4085
- fixed and added test

Added:
    uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java   (with props)
    uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt   (with props)
Modified:
    uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java
    uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml

Modified: 
uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java?rev=1636562&r1=1636561&r2=1636562&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java (original)
+++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/PlainTextAnnotator.java Tue Nov  4 12:32:11 2014
@@ -36,6 +36,8 @@ public class PlainTextAnnotator extends 
   public static final String TYPE_LINE = "org.apache.uima.ruta.type.Line";
 
   public static final String TYPE_WSLINE = "org.apache.uima.ruta.type.WSLine";
+  
+  public static final String TYPE_EMPTYLINE = "org.apache.uima.ruta.type.EmptyLine";
 
   public static final String TYPE_PARAGRAPH = "org.apache.uima.ruta.type.Paragraph";
 
@@ -46,6 +48,7 @@ public class PlainTextAnnotator extends 
     BufferedReader br = new BufferedReader(new StringReader(documentText));
     Type lineType = cas.getTypeSystem().getType(TYPE_LINE);
     Type wsLineType = cas.getTypeSystem().getType(TYPE_WSLINE);
+    Type emptyLineType = cas.getTypeSystem().getType(TYPE_EMPTYLINE);
     Type paragraphType = cas.getTypeSystem().getType(TYPE_PARAGRAPH);
 
     int offsetTillNow = 0;
@@ -70,7 +73,13 @@ public class PlainTextAnnotator extends 
           paragraphBegin = offsetTillNow;
         }
 
-        if (wsLine && !emptyLine) {
+        if (wsLine && emptyLine) {
+          // do not create annotation with length 0
+          // instead append the line break to the annotation
+          AnnotationFS newEmptyLineFS = cas.createAnnotation(emptyLineType, offsetTillNow, offsetTillNow
+                  + nlLength);
+          cas.addFsToIndexes(newEmptyLineFS);
+        } else if (wsLine && !emptyLine) {
           AnnotationFS newWSLineFS = cas.createAnnotation(wsLineType, offsetTillNow, offsetTillNow
                   + eachLine.length());
           cas.addFsToIndexes(newWSLineFS);
@@ -89,6 +98,10 @@ public class PlainTextAnnotator extends 
           AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin,
                   offsetAfterLine);
           cas.addFsToIndexes(newParaFS);
+        } else if (offsetAfterLine == documentText.length()) {
+          AnnotationFS newParaFS = cas.createAnnotation(paragraphType, paragraphBegin,
+                  offsetAfterLine);
+          cas.addFsToIndexes(newParaFS);
         }
         if (wsLine) {
           lastWasEmpty = true;

Modified: 
uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml?rev=1636562&r1=1636561&r2=1636562&view=diff
==============================================================================
--- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml (original)
+++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/PlainTextTypeSystem.xml Tue Nov  4 12:32:11 2014
@@ -1,4 +1,5 @@
 <?xml version="1.0" encoding="UTF-8"?>
+
 <!--
   Licensed to the Apache Software Foundation (ASF) under one
   or more contributor license agreements.  See the NOTICE file
@@ -17,7 +18,6 @@
   specific language governing permissions and limitations
   under the License.
 -->
-
 <typeSystemDescription xmlns="http://uima.apache.org/resourceSpecifier">
   <name>PlainTextTypeSystem</name>
   <description/>
@@ -37,6 +37,11 @@
     <typeDescription>
       <name>org.apache.uima.ruta.type.WSLine</name>
       <description/>
+      <supertypeName>org.apache.uima.ruta.type.EmptyLine</supertypeName>
+    </typeDescription>
+    <typeDescription>
+      <name>org.apache.uima.ruta.type.EmptyLine</name>
+      <description/>
       <supertypeName>org.apache.uima.ruta.type.AnyLine</supertypeName>
     </typeDescription>
     <typeDescription>

Added: 
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java?rev=1636562&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java (added)
+++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java Tue Nov  4 12:32:11 2014
@@ -0,0 +1,78 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.uima.ruta.engine;
+
+import static org.junit.Assert.assertEquals;
+
+import java.io.File;
+import java.net.URL;
+
+import org.apache.uima.UIMAFramework;
+import org.apache.uima.analysis_engine.AnalysisEngine;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.text.AnnotationFS;
+import org.apache.uima.cas.text.AnnotationIndex;
+import org.apache.uima.resource.ResourceSpecifier;
+import org.apache.uima.util.FileUtils;
+import org.apache.uima.util.XMLInputSource;
+import org.junit.Test;
+
+public class PlainTextAnnotatorTest {
+
+  @Test
+  public void test() throws Exception {
+    String namespace = this.getClass().getPackage().getName().replaceAll("\\.", "/");
+    String name = namespace + "/" + "PlainTextAnnotatorTest.txt";
+    URL textURL = PlainTextAnnotatorTest.class.getClassLoader().getResource(name);
+    File textFile = new File(textURL.toURI());
+    String text = FileUtils.file2String(textFile, "UTF-8");
+    URL url = PlainTextAnnotator.class.getClassLoader().getResource("PlainTextAnnotator.xml");
+    if (url == null) {
+      url = HtmlAnnotator.class.getClassLoader().getResource(
+              "org/apache/uima/ruta/engine/PlainTextAnnotator.xml");
+    }
+    XMLInputSource in = new XMLInputSource(url);
+    ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
+    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(specifier);
+    CAS cas = ae.newCAS();
+    AnnotationIndex<AnnotationFS> ai = null;
+
+    cas.setDocumentText(text);
+    ae.process(cas);
+    
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.AnyLine"));
+    assertEquals(18, ai.size());
+
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.Line"));
+    assertEquals(10, ai.size());
+
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.EmptyLine"));
+    assertEquals(8, ai.size());
+
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.WSLine"));
+    assertEquals(4, ai.size());
+
+    ai = cas.getAnnotationIndex(cas.getTypeSystem().getType("org.apache.uima.ruta.type.Paragraph"));
+    assertEquals(4, ai.size());
+
+    ae.destroy();
+    cas.release();
+  }
+}

Propchange: 
uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: 
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt
URL: 
http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt?rev=1636562&view=auto
==============================================================================
--- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt (added)
+++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt Tue Nov  4 12:32:11 2014
@@ -0,0 +1,18 @@
+1 some text
+2 some text
+3 some text
+
+ 
+  
+  
+8 some text
+9 some text
+10 some text
+ 
+
+13 some text
+14 some text
+15 some text
+
+
+18 end
\ No newline at end of file

Propchange: 
uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/PlainTextAnnotatorTest.txt
------------------------------------------------------------------------------
    svn:eol-style = native


Reply via email to