Author: pkluegl Date: Tue Nov 4 13:26:36 2014 New Revision: 1636576 URL: http://svn.apache.org/r1636576 Log: UIMA-4086 - extended definition of NBSP - NBSP isa SPACE - added test
Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/NBSPWSTest.java (with props) Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.flex uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.java uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicTypeSystem.xml uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/BasicTypeSystem.xml uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TestEngine.xml uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TypeAliasTestEngine.xml uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/CWEngine.xml uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SWEngine.xml uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SimpleEngine.xml Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java (original) +++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/engine/RutaEngine.java Tue Nov 4 13:26:36 2014 @@ -230,7 +230,7 @@ public class RutaEngine extends JCasAnno public static final String PARAM_DEFAULT_FILTERED_TYPES = "defaultFilteredTypes"; @ConfigurationParameter(name = PARAM_DEFAULT_FILTERED_TYPES, mandatory = false, defaultValue = { - "org.apache.uima.ruta.type.SPACE", "org.apache.uima.ruta.type.NBSP", + "org.apache.uima.ruta.type.SPACE", "org.apache.uima.ruta.type.BREAK", "org.apache.uima.ruta.type.MARKUP" }) private String[] defaultFilteredTypes; Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.flex URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.flex?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.flex (original) +++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.flex Tue Nov 4 13:26:36 2014 @@ -86,7 +86,7 @@ SPACE=[ \t] } - \xA0| |&NBSP; { + \u00A0|\u202F|\uFEFF|\u2007|\u180E| |&NBSP; { NBSP t = new NBSP(cas); t.setBegin(yychar); t.setEnd(yychar + yytext().length()); Modified: uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.java?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.java (original) +++ uima/ruta/trunk/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.java Tue Nov 4 13:26:36 2014 @@ -1,4 +1,4 @@ -/* The following code was generated by JFlex 1.4.3 on 28.11.12 14:06 */ +/* The following code was generated by JFlex 1.4.3 on 04.11.14 14:02 */ /* * Licensed to the Apache Software Foundation (ASF) under one @@ -19,10 +19,14 @@ * under the License. */ + package org.apache.uima.ruta.seed; +import java.util.*; +import java.util.regex.*; import org.apache.uima.cas.text.AnnotationFS; import org.apache.uima.jcas.JCas; + import org.apache.uima.ruta.type.AMP; import org.apache.uima.ruta.type.BREAK; import org.apache.uima.ruta.type.CAP; @@ -40,10 +44,12 @@ import org.apache.uima.ruta.type.SPACE; import org.apache.uima.ruta.type.SPECIAL; import org.apache.uima.ruta.type.SW; + /** - * This class is a scanner generated by <a href="http://www.jflex.de/">JFlex</a> 1.4.3 on 28.11.12 - * 14:06 from the specification file - * <tt>D:/work/workspace-ruta-uima/uimaj-ruta/src/main/java/org/apache/uima/ruta/seed/SeedLexer.flex</tt> + * This class is a scanner generated by + * <a href="http://www.jflex.de/">JFlex</a> 1.4.3 + * on 04.11.14 14:02 from the specification file + * <tt>D:/work/workspace-uima-ruta5/ruta/ruta-core/src/main/java/org/apache/uima/ruta/seed/SeedLexer.flex</tt> */ class SeedLexer { @@ -57,160 +63,208 @@ class SeedLexer { public static final int YYINITIAL = 0; /** - * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l ZZ_LEXSTATE[l+1] is the state in - * the DFA for the lexical state l at the beginning of a line l is of the form l = 2*k, k a non - * negative integer - */ - private static final int ZZ_LEXSTATE[] = { 0, 0 }; + * ZZ_LEXSTATE[l] is the state in the DFA for the lexical state l + * ZZ_LEXSTATE[l+1] is the state in the DFA for the lexical state l + * at the beginning of a line + * l is of the form l = 2*k, k a non negative integer + */ + private static final int ZZ_LEXSTATE[] = { + 0, 0 + }; - /** + /** * Translates characters to character classes */ - private static final String ZZ_CMAP_PACKED = "\10\0\1\4\1\3\1\4\2\0\1\4\22\0\1\3\1\7\4\0" - + "\1\12\5\0\1\25\1\0\1\26\1\6\12\2\1\24\1\17\1\5" - + "\1\0\1\10\1\27\1\0\1\1\1\21\13\1\1\20\1\1\1\23" - + "\2\1\1\22\7\1\6\0\1\31\1\14\13\31\1\13\1\31\1\16" - + "\2\31\1\15\7\31\45\0\1\11\11\0\1\30\12\0\1\30\4\0" - + "\1\30\5\0\27\32\1\0\7\32\30\30\1\0\10\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\2\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\2\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\2\32\1\30\1\32\1\30" - + "\1\32\3\30\2\32\1\30\1\32\1\30\2\32\1\30\3\32\2\30" - + "\4\32\1\30\2\32\1\30\3\32\3\30\2\32\1\30\2\32\1\30" - + "\1\32\1\30\1\32\1\30\2\32\1\30\1\32\2\30\1\32\1\30" - + "\2\32\1\30\3\32\1\30\1\32\1\30\2\32\2\30\1\0\1\32" - + "\3\30\4\0\1\32\1\0\1\30\1\32\1\0\1\30\1\32\1\0" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\2\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\2\30\1\32\1\0\1\30\1\32\1\30" - + "\3\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\4\30" - + "\31\0\140\30\326\0\1\32\1\0\3\32\1\0\1\32\1\0\2\32" - + "\1\30\21\32\1\0\11\32\43\30\1\0\2\30\3\32\3\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\5\30\1\32\1\30\1\0\1\32\1\30\2\32\1\30" - + "\4\0\60\32\60\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\10\0\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\2\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\0\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\2\0\1\32\1\30\6\0\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\41\0\46\32\12\0\47\30\u0b18\0" - + "\46\32\u0c3a\0\54\30\66\0\12\30\224\0\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30" - + "\1\32\1\30\1\32\1\30\1\32\7\30\4\0\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32" - + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\6\0\10\30\10\32" - + "\6\30\2\0\6\32\2\0\10\30\10\32\10\30\10\32\6\30\2\0" - + "\6\32\2\0\10\30\1\0\1\32\1\0\1\32\1\0\1\32\1\0" - + "\1\32\10\30\10\32\16\30\2\0\10\30\10\0\10\30\10\0\10\30" - + "\10\0\5\30\1\0\2\30\4\32\2\0\1\30\3\0\3\30\1\0" - + "\2\30\4\32\4\0\4\30\2\0\2\30\4\32\4\0\10\30\5\32" - + "\5\0\3\30\1\0\2\30\4\32\165\0\1\30\15\0\1\30\202\0" - + "\1\32\4\0\1\32\2\0\1\30\3\32\2\30\3\32\1\30\1\0" - + "\1\32\3\0\5\32\6\0\1\32\1\0\1\32\1\0\1\32\1\0" - + "\4\32\1\0\1\30\2\32\1\0\1\32\1\30\4\0\1\30\3\0" - + "\1\30\2\32\5\0\1\32\4\30\ud9b6\0\7\30\14\0\5\30\u0409\0" + "\32\32\6\0\32\30\245\0"; + private static final String ZZ_CMAP_PACKED = + "\10\0\1\4\1\3\1\4\2\0\1\4\22\0\1\3\1\7\4\0"+ + "\1\12\5\0\1\25\1\0\1\26\1\6\12\2\1\24\1\17\1\5"+ + "\1\0\1\10\1\27\1\0\1\1\1\21\13\1\1\20\1\1\1\23"+ + "\2\1\1\22\7\1\6\0\1\31\1\14\13\31\1\13\1\31\1\16"+ + "\2\31\1\15\7\31\45\0\1\11\11\0\1\30\12\0\1\30\4\0"+ + "\1\30\5\0\27\32\1\0\7\32\30\30\1\0\10\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\2\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\2\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\2\32\1\30\1\32\1\30"+ + "\1\32\3\30\2\32\1\30\1\32\1\30\2\32\1\30\3\32\2\30"+ + "\4\32\1\30\2\32\1\30\3\32\3\30\2\32\1\30\2\32\1\30"+ + "\1\32\1\30\1\32\1\30\2\32\1\30\1\32\2\30\1\32\1\30"+ + "\2\32\1\30\3\32\1\30\1\32\1\30\2\32\2\30\1\0\1\32"+ + "\3\30\4\0\1\32\1\0\1\30\1\32\1\0\1\30\1\32\1\0"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\2\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\2\30\1\32\1\0\1\30\1\32\1\30"+ + "\3\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\7\30"+ + "\2\32\1\30\2\32\2\30\1\32\1\30\4\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\105\30\1\0\44\30\7\0\2\30"+ + "\36\0\5\30\140\0\1\30\52\0\1\32\1\30\1\32\1\30\2\0"+ + "\1\32\1\30\2\0\4\30\10\0\1\32\1\0\3\32\1\0\1\32"+ + "\1\0\2\32\1\30\21\32\1\0\11\32\43\30\1\32\2\30\3\32"+ + "\3\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\5\30\1\32\1\30\1\0\1\32\1\30"+ + "\2\32\2\30\63\32\60\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\10\0\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\2\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\2\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\11\0\46\32\12\0\47\30\u0b18\0"+ + "\46\32\1\0\1\32\5\0\1\32\u0740\0\1\11\u04f1\0\300\30\100\0"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\11\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\11\30\10\32\6\30"+ + "\2\0\6\32\2\0\10\30\10\32\10\30\10\32\6\30\2\0\6\32"+ + "\2\0\10\30\1\0\1\32\1\0\1\32\1\0\1\32\1\0\1\32"+ + "\10\30\10\32\16\30\2\0\10\30\10\0\10\30\10\0\10\30\10\0"+ + "\5\30\1\0\2\30\4\32\2\0\1\30\3\0\3\30\1\0\2\30"+ + "\4\32\4\0\4\30\2\0\2\30\4\32\4\0\10\30\5\32\5\0"+ + "\3\30\1\0\2\30\4\32\13\0\1\11\47\0\1\11\101\0\1\30"+ + "\15\0\1\30\20\0\15\30\145\0\1\32\4\0\1\32\2\0\1\30"+ + "\3\32\2\30\3\32\1\30\1\0\1\32\3\0\5\32\6\0\1\32"+ + "\1\0\1\32\1\0\1\32\1\0\4\32\1\0\1\30\4\32\1\30"+ + "\4\0\1\30\2\0\2\30\2\32\5\0\1\32\4\30\4\0\1\30"+ + "\21\0\20\32\20\30\3\0\1\32\1\30\u0331\0\32\32\32\30\u0716\0"+ + "\57\32\1\0\57\30\1\0\1\32\1\30\3\32\2\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\4\32\1\30\1\32\2\30\1\32\10\30"+ + "\3\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\2\30"+ + "\6\0\1\32\1\30\1\32\1\30\3\0\1\32\1\30\14\0\46\30"+ + "\1\0\1\30\5\0\1\30\u7912\0\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\22\0\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\212\0\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\3\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32"+ + "\1\30\1\32\12\30\1\32\1\30\1\32\1\30\2\32\1\30\1\32"+ + "\1\30\1\32\1\30\1\32\1\30\1\32\1\30\3\0\1\32\1\30"+ + "\1\32\1\30\1\0\1\32\1\30\1\32\1\30\14\0\1\32\1\30"+ + "\1\32\1\30\1\32\1\30\1\32\1\30\1\32\1\30\1\32\115\0"+ + "\3\30\u5305\0\7\30\14\0\5\30\u03e7\0\1\11\41\0\32\32\6\0"+ + "\32\30\245\0"; - /** + /** * Translates characters to character classes */ - private static final char[] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); + private static final char [] ZZ_CMAP = zzUnpackCMap(ZZ_CMAP_PACKED); - /** + /** * Translates DFA states to action switch labels. */ - private static final int[] ZZ_ACTION = zzUnpackAction(); + private static final int [] ZZ_ACTION = zzUnpackAction(); - private static final String ZZ_ACTION_PACKED_0 = "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6" - + "\1\7\1\1\1\10\1\11\1\12\1\13\1\14\1\15" + "\1\16\1\2\5\0\1\17\1\20\5\0"; + private static final String ZZ_ACTION_PACKED_0 = + "\1\0\1\1\1\2\1\3\1\4\1\5\1\1\1\6"+ + "\1\7\1\1\1\10\1\11\1\12\1\13\1\14\1\15"+ + "\1\16\1\2\5\0\1\17\1\20\5\0"; - private static int[] zzUnpackAction() { - int[] result = new int[30]; + private static int [] zzUnpackAction() { + int [] result = new int[30]; int offset = 0; offset = zzUnpackAction(ZZ_ACTION_PACKED_0, offset, result); return result; } - private static int zzUnpackAction(String packed, int offset, int[] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ + private static int zzUnpackAction(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ int l = packed.length(); while (i < l) { int count = packed.charAt(i++); int value = packed.charAt(i++); - do - result[j++] = value; - while (--count > 0); + do result[j++] = value; while (--count > 0); } return j; } - /** + + /** * Translates a state to a row index in the transition table */ - private static final int[] ZZ_ROWMAP = zzUnpackRowMap(); + private static final int [] ZZ_ROWMAP = zzUnpackRowMap(); - private static final String ZZ_ROWMAP_PACKED_0 = "\0\0\0\33\0\66\0\121\0\33\0\33\0\154\0\33" - + "\0\33\0\207\0\242\0\33\0\33\0\33\0\33\0\33" - + "\0\275\0\330\0\363\0\u010e\0\u0129\0\u0144\0\u015f\0\33" - + "\0\33\0\u017a\0\u0195\0\u01b0\0\u01cb\0\u01e6"; + private static final String ZZ_ROWMAP_PACKED_0 = + "\0\0\0\33\0\66\0\121\0\33\0\33\0\154\0\33"+ + "\0\33\0\207\0\242\0\33\0\33\0\33\0\33\0\33"+ + "\0\275\0\330\0\363\0\u010e\0\u0129\0\u0144\0\u015f\0\33"+ + "\0\33\0\u017a\0\u0195\0\u01b0\0\u01cb\0\u01e6"; - private static int[] zzUnpackRowMap() { - int[] result = new int[30]; + private static int [] zzUnpackRowMap() { + int [] result = new int[30]; int offset = 0; offset = zzUnpackRowMap(ZZ_ROWMAP_PACKED_0, offset, result); return result; } - private static int zzUnpackRowMap(String packed, int offset, int[] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ + private static int zzUnpackRowMap(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ int l = packed.length(); while (i < l) { int high = packed.charAt(i++) << 16; @@ -219,80 +273,88 @@ class SeedLexer { return j; } - /** + /** * The transition table of the DFA */ - private static final int[] ZZ_TRANS = zzUnpackTrans(); + private static final int [] ZZ_TRANS = zzUnpackTrans(); - private static final String ZZ_TRANS_PACKED_0 = "\1\2\1\3\1\4\1\5\1\6\1\7\1\2\1\10" - + "\1\2\1\11\1\12\4\13\1\14\4\3\1\15\1\16" + "\1\17\1\20\2\13\1\3\34\0\1\21\11\0\4\22" - + "\1\0\4\21\4\0\2\22\1\21\2\0\1\4\36\0" + "\1\23\1\24\24\0\1\25\11\0\1\26\3\25\1\0" - + "\1\27\3\25\5\0\1\25\14\0\4\13\11\0\2\13" + "\2\0\1\21\16\0\4\21\6\0\1\21\13\0\4\22" - + "\11\0\2\22\10\0\1\24\23\0\10\24\1\30\22\24" + "\1\0\1\25\11\0\4\25\1\31\4\25\5\0\1\25" - + "\2\0\1\25\11\0\1\25\1\32\2\25\1\31\4\25" + "\5\0\1\25\2\0\1\25\11\0\4\25\1\31\1\25" - + "\1\33\2\25\5\0\1\25\2\0\1\25\11\0\2\25" + "\1\34\1\25\1\31\4\25\5\0\1\25\2\0\1\25" - + "\11\0\4\25\1\31\2\25\1\35\1\25\5\0\1\25" + "\2\0\1\25\11\0\3\25\1\36\1\31\4\25\5\0" - + "\1\25\2\0\1\25\11\0\4\25\1\31\3\25\1\36" + "\5\0\1\25\2\0\1\25\11\0\4\25\1\11\4\25" - + "\5\0\1\25\1\0"; + private static final String ZZ_TRANS_PACKED_0 = + "\1\2\1\3\1\4\1\5\1\6\1\7\1\2\1\10"+ + "\1\2\1\11\1\12\4\13\1\14\4\3\1\15\1\16"+ + "\1\17\1\20\2\13\1\3\34\0\1\21\11\0\4\22"+ + "\1\0\4\21\4\0\2\22\1\21\2\0\1\4\36\0"+ + "\1\23\1\24\24\0\1\25\11\0\1\26\3\25\1\0"+ + "\1\27\3\25\5\0\1\25\14\0\4\13\11\0\2\13"+ + "\2\0\1\21\16\0\4\21\6\0\1\21\13\0\4\22"+ + "\11\0\2\22\10\0\1\24\23\0\10\24\1\30\22\24"+ + "\1\0\1\25\11\0\4\25\1\31\4\25\5\0\1\25"+ + "\2\0\1\25\11\0\1\25\1\32\2\25\1\31\4\25"+ + "\5\0\1\25\2\0\1\25\11\0\4\25\1\31\1\25"+ + "\1\33\2\25\5\0\1\25\2\0\1\25\11\0\2\25"+ + "\1\34\1\25\1\31\4\25\5\0\1\25\2\0\1\25"+ + "\11\0\4\25\1\31\2\25\1\35\1\25\5\0\1\25"+ + "\2\0\1\25\11\0\3\25\1\36\1\31\4\25\5\0"+ + "\1\25\2\0\1\25\11\0\4\25\1\31\3\25\1\36"+ + "\5\0\1\25\2\0\1\25\11\0\4\25\1\11\4\25"+ + "\5\0\1\25\1\0"; - private static int[] zzUnpackTrans() { - int[] result = new int[513]; + private static int [] zzUnpackTrans() { + int [] result = new int[513]; int offset = 0; offset = zzUnpackTrans(ZZ_TRANS_PACKED_0, offset, result); return result; } - private static int zzUnpackTrans(String packed, int offset, int[] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ + private static int zzUnpackTrans(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ int l = packed.length(); while (i < l) { int count = packed.charAt(i++); int value = packed.charAt(i++); value--; - do - result[j++] = value; - while (--count > 0); + do result[j++] = value; while (--count > 0); } return j; } + /* error codes */ private static final int ZZ_UNKNOWN_ERROR = 0; - private static final int ZZ_NO_MATCH = 1; - private static final int ZZ_PUSHBACK_2BIG = 2; /* error messages for the codes above */ - private static final String ZZ_ERROR_MSG[] = { "Unkown internal scanner error", - "Error: could not match input", "Error: pushback value was too large" }; + private static final String ZZ_ERROR_MSG[] = { + "Unkown internal scanner error", + "Error: could not match input", + "Error: pushback value was too large" + }; /** * ZZ_ATTRIBUTE[aState] contains the attributes of state <code>aState</code> */ - private static final int[] ZZ_ATTRIBUTE = zzUnpackAttribute(); + private static final int [] ZZ_ATTRIBUTE = zzUnpackAttribute(); - private static final String ZZ_ATTRIBUTE_PACKED_0 = "\1\0\1\11\2\1\2\11\1\1\2\11\2\1\5\11" - + "\2\1\5\0\2\11\5\0"; + private static final String ZZ_ATTRIBUTE_PACKED_0 = + "\1\0\1\11\2\1\2\11\1\1\2\11\2\1\5\11"+ + "\2\1\5\0\2\11\5\0"; - private static int[] zzUnpackAttribute() { - int[] result = new int[30]; + private static int [] zzUnpackAttribute() { + int [] result = new int[30]; int offset = 0; offset = zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, offset, result); return result; } - private static int zzUnpackAttribute(String packed, int offset, int[] result) { - int i = 0; /* index in packed string */ - int j = offset; /* index in unpacked array */ + private static int zzUnpackAttribute(String packed, int offset, int [] result) { + int i = 0; /* index in packed string */ + int j = offset; /* index in unpacked array */ int l = packed.length(); while (i < l) { int count = packed.charAt(i++); int value = packed.charAt(i++); - do - result[j++] = value; - while (--count > 0); + do result[j++] = value; while (--count > 0); } return j; } @@ -306,9 +368,8 @@ class SeedLexer { /** the current lexical state */ private int zzLexicalState = YYINITIAL; - /** - * this buffer contains the current text to be matched and is the source of the yytext() string - */ + /** this buffer contains the current text to be matched and is + the source of the yytext() string */ private char zzBuffer[] = new char[ZZ_BUFFERSIZE]; /** the textposition at the last accepting state */ @@ -320,9 +381,8 @@ class SeedLexer { /** startRead marks the beginning of the yytext() string in the buffer */ private int zzStartRead; - /** - * endRead marks the last character in the buffer, that has been read from input - */ + /** endRead marks the last character in the buffer, that has been read + from input */ private int zzEndRead; /** number of newlines encountered up to the start of the matched text */ @@ -332,11 +392,12 @@ class SeedLexer { private int yychar; /** - * the number of characters from the last newline up to the start of the matched text + * the number of characters from the last newline up to the start of the + * matched text */ private int yycolumn; - /** + /** * zzAtBOL == true <=> the scanner is currently at the beginning of a line */ private boolean zzAtBOL = true; @@ -348,90 +409,91 @@ class SeedLexer { private boolean zzEOFDone; /* user code: */ - private JCas cas; + private JCas cas; + + public void setJCas(JCas cas) { + this.cas = cas; + } - public void setJCas(JCas cas) { - this.cas = cas; - } /** - * Creates a new scanner There is also a java.io.InputStream version of this constructor. - * - * @param in - * the java.io.Reader to read input from. + * Creates a new scanner + * There is also a java.io.InputStream version of this constructor. + * + * @param in the java.io.Reader to read input from. */ SeedLexer(java.io.Reader in) { this.zzReader = in; } /** - * Creates a new scanner. There is also java.io.Reader version of this constructor. - * - * @param in - * the java.io.Inputstream to read input from. + * Creates a new scanner. + * There is also java.io.Reader version of this constructor. + * + * @param in the java.io.Inputstream to read input from. */ SeedLexer(java.io.InputStream in) { this(new java.io.InputStreamReader(in)); } - /** + /** * Unpacks the compressed character translation table. - * - * @param packed - * the packed character translation table - * @return the unpacked character translation table - */ - private static char[] zzUnpackCMap(String packed) { - char[] map = new char[0x10000]; - int i = 0; /* index in packed string */ - int j = 0; /* index in unpacked array */ - while (i < 1808) { - int count = packed.charAt(i++); + * + * @param packed the packed character translation table + * @return the unpacked character translation table + */ + private static char [] zzUnpackCMap(String packed) { + char [] map = new char[0x10000]; + int i = 0; /* index in packed string */ + int j = 0; /* index in unpacked array */ + while (i < 2624) { + int count = packed.charAt(i++); char value = packed.charAt(i++); - do - map[j++] = value; - while (--count > 0); + do map[j++] = value; while (--count > 0); } return map; } + /** * Refills the input buffer. + * + * @return <code>false</code>, iff there was new input. * - * @return <code>false</code>, iff there was new input. - * - * @exception java.io.IOException - * if any I/O-Error occurs + * @exception java.io.IOException if any I/O-Error occurs */ private boolean zzRefill() throws java.io.IOException { /* first: make room (if you can) */ if (zzStartRead > 0) { - System.arraycopy(zzBuffer, zzStartRead, zzBuffer, 0, zzEndRead - zzStartRead); + System.arraycopy(zzBuffer, zzStartRead, + zzBuffer, 0, + zzEndRead-zzStartRead); /* translate stored positions */ - zzEndRead -= zzStartRead; - zzCurrentPos -= zzStartRead; - zzMarkedPos -= zzStartRead; + zzEndRead-= zzStartRead; + zzCurrentPos-= zzStartRead; + zzMarkedPos-= zzStartRead; zzStartRead = 0; } /* is the buffer big enough? */ if (zzCurrentPos >= zzBuffer.length) { /* if not: blow it up */ - char newBuffer[] = new char[zzCurrentPos * 2]; + char newBuffer[] = new char[zzCurrentPos*2]; System.arraycopy(zzBuffer, 0, newBuffer, 0, zzBuffer.length); zzBuffer = newBuffer; } /* finally: fill the buffer with new input */ - int numRead = zzReader.read(zzBuffer, zzEndRead, zzBuffer.length - zzEndRead); + int numRead = zzReader.read(zzBuffer, zzEndRead, + zzBuffer.length-zzEndRead); if (numRead > 0) { - zzEndRead += numRead; + zzEndRead+= numRead; return false; } - // unlikely but not impossible: read 0 characters, but not at end of stream + // unlikely but not impossible: read 0 characters, but not at end of stream if (numRead == 0) { int c = zzReader.read(); if (c == -1) { @@ -439,37 +501,40 @@ class SeedLexer { } else { zzBuffer[zzEndRead++] = (char) c; return false; - } + } } - // numRead < 0 + // numRead < 0 return true; } + /** * Closes the input stream. */ public final void yyclose() throws java.io.IOException { - zzAtEOF = true; /* indicate end of file */ - zzEndRead = zzStartRead; /* invalidate buffer */ + zzAtEOF = true; /* indicate end of file */ + zzEndRead = zzStartRead; /* invalidate buffer */ if (zzReader != null) zzReader.close(); } + /** - * Resets the scanner to read from a new input stream. Does not close the old reader. - * - * All internal variables are reset, the old input stream <b>cannot</b> be reused (internal buffer - * is discarded and lost). Lexical state is set to <tt>ZZ_INITIAL</tt>. - * - * @param reader - * the new input stream + * Resets the scanner to read from a new input stream. + * Does not close the old reader. + * + * All internal variables are reset, the old input stream + * <b>cannot</b> be reused (internal buffer is discarded and lost). + * Lexical state is set to <tt>ZZ_INITIAL</tt>. + * + * @param reader the new input stream */ public final void yyreset(java.io.Reader reader) { zzReader = reader; - zzAtBOL = true; - zzAtEOF = false; + zzAtBOL = true; + zzAtEOF = false; zzEOFDone = false; zzEndRead = zzStartRead = 0; zzCurrentPos = zzMarkedPos = 0; @@ -477,6 +542,7 @@ class SeedLexer { zzLexicalState = YYINITIAL; } + /** * Returns the current lexical state. */ @@ -484,90 +550,98 @@ class SeedLexer { return zzLexicalState; } + /** * Enters a new lexical state - * - * @param newState - * the new lexical state + * + * @param newState the new lexical state */ public final void yybegin(int newState) { zzLexicalState = newState; } + /** * Returns the text matched by the current regular expression. */ public final String yytext() { - return new String(zzBuffer, zzStartRead, zzMarkedPos - zzStartRead); + return new String( zzBuffer, zzStartRead, zzMarkedPos-zzStartRead ); } + /** - * Returns the character at position <tt>pos</tt> from the matched text. + * Returns the character at position <tt>pos</tt> from the + * matched text. * * It is equivalent to yytext().charAt(pos), but faster - * - * @param pos - * the position of the character to fetch. A value from 0 to yylength()-1. - * + * + * @param pos the position of the character to fetch. + * A value from 0 to yylength()-1. + * * @return the character at position pos */ public final char yycharat(int pos) { - return zzBuffer[zzStartRead + pos]; + return zzBuffer[zzStartRead+pos]; } + /** * Returns the length of the matched text region. */ public final int yylength() { - return zzMarkedPos - zzStartRead; + return zzMarkedPos-zzStartRead; } + /** * Reports an error that occured while scanning. - * - * In a wellformed scanner (no or only correct usage of yypushback(int) and a match-all fallback - * rule) this method will only be called with things that "Can't Possibly Happen". If this method - * is called, something is seriously wrong (e.g. a JFlex bug producing a faulty scanner etc.). - * - * Usual syntax/scanner level error handling should be done in error fallback rules. - * - * @param errorCode - * the code of the errormessage to display + * + * In a wellformed scanner (no or only correct usage of + * yypushback(int) and a match-all fallback rule) this method + * will only be called with things that "Can't Possibly Happen". + * If this method is called, something is seriously wrong + * (e.g. a JFlex bug producing a faulty scanner etc.). + * + * Usual syntax/scanner level error handling should be done + * in error fallback rules. + * + * @param errorCode the code of the errormessage to display */ private void zzScanError(int errorCode) { String message; try { message = ZZ_ERROR_MSG[errorCode]; - } catch (ArrayIndexOutOfBoundsException e) { + } + catch (ArrayIndexOutOfBoundsException e) { message = ZZ_ERROR_MSG[ZZ_UNKNOWN_ERROR]; } throw new Error(message); - } + } + /** * Pushes the specified amount of characters back into the input stream. - * + * * They will be read again by then next call of the scanning method - * - * @param number - * the number of characters to be read again. This number must not be greater than - * yylength()! + * + * @param number the number of characters to be read again. + * This number must not be greater than yylength()! */ - public void yypushback(int number) { - if (number > yylength()) + public void yypushback(int number) { + if ( number > yylength() ) zzScanError(ZZ_PUSHBACK_2BIG); zzMarkedPos -= number; } + /** - * Resumes scanning until the next regular expression is matched, the end of input is encountered - * or an I/O-Error occurs. - * - * @return the next token - * @exception java.io.IOException - * if any I/O-Error occurs + * Resumes scanning until the next regular expression is matched, + * the end of input is encountered or an I/O-Error occurs. + * + * @return the next token + * @exception java.io.IOException if any I/O-Error occurs */ public AnnotationFS yylex() throws java.io.IOException { int zzInput; @@ -577,42 +651,43 @@ class SeedLexer { int zzCurrentPosL; int zzMarkedPosL; int zzEndReadL = zzEndRead; - char[] zzBufferL = zzBuffer; - char[] zzCMapL = ZZ_CMAP; + char [] zzBufferL = zzBuffer; + char [] zzCMapL = ZZ_CMAP; - int[] zzTransL = ZZ_TRANS; - int[] zzRowMapL = ZZ_ROWMAP; - int[] zzAttrL = ZZ_ATTRIBUTE; + int [] zzTransL = ZZ_TRANS; + int [] zzRowMapL = ZZ_ROWMAP; + int [] zzAttrL = ZZ_ATTRIBUTE; while (true) { zzMarkedPosL = zzMarkedPos; - yychar += zzMarkedPosL - zzStartRead; + yychar+= zzMarkedPosL-zzStartRead; boolean zzR = false; - for (zzCurrentPosL = zzStartRead; zzCurrentPosL < zzMarkedPosL; zzCurrentPosL++) { + for (zzCurrentPosL = zzStartRead; zzCurrentPosL < zzMarkedPosL; + zzCurrentPosL++) { switch (zzBufferL[zzCurrentPosL]) { - case '\u000B': - case '\u000C': - case '\u0085': - case '\u2028': - case '\u2029': - yyline++; + case '\u000B': + case '\u000C': + case '\u0085': + case '\u2028': + case '\u2029': + yyline++; + zzR = false; + break; + case '\r': + yyline++; + zzR = true; + break; + case '\n': + if (zzR) zzR = false; - break; - case '\r': + else { yyline++; - zzR = true; - break; - case '\n': - if (zzR) - zzR = false; - else { - yyline++; - } - break; - default: - zzR = false; + } + break; + default: + zzR = false; } } @@ -628,56 +703,56 @@ class SeedLexer { zzEndReadL = zzEndRead; zzMarkedPosL = zzMarkedPos; zzBufferL = zzBuffer; - if (eof) + if (eof) zzPeek = false; - else + else zzPeek = zzBufferL[zzMarkedPosL] == '\n'; } - if (zzPeek) - yyline--; + if (zzPeek) yyline--; } zzAction = -1; zzCurrentPosL = zzCurrentPos = zzStartRead = zzMarkedPosL; - + zzState = ZZ_LEXSTATE[zzLexicalState]; + zzForAction: { while (true) { - + if (zzCurrentPosL < zzEndReadL) zzInput = zzBufferL[zzCurrentPosL++]; else if (zzAtEOF) { zzInput = YYEOF; break zzForAction; - } else { + } + else { // store back cached positions - zzCurrentPos = zzCurrentPosL; - zzMarkedPos = zzMarkedPosL; + zzCurrentPos = zzCurrentPosL; + zzMarkedPos = zzMarkedPosL; boolean eof = zzRefill(); // get translated positions and possibly new buffer - zzCurrentPosL = zzCurrentPos; - zzMarkedPosL = zzMarkedPos; - zzBufferL = zzBuffer; - zzEndReadL = zzEndRead; + zzCurrentPosL = zzCurrentPos; + zzMarkedPosL = zzMarkedPos; + zzBufferL = zzBuffer; + zzEndReadL = zzEndRead; if (eof) { zzInput = YYEOF; break zzForAction; - } else { + } + else { zzInput = zzBufferL[zzCurrentPosL++]; } } - int zzNext = zzTransL[zzRowMapL[zzState] + zzCMapL[zzInput]]; - if (zzNext == -1) - break zzForAction; + int zzNext = zzTransL[ zzRowMapL[zzState] + zzCMapL[zzInput] ]; + if (zzNext == -1) break zzForAction; zzState = zzNext; int zzAttributes = zzAttrL[zzState]; - if ((zzAttributes & 1) == 1) { + if ( (zzAttributes & 1) == 1 ) { zzAction = zzState; zzMarkedPosL = zzCurrentPosL; - if ((zzAttributes & 8) == 8) - break zzForAction; + if ( (zzAttributes & 8) == 8 ) break zzForAction; } } @@ -687,167 +762,152 @@ class SeedLexer { zzMarkedPos = zzMarkedPosL; switch (zzAction < 0 ? zzAction : ZZ_ACTION[zzAction]) { - case 3: { - NUM t = new NUM(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 17: - break; - case 13: { - QUESTION t = new QUESTION(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 18: - break; - case 8: { - SW t = new SW(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 19: - break; - case 4: { - SPACE t = new SPACE(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 20: - break; - case 11: { - COMMA t = new COMMA(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 21: - break; - case 1: { - SPECIAL t = new SPECIAL(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 22: - break; - case 15: { - MARKUP t = new MARKUP(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 23: - break; - case 6: { - EXCLAMATION t = new EXCLAMATION(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 24: - break; - case 7: { - NBSP t = new NBSP(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 25: - break; - case 14: { - CAP t = new CAP(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 26: - break; - case 12: { - PERIOD t = new PERIOD(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 27: - break; - case 5: { - BREAK t = new BREAK(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 28: - break; - case 2: { - CW t = new CW(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 29: - break; - case 10: { - COLON t = new COLON(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 30: - break; - case 9: { - SEMICOLON t = new SEMICOLON(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 31: - break; - case 16: { - AMP t = new AMP(cas); - t.setBegin(yychar); - t.setEnd(yychar + yytext().length()); - - return t; - } - case 32: - break; - default: + case 3: + { NUM t = new NUM(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 17: break; + case 13: + { QUESTION t = new QUESTION(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 18: break; + case 8: + { SW t = new SW(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 19: break; + case 4: + { SPACE t = new SPACE(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 20: break; + case 11: + { COMMA t = new COMMA(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 21: break; + case 1: + { SPECIAL t = new SPECIAL(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 22: break; + case 15: + { MARKUP t = new MARKUP(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 23: break; + case 6: + { EXCLAMATION t = new EXCLAMATION(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 24: break; + case 7: + { NBSP t = new NBSP(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 25: break; + case 14: + { CAP t = new CAP(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 26: break; + case 12: + { PERIOD t = new PERIOD(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 27: break; + case 5: + { BREAK t = new BREAK(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 28: break; + case 2: + { CW t = new CW(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 29: break; + case 10: + { COLON t = new COLON(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 30: break; + case 9: + { SEMICOLON t = new SEMICOLON(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 31: break; + case 16: + { AMP t = new AMP(cas); + t.setBegin(yychar); + t.setEnd(yychar + yytext().length()); + + return t; + } + case 32: break; + default: if (zzInput == YYEOF && zzStartRead == zzCurrentPos) { zzAtEOF = true; switch (zzLexicalState) { - case YYINITIAL: { - return null; - } - case 31: - break; - default: - return null; + case YYINITIAL: { + return null; } - } else { + case 31: break; + default: + return null; + } + } + else { zzScanError(ZZ_NO_MATCH); } } } } + } Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml (original) +++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicEngine.xml Tue Nov 4 13:26:36 2014 @@ -206,7 +206,6 @@ <value> <array> <string>org.apache.uima.ruta.type.SPACE</string> - <string>org.apache.uima.ruta.type.NBSP</string> <string>org.apache.uima.ruta.type.BREAK</string> <string>org.apache.uima.ruta.type.MARKUP</string> </array> Modified: uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicTypeSystem.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicTypeSystem.xml?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicTypeSystem.xml (original) +++ uima/ruta/trunk/ruta-core/src/main/resources/org/apache/uima/ruta/engine/BasicTypeSystem.xml Tue Nov 4 13:26:36 2014 @@ -120,7 +120,7 @@ <typeDescription> <name>org.apache.uima.ruta.type.NBSP</name> <description/> - <supertypeName>org.apache.uima.ruta.type.ANY</supertypeName> + <supertypeName>org.apache.uima.ruta.type.SPACE</supertypeName> </typeDescription> <typeDescription> <name>org.apache.uima.ruta.type.AMP</name> Added: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/NBSPWSTest.java URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/NBSPWSTest.java?rev=1636576&view=auto ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/NBSPWSTest.java (added) +++ uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/NBSPWSTest.java Tue Nov 4 13:26:36 2014 @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.uima.ruta; + +import static org.junit.Assert.assertEquals; + +import org.apache.uima.cas.CAS; +import org.apache.uima.cas.Type; +import org.apache.uima.cas.text.AnnotationFS; +import org.apache.uima.cas.text.AnnotationIndex; +import org.apache.uima.ruta.engine.Ruta; +import org.apache.uima.ruta.engine.RutaTestUtils; +import org.junit.Test; + +public class NBSPWSTest { + + @Test + public void test() { + String document = ((char) 160) + "-" + ((char) 8239) + "-" + ((char) 65279) + "-" + ((char) 8199) + + "-" + ((char) 6158); + String script = "Document{-> RETAINTYPE(SPACE)};"; + script += "WS{-> T1};"; + CAS cas = null; + try { + cas = RutaTestUtils.getCAS(document); + Ruta.apply(cas, script); + } catch (Exception e) { + e.printStackTrace(); + } + + Type t = null; + AnnotationIndex<AnnotationFS> ai = null; + + t = RutaTestUtils.getTestType(cas, 1); + ai = cas.getAnnotationIndex(t); + assertEquals(5, ai.size()); + + if (cas != null) { + cas.release(); + } + + } +} Propchange: uima/ruta/trunk/ruta-core/src/test/java/org/apache/uima/ruta/NBSPWSTest.java ------------------------------------------------------------------------------ svn:eol-style = native Modified: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/BasicTypeSystem.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/BasicTypeSystem.xml?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/BasicTypeSystem.xml (original) +++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/BasicTypeSystem.xml Tue Nov 4 13:26:36 2014 @@ -120,7 +120,7 @@ <typeDescription> <name>org.apache.uima.ruta.type.NBSP</name> <description/> - <supertypeName>org.apache.uima.ruta.type.ANY</supertypeName> + <supertypeName>org.apache.uima.ruta.type.SPACE</supertypeName> </typeDescription> <typeDescription> <name>org.apache.uima.ruta.type.AMP</name> Modified: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TestEngine.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TestEngine.xml?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TestEngine.xml (original) +++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TestEngine.xml Tue Nov 4 13:26:36 2014 @@ -181,7 +181,6 @@ <value> <array> <string>org.apache.uima.ruta.type.SPACE</string> - <string>org.apache.uima.ruta.type.NBSP</string> <string>org.apache.uima.ruta.type.BREAK</string> <string>org.apache.uima.ruta.type.MARKUP</string> </array> Modified: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TypeAliasTestEngine.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TypeAliasTestEngine.xml?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TypeAliasTestEngine.xml (original) +++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/TypeAliasTestEngine.xml Tue Nov 4 13:26:36 2014 @@ -188,7 +188,6 @@ <value> <array> <string>org.apache.uima.ruta.type.SPACE</string> - <string>org.apache.uima.ruta.type.NBSP</string> <string>org.apache.uima.ruta.type.BREAK</string> <string>org.apache.uima.ruta.type.MARKUP</string> </array> @@ -223,22 +222,19 @@ <nameValuePair> <name>scriptPaths</name> <value> - <array> - </array> + <array/> </value> </nameValuePair> <nameValuePair> <name>descriptorPaths</name> <value> - <array> - </array> + <array/> </value> </nameValuePair> <nameValuePair> <name>resourcePaths</name> <value> - <array> - </array> + <array/> </value> </nameValuePair> <nameValuePair> Modified: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/CWEngine.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/CWEngine.xml?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/CWEngine.xml (original) +++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/CWEngine.xml Tue Nov 4 13:26:36 2014 @@ -207,7 +207,6 @@ <value> <array> <string>org.apache.uima.ruta.type.SPACE</string> - <string>org.apache.uima.ruta.type.NBSP</string> <string>org.apache.uima.ruta.type.BREAK</string> <string>org.apache.uima.ruta.type.MARKUP</string> </array> Modified: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SWEngine.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SWEngine.xml?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SWEngine.xml (original) +++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SWEngine.xml Tue Nov 4 13:26:36 2014 @@ -207,7 +207,6 @@ <value> <array> <string>org.apache.uima.ruta.type.SPACE</string> - <string>org.apache.uima.ruta.type.NBSP</string> <string>org.apache.uima.ruta.type.BREAK</string> <string>org.apache.uima.ruta.type.MARKUP</string> </array> Modified: uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SimpleEngine.xml URL: http://svn.apache.org/viewvc/uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SimpleEngine.xml?rev=1636576&r1=1636575&r2=1636576&view=diff ============================================================================== --- uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SimpleEngine.xml (original) +++ uima/ruta/trunk/ruta-core/src/test/resources/org/apache/uima/ruta/engine/SimpleEngine.xml Tue Nov 4 13:26:36 2014 @@ -207,7 +207,6 @@ <value> <array> <string>org.apache.uima.ruta.type.SPACE</string> - <string>org.apache.uima.ruta.type.NBSP</string> <string>org.apache.uima.ruta.type.BREAK</string> <string>org.apache.uima.ruta.type.MARKUP</string> </array>