action...

pkluegl Thu, 21 Mar 2013 06:44:36 -0700

Modified: 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml?rev=1459309&r1=1459308&r2=1459309&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.syntax.xml
 Thu Mar 21 13:44:04 2013
@@ -83,7 +83,11 @@ BlockDeclaration    -> "BLOCK" "(" Ident
                                                        "{" Statements 
"}"]]></programlisting>
 
     Syntax of statements and rule elements:
-    <programlisting><![CDATA[SimpleStatement        -> RuleElements ";"
+    <programlisting><![CDATA[SimpleStatement        -> RuleElements ";" | 
RegExpRule ";"
+RegExpRule             -> StringExpression "->" GroupAssignment 
+                          ("," GroupAssignment)*
+GroupAssignment        -> TypeExpression 
+                        | NumberExpression "=" TypeExpression
 RuleElements           -> RuleElement+
 RuleElement            -> RuleElementType | RuleElementLiteral
                         | RuleElementComposed | RuleElementDisjunctive


Modified: 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.xml
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.xml?rev=1459309&r1=1459308&r2=1459309&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.xml
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.language.xml
 Thu Mar 21 13:44:04 2013
@@ -561,4 +561,31 @@ Document{->MARKTABLE(PresidentOfUSA, 1, 
       </para>
     </section>
   </section>
+  <section id="ugr.tools.tm.language.regexprule">
+    <title>Simple Rules based on Regular Expressions</title>
+    <para>
+      The TextMarker language includes, in addition to the normal rules, a 
simplified rule syntax for processing regular expressions.
+      These simple rules consist of two parts separated by <quote>-></quote>: 
The left part is the regular expression 
+      (flags: DOTALL and MULTILINE), which may contain capturing groups. The 
right part defines which kind of annotations 
+      should be created for each match of the regular expression. If a type is 
given without a group index, then an annotation of that type is
+      created for the complete regular expression match, which corresponds to 
group 0. These simple rules can be restricted to match only within
+      certain annotations using the BLOCK construct, and ignore all filtering 
settings.
+    </para>
+    
+    <programlisting><![CDATA[
+RegExpRule      -> StringExpression "->" GroupAssignment 
+                  ("," GroupAssignment)* ";"
+GroupAssignment -> TypeExpression | NumberExpression "=" TypeExpression
+]]></programlisting>
+    
+    <para>
+      The following example contains a simple rule, which is able to create 
annotations of two different types. It creates an annotation 
+      of the type <quote>T1</quote> for each match of the complete regular 
expression and an annotation 
+      of the type <quote>T2</quote> for each match of the first capturing 
group.
+    </para>
+    
+    <programlisting><![CDATA["A(.*?)C" -> T1, 1 = T2;]]></programlisting>
+    
+    
+  </section>
 </chapter>
\ No newline at end of file

Modified: 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml?rev=1459309&r1=1459308&r2=1459309&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-docbook/src/docbook/tools.textmarker.overview.xml
 Thu Mar 21 13:44:04 2013
@@ -179,7 +179,15 @@ W{REGEXP("dog") -> MARK(Animal)};]]></pr
       The default seeder does actually not add annotations of the type 
<quote>W</quote>, but annotations of the types <quote>SW</quote> and 
       <quote>CW</quote> for small written words and capitalized words, which 
both have the parent type <quote>W</quote>.
     </para>
-
+    
+    <para>
+      There is also a special kind of rule, which follows a different syntax 
and semantics, and enables a simplified creation of annotations based on regular 
expressions.
+      The following rule, for example, creates an <quote>Animal</quote> 
annotation for each occurrence of <quote>dog</quote> or <quote>cat</quote>.
+    </para>
+    
+    <programlisting><![CDATA[DECLARE Animal;
+"dog|cat" -> Animal;]]></programlisting>
+    
     <para>
       Since it is tedious to create Animal annotations by matching on 
different regular expression, we apply an external dictionary in the next 
example.
       The first line defines a word list named <quote>AnimalsList</quote>, 
which is located in the resource folder (the file <quote>Animals.txt</quote> 

Modified: 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/explain/tree/ExplainTree.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/explain/tree/ExplainTree.java?rev=1459309&r1=1459308&r2=1459309&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/explain/tree/ExplainTree.java
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/explain/tree/ExplainTree.java
 Thu Mar 21 13:44:04 2013
@@ -238,9 +238,11 @@ public class ExplainTree {
 
     Feature feature = 
ruleMatchType.getFeatureByBaseName(ExplainConstants.ELEMENTS);
     ArrayFS value = (ArrayFS) fs.getFeatureValue(feature);
-    FeatureStructure[] fsarray = value.toArray();
-    for (FeatureStructure each : fsarray) {
-      buildTree(each, remRoot, ts, offset, onlyRules);
+    if (value != null) {
+      FeatureStructure[] fsarray = value.toArray();
+      for (FeatureStructure each : fsarray) {
+        buildTree(each, remRoot, ts, offset, onlyRules);
+      }
     }
   }
 
@@ -271,15 +273,16 @@ public class ExplainTree {
 
     feature = 
ruleElementMatchType.getFeatureByBaseName(ExplainConstants.CONDITIONS);
     ArrayFS value = (ArrayFS) fs.getFeatureValue(feature);
-    FeatureStructure[] fsarray = value.toArray();
-    for (FeatureStructure each : fsarray) {
-      buildTree(each, remNode, ts, offset, onlyRules);
+    if (value != null) {
+      FeatureStructure[] fsarray = value.toArray();
+      for (FeatureStructure each : fsarray) {
+        buildTree(each, remNode, ts, offset, onlyRules);
+      }
     }
-
     feature = fs.getType().getFeatureByBaseName(ExplainConstants.ELEMENTS);
     value = (ArrayFS) fs.getFeatureValue(feature);
     if (value != null) {
-      fsarray = value.toArray();
+      FeatureStructure[] fsarray = value.toArray();
       for (FeatureStructure each : fsarray) {
         buildTree(each, remNode, ts, offset, onlyRules);
       }

Modified: 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/explain/tree/RuleElementMatchNode.java
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/explain/tree/RuleElementMatchNode.java?rev=1459309&r1=1459308&r2=1459309&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/explain/tree/RuleElementMatchNode.java
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-ep-addons/src/main/java/org/apache/uima/textmarker/explain/tree/RuleElementMatchNode.java
 Thu Mar 21 13:44:04 2013
@@ -41,11 +41,13 @@ public class RuleElementMatchNode extend
 
     f = fs.getType().getFeatureByBaseName(ExplainConstants.CONDITIONS);
     ArrayFS value = (ArrayFS) fs.getFeatureValue(f);
-    FeatureStructure[] fsarray = value.toArray();
-    for (FeatureStructure each : fsarray) {
-      Feature eachFeat = 
each.getType().getFeatureByBaseName(ExplainConstants.VALUE);
-      boolean eachValue = each.getBooleanValue(eachFeat);
-      matched &= eachValue;
+    if (value != null) {
+      FeatureStructure[] fsarray = value.toArray();
+      for (FeatureStructure each : fsarray) {
+        Feature eachFeat = 
each.getType().getFeatureByBaseName(ExplainConstants.VALUE);
+        boolean eachValue = each.getBooleanValue(eachFeat);
+        matched &= eachValue;
+      }
     }
   }
 

Modified: 
uima/sandbox/textmarker/trunk/textmarker-ep-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g
URL: 
http://svn.apache.org/viewvc/uima/sandbox/textmarker/trunk/textmarker-ep-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g?rev=1459309&r1=1459308&r2=1459309&view=diff
==============================================================================
--- 
uima/sandbox/textmarker/trunk/textmarker-ep-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g
 (original)
+++ 
uima/sandbox/textmarker/trunk/textmarker-ep-ide/src/main/antlr3/org/apache/uima/textmarker/ide/core/parser/TextMarkerParser.g
 Thu Mar 21 13:44:04 2013
@@ -566,12 +566,42 @@ ruleElementWithoutCA returns [TextMarker
                
 simpleStatement returns [TextMarkerRule stmt = null]
        : 
+       (regexpRule)=> rer = regexpRule {stmt = rer;}
+       |
        elements=ruleElements 
                s = SEMI 
                {stmt = scriptFactory.createRule(elements, s);}
                
        ;
 
+regexpRule returns [TextMarkerRule stmt = null]
+@init{
+       List<Expression> exprs = new ArrayList<Expression>();
+}
+       :
+       regexp = stringExpression {exprs.add(regexp);} {stmt = 
scriptFactory.createRule(exprs, s);} THEN
+       (
+       te = typeExpression {exprs.add(te);} {stmt = 
scriptFactory.createRule(exprs, s);}
+       |
+       indexCG = numberExpression {exprs.add(indexCG);}{stmt = 
scriptFactory.createRule(exprs, s);} ASSIGN_EQUAL indexTE = typeExpression 
{exprs.add(indexTE);}
+       )
+       (
+       COMMA
+       (
+       te = typeExpression {exprs.add(te);}{stmt = 
scriptFactory.createRule(exprs, s);}
+       |
+       indexCG = numberExpression {exprs.add(indexCG);}{stmt = 
scriptFactory.createRule(exprs, s);} ASSIGN_EQUAL indexTE = typeExpression 
{exprs.add(indexTE);}
+       )
+       
+       )*
+
+       s = SEMI
+       {stmt = scriptFactory.createRule(exprs, s);}
+       
+       ;
+
+
+
 ruleElements returns [List<Expression> elements = new ArrayList<Expression>()]
        :
        re = ruleElement {if(re!=null) elements.add(re);} (re = ruleElement 
{if(re!=null) elements.add(re);})*

svn commit: r1459309 [2/2] - in /uima/sandbox/textmarker/trunk: textmarker-core/src/main/antlr3/org/apache/uima/textmarker/parser/ textmarker-core/src/main/java/org/apache/uima/textmarker/ textmarker-core/src/main/java/org/apache/uima/textmarker/action...

Reply via email to