Revision: 7200
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7200&view=rev
Author:   milek_pl
Date:     2012-06-03 09:15:48 +0000 (Sun, 03 Jun 2012)
Log Message:
-----------
[en] further fixes to CD_NN

Modified Paths:
--------------
    trunk/JLanguageTool/src/resource/en/disambiguation.xml
    trunk/JLanguageTool/src/rules/en/grammar.xml

Modified: trunk/JLanguageTool/src/resource/en/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/resource/en/disambiguation.xml      2012-06-03 05:57:21 UTC (rev 7199)
+++ trunk/JLanguageTool/src/resource/en/disambiguation.xml      2012-06-03 09:15:48 UTC (rev 7200)
@@ -111,7 +111,7 @@
             <marker>
                 <token>are</token>
             </marker>
-            <token postag="JJ.*|VBG|IN|DT|RB" postag_regexp="yes"></token>
+            <token postag="JJ.*|VB[GN]|IN|DT|RB" postag_regexp="yes"></token>
         </pattern>
         <disambig><match no="1" postag="VBP" 
postag_regexp="yes"></match></disambig>
         <example type="ambiguous" inputform="are[are/NN,be/VBP]" 
outputform="are[be/VBP]">What follows <marker>are</marker> the terms used by 
the Aikikai Foundation</example>
@@ -1489,6 +1489,39 @@
                <disambig postag="VBD"/>
                <example type="ambiguous" 
inputform="saw[saw/NN,saw/VB,saw/VBP,see/VBD]" outputform="saw[see/VBD]">The 
years 1812 and 1813 <marker>saw</marker> him in Germany and France 
again</example>
        </rule>
+
+       <!--  poor man's entity recognizer to recognize numbers contained in proper names -->
        
-
+       <rule id="NNP_CD" name="Apollo 11">
+               <pattern>
+                       <token postag="NNP"/>
+                       <marker><token postag="CD"/></marker>
+               </pattern>
+               <disambig action="add"><wd pos="NNP"></wd></disambig>
+               <example type="untouched">Apollo is beautiful.</example>
+               <example type="ambiguous" inputform="12[12/CD]" 
outputform="12[12/CD,12/NNP]">Apollo <marker>12</marker> was beautiful.</example>
+       </rule>
+       
+       <rulegroup id="CAN_MD" name="can as a modal verb">
+       <rule>
+               <pattern>
+                       <marker><token>can</token></marker>
+                       <token postag="VB"/>                    
+               </pattern>
+               <disambig postag="MD"/>
+               <example type="untouched">It is a can of soup.</example>
+               <example type="ambiguous" inputform="can[can/MD,can/NN]" 
outputform="can[can/MD]">This <marker>can</marker> be eliminated.</example>
+       </rule>
+       <rule>
+               <pattern>
+                       <marker><token>can</token></marker>
+                       <token postag="RB"/>
+                       <token postag="VB"/>                    
+               </pattern>
+               <disambig postag="MD"/>
+               <example type="untouched">It is a can of soup.</example>
+               <example type="ambiguous" inputform="can[can/MD,can/NN]" 
outputform="can[can/MD]">This <marker>can</marker> only be eliminated.</example>
+       </rule>
+               
+       </rulegroup>
 </rules>

Modified: trunk/JLanguageTool/src/rules/en/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/en/grammar.xml        2012-06-03 05:57:21 UTC (rev 7199)
+++ trunk/JLanguageTool/src/rules/en/grammar.xml        2012-06-03 09:15:48 UTC (rev 7200)
@@ -7792,9 +7792,9 @@
         <rule id="CD_NN" name="Possible agreement error: numeral + singular 
countable noun">
             <pattern>
                 <marker>
-                    <token spacebefore="yes" postag="CD"><exception 
regexp="yes">one|thou|zero|1</exception><exception scope="previous" 
postag="DT|PRP\$|POS" postag_regexp="yes"/><exception regexp="yes" 
scope="previous">January|February|March|April|May|June|July|August|September|October|November|December|size|\$\d+</exception><exception
 spacebefore="no" scope="previous">.</exception></token>
+                    <token spacebefore="yes" postag="CD"><exception 
regexp="yes">one|thou|zero|1</exception><exception postag="NNP"/><exception 
scope="previous" postag="DT|PRP\$|POS" postag_regexp="yes"/><exception 
regexp="yes" 
scope="previous">January|February|March|April|May|June|July|August|September|October|November|December|size|\$\d+</exception><exception
 spacebefore="no" scope="previous">.</exception></token>
                     <token postag="NN:UN|NN" postag_regexp="yes"><exception 
postag="NN:U|NNS|NNPS|JJ.*|IN" 
postag_regexp="yes"></exception><exception>will</exception>
-                    <exception 
regexp="yes">mm|lb|yd|ft|Msec|km|cm|mg|kg|gm|cc|stat|kc|hr</exception></token>
+                    <exception 
regexp="yes">mm|lb|yd|ft|Msec|km|cm|mg|kg|gm|cc|stat|kc|hr|min|m3|mcg|cpu|KB|MB</exception></token>
                 </marker>                
                 <token><exception postag="NN.*" 
postag_regexp="yes"></exception><exception>'</exception></token>                
             </pattern>
@@ -7812,6 +7812,7 @@
             <example type="correct">It takes about 16 Msec. to traverse 25 mm. 
of tissue.</example>
             <example type="correct">$800 billion economy was envisioned for 
the 1970s</example>
             <example type="correct">Middle East peace after Israel's 1956 
invasion of Egypt</example>
+            <example type="correct">The Apollo 8 mission was well covered in 
the British documentary.</example>
         </rule>
         <rule id="MANY_NN" name="Possible agreement error: 'many/several/few' 
+ singular countable noun">
             <pattern>

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to