Revision: 7200
http://languagetool.svn.sourceforge.net/languagetool/?rev=7200&view=rev
Author: milek_pl
Date: 2012-06-03 09:15:48 +0000 (Sun, 03 Jun 2012)
Log Message:
-----------
[en] further fixes to CD_NN
Modified Paths:
--------------
trunk/JLanguageTool/src/resource/en/disambiguation.xml
trunk/JLanguageTool/src/rules/en/grammar.xml
Modified: trunk/JLanguageTool/src/resource/en/disambiguation.xml
===================================================================
--- trunk/JLanguageTool/src/resource/en/disambiguation.xml 2012-06-03
05:57:21 UTC (rev 7199)
+++ trunk/JLanguageTool/src/resource/en/disambiguation.xml 2012-06-03
09:15:48 UTC (rev 7200)
@@ -111,7 +111,7 @@
<marker>
<token>are</token>
</marker>
- <token postag="JJ.*|VBG|IN|DT|RB" postag_regexp="yes"></token>
+ <token postag="JJ.*|VB[GN]|IN|DT|RB" postag_regexp="yes"></token>
</pattern>
<disambig><match no="1" postag="VBP"
postag_regexp="yes"></match></disambig>
<example type="ambiguous" inputform="are[are/NN,be/VBP]"
outputform="are[be/VBP]">What follows <marker>are</marker> the terms used by
the Aikikai Foundation</example>
@@ -1489,6 +1489,39 @@
<disambig postag="VBD"/>
<example type="ambiguous"
inputform="saw[saw/NN,saw/VB,saw/VBP,see/VBD]" outputform="saw[see/VBD]">The
years 1812 and 1813 <marker>saw</marker> him in Germany and France
again</example>
</rule>
+
+ <!-- poor man's entity recognizer to recognize numbers contained in
proper names -->
-
+ <rule id="NNP_CD" name="Apollo 11">
+ <pattern>
+ <token postag="NNP"/>
+ <marker><token postag="CD"/></marker>
+ </pattern>
+ <disambig action="add"><wd pos="NNP"></wd></disambig>
+ <example type="untouched">Apollo is beautiful.</example>
+ <example type="ambiguous" inputform="12[12/CD]"
outputform="12[12/CD,12/NNP]">Apollo <marker>12</marker> was beautiful.</example>
+ </rule>
+
+ <rulegroup id="CAN_MD" name="can as a modal verb">
+ <rule>
+ <pattern>
+ <marker><token>can</token></marker>
+ <token postag="VB"/>
+ </pattern>
+ <disambig postag="MD"/>
+ <example type="untouched">It is a can of soup.</example>
+ <example type="ambiguous" inputform="can[can/MD,can/NN]"
outputform="can[can/MD]">This <marker>can</marker> be eliminated.</example>
+ </rule>
+ <rule>
+ <pattern>
+ <marker><token>can</token></marker>
+ <token postag="RB"/>
+ <token postag="VB"/>
+ </pattern>
+ <disambig postag="MD"/>
+ <example type="untouched">It is a can of soup.</example>
+ <example type="ambiguous" inputform="can[can/MD,can/NN]"
outputform="can[can/MD]">This <marker>can</marker> only be eliminated.</example>
+ </rule>
+
+ </rulegroup>
</rules>
Modified: trunk/JLanguageTool/src/rules/en/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/en/grammar.xml 2012-06-03 05:57:21 UTC
(rev 7199)
+++ trunk/JLanguageTool/src/rules/en/grammar.xml 2012-06-03 09:15:48 UTC
(rev 7200)
@@ -7792,9 +7792,9 @@
<rule id="CD_NN" name="Possible agreement error: numeral + singular
countable noun">
<pattern>
<marker>
- <token spacebefore="yes" postag="CD"><exception
regexp="yes">one|thou|zero|1</exception><exception scope="previous"
postag="DT|PRP\$|POS" postag_regexp="yes"/><exception regexp="yes"
scope="previous">January|February|March|April|May|June|July|August|September|October|November|December|size|\$\d+</exception><exception
spacebefore="no" scope="previous">.</exception></token>
+ <token spacebefore="yes" postag="CD"><exception
regexp="yes">one|thou|zero|1</exception><exception postag="NNP"/><exception
scope="previous" postag="DT|PRP\$|POS" postag_regexp="yes"/><exception
regexp="yes"
scope="previous">January|February|March|April|May|June|July|August|September|October|November|December|size|\$\d+</exception><exception
spacebefore="no" scope="previous">.</exception></token>
<token postag="NN:UN|NN" postag_regexp="yes"><exception
postag="NN:U|NNS|NNPS|JJ.*|IN"
postag_regexp="yes"></exception><exception>will</exception>
- <exception
regexp="yes">mm|lb|yd|ft|Msec|km|cm|mg|kg|gm|cc|stat|kc|hr</exception></token>
+ <exception
regexp="yes">mm|lb|yd|ft|Msec|km|cm|mg|kg|gm|cc|stat|kc|hr|min|m3|mcg|cpu|KB|MB</exception></token>
</marker>
<token><exception postag="NN.*"
postag_regexp="yes"></exception><exception>'</exception></token>
</pattern>
@@ -7812,6 +7812,7 @@
<example type="correct">It takes about 16 Msec. to traverse 25 mm.
of tissue.</example>
<example type="correct">$800 billion economy was envisioned for
the 1970s</example>
<example type="correct">Middle East peace after Israel's 1956
invasion of Egypt</example>
+ <example type="correct">The Apollo 8 mission was well covered in
the British documentary.</example>
</rule>
<rule id="MANY_NN" name="Possible agreement error: 'many/several/few'
+ singular countable noun">
<pattern>
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs