Revision: 6629
http://languagetool.svn.sourceforge.net/languagetool/?rev=6629&view=rev
Author: dominikoeo
Date: 2012-03-22 18:59:40 +0000 (Thu, 22 Mar 2012)
Log Message:
-----------
[br] - updated POS tags for plural nouns of persons to
avoid false positives in things like "ar bugaligoù"
- detection of hard mutation after "ho" was missing one
case for detecting *ho gwaz* -> "ho kwaz"
Modified Paths:
--------------
trunk/JLanguageTool/src/resource/br/breton.dict
trunk/JLanguageTool/src/resource/br/create-lexicon.pl
trunk/JLanguageTool/src/rules/br/grammar.xml
Modified: trunk/JLanguageTool/src/resource/br/breton.dict
===================================================================
(Binary files differ)
Modified: trunk/JLanguageTool/src/resource/br/create-lexicon.pl
===================================================================
--- trunk/JLanguageTool/src/resource/br/create-lexicon.pl 2012-03-22
16:07:55 UTC (rev 6628)
+++ trunk/JLanguageTool/src/resource/br/create-lexicon.pl 2012-03-22
18:59:40 UTC (rev 6629)
@@ -102,6 +102,7 @@
"barverien", "varverien", "parverien",
"barzhed", "varzhed", "parzhed",
"barzhonegourien", "varzhonegourien", "parzhonegourien",
+ "barzhoniezhourien", "varzhoniezhourien", "parzhoniezhourien",
"beajourien", "veajourien", "peajourien",
"bedoniourien", "vedoniourien", "pedoniourien",
"begennelourien", "vegennelourien", "pegennelourien",
@@ -116,10 +117,12 @@
"besrenerien", "vesrenerien", "pesrenerien",
"bessekretourien", "vessekretourien", "pessekretourien",
"bes-sekretourien", "ves-sekretourien", "pes-sekretourien",
+ "besteñzorerien", "vesteñzorerien", "pesteñzorerien",
"bes-teñzorerien", "ves-teñzorerien", "pes-teñzorerien",
"bevezerien", "vevezerien", "pevezerien",
- "bevoniourien", "vevonourien", "pevonourien",
+ "bevoniourien", "vevoniourien", "pevoniourien",
"bevoniourion", "vevonourion", "pevonourion",
+ "bezierien", "vezierien", "pezierien",
"bidanellerien", "vidanellerien", "pidanellerien",
"bigrierien", "vigrierien", "pigrierien",
"biniaouerien", "viniaouerien", "piniaouerien",
@@ -138,6 +141,9 @@
"botaouerien", "votaouerien", "potaouerien",
"bouloñjerien", "vouloñjerien", "pouloñjerien",
"bouloñjerion", "vouloñjerion", "pouloñjerion",
+ "bourc’hizien", "vourc’hizien", "pourc’hizien",
+ "bourevien", "vourevien", "pourevien",
+ "boutellerien", "voutellerien", "poutellerien",
"boutinelerien", "voutinelerien", "poutinelerien",
"brabañserien", "vrabañserien", "prabañserien",
"braventiourien", "vraventiourien", "praventiourien",
@@ -146,17 +152,20 @@
"bredelfennerion", "vredelfennerion", "predelfennerion",
"bredklañvourien", "vredklañvourien", "predklañvourien",
"bredoniourien", "vredoniourien", "predoniourien",
+ "bredourien", "vredourien", "predourien",
"bredvezeien", "vredvezeien", "predvezeien",
"bredvezeion", "vredvezeion", "predvezeion",
"breolimerien", "vreolimerien", "preolimerien",
"breserien", "vreserien", "preserien",
"bresourien", "vresourien", "presourien",
+ "bretorien", "vretorien", "pretorien",
"breudeur", "vreudeur", "preudeur",
"breutaerien", "vreutaerien", "preutaerien",
"brezelourien", "vrezelourien", "prezelourien",
"brezhonegerien", "vrezhonegerien", "prezhonegerien",
"brientinien", "vrientinien", "prientinien",
"brigadennourien", "vrigadennourien", "prigadennourien",
+ "brigadierien", "vrigadierien", "prigadierien",
"brikerien", "vrikerien", "prikerien",
"brizhkeltiegourien", "vrizhkeltiegourien", "prizhkeltiegourien",
"brizhkredennourien", "vrizhkredennourien", "prizhkredennourien",
@@ -167,15 +176,21 @@
"brozennourien", "vrozennourien", "prozennourien",
"brudourien", "vrudourien", "prudourien",
"bugale", "vugale", "pugale",
+ "bugaleigoù", "vugaleigoù", "pugaleigoù",
+ "bugaligoù", "vugaligoù", "pugaligoù",
"bugulien", "vugulien", "pugulien",
"buhezegezhourien", "vuhezegezhourien", "puhezegezhourien",
"buhezoniourien", "vuhezoniourien", "puhezoniourien",
+ "buhezourien", "vuhezourien", "puhezourien",
"buhezskridourien", "vuhezskridourien", "puhezskridourien",
+ "buhezskriverien", "vuhezskriverien", "puhezskriverien",
"burutellerien", "vurutellerien", "purutellerien",
"butunerien", "vutunerien", "putunerien",
"butunerion", "vutunerion", "putunerion",
+ "chakerien",
"chalboterien",
"chaokerien",
+ "charreerien",
"charretourien",
"chaseourien",
"cherifed",
@@ -183,7 +198,9 @@
"cow-boyed",
"c’hoarierien",
"c’hoarzherien",
+ "c’hwennerien",
"c’hwiblaerien",
+ "c’hwilierien",
"c’hwiletaerien",
"dalc’hourien", "zalc’hourien", "talc’hourien",
"damesaerien", "zamesaerien", "tamesaerien",
@@ -236,6 +253,8 @@
"dreistwelourien", "zreistwelourien", "treistwelourien",
"drouklazherien", "zrouklazherien", "trouklazherien",
"eilrenerien",
+ "eilsekretourien",
+ "eil-sekretourien",
"eksibien",
"embannerien",
"emgannerien",
@@ -357,6 +376,7 @@
"klañvourien", "glañvourien", "c’hlañvourien",
"klerinellourien", "glerinellourien", "c’hlerinellourien",
"klezeourien", "glezeourien", "c’hlezeourien",
+ "komisien", "gomisien", "c’homisien",
"kompezourien", "gompezourien", "c’hompezourien",
"komunourien", "gomunourien", "c’homunourien",
"komzerien", "gomzerien", "c’homzerien",
@@ -493,6 +513,8 @@
"paluderien", "baluderien", "faluderien",
"pantierien", "bantierien", "fantierien",
"paotred", "baotred", "faotred",
+ "paotredoù", "baotredoù", "faotredoù",
+ "baotredigoù", "baotredigoù", "paotredigoù",
"paramantourien", "baramantourien", "faramantourien",
"pareourien", "bareourien", "fareourien",
"pardonerien", "bardonerien", "fardonerien",
@@ -580,7 +602,7 @@
"teknikourien", "deknikourien", "zeknikourien",
"telennourien", "delennourien", "zelennourien",
"tennerien", "dennerien", "zennerien",
- "teñzorierien", "deñzorierien", "zeñzorierien",
+ "teñzorerien", "deñzorerien", "zeñzorerien",
"tinellerien", "dinellerien", "zinellerien",
"tisavourien", "disavourien", "zisavourien",
"titourerien", "ditourerien", "zitourerien",
@@ -589,11 +611,14 @@
"tommerien", "dommerien", "zommerien",
"tontoned", "dontoned", "zontoned",
"torfedourien", "dorfedourien", "zorfedourien",
+ "toucherien", "doucherien", "zoucherien",
"touellerien", "douellerien", "zouellerien",
"toullerien", "doullerien", "zoullerien",
"toullerien-buñsoù", "doullerien-buñsoù", "zoullerien-buñsoù",
"toullerien-vezioù", "doullerien-vezioù", "zoullerien-vezioù",
"touristed", "douristed", "zouristed",
+ "trafikerien", "drafikerien", "zrafikerien",
+ "trapezerien", "drapezerien", "zrapezerien",
"trec’hourien", "drec’hourien", "zrec’hourien",
"tredanerien", "dredanerien", "zredanerien",
"tredanerion", "dredanerion", "zredanerion",
@@ -601,14 +626,18 @@
"tredeoged", "dredeoged", "zredeoged",
"treitourien", "dreitourien", "zreitourien",
"treizherien", "dreizherien", "zreizherien",
+ "tremenerien", "dremenerien", "zremenerien",
"tresourien", "dresourien", "zresourien",
"trevadennerien", "drevadennerien", "zrevadennerien",
"trevourien", "drevourien", "zrevourien",
"troadeien", "droadeien", "zroadeien",
+ "troergerzherien", "droergerzherien", "zroergerzherien",
"troerien", "droerien", "zroerien",
"troerien-douar", "droerien-douar", "zroerien-douar",
"troiadourien", "droiadourien", "zroiadourien",
+ "trompilherien", "drompilherien", "zrompilherien",
"trubarded", "drubarded", "zrubarded",
+ "trucherien", "drucherien", "zrucherien",
"truilhenned", "druilhenned", "zruilhenned",
"tud", "dud", "zud",
"tudonourien", "dudonourien", "zudonourien",
Modified: trunk/JLanguageTool/src/rules/br/grammar.xml
===================================================================
--- trunk/JLanguageTool/src/rules/br/grammar.xml 2012-03-22 16:07:55 UTC
(rev 6628)
+++ trunk/JLanguageTool/src/rules/br/grammar.xml 2012-03-22 18:59:40 UTC
(rev 6629)
@@ -146,6 +146,8 @@
<example type="correct">Pelec’h emañ ar baner?</example>
<example type="incorrect">Ar <marker>beleien</marker></example>
<example type="correct">Ar veleien</example>
+ <example type="incorrect">Ar <marker>bugaligoù</marker>.</example>
+ <example type="correct">Ar <marker>vugaligoù</marker>.</example>
</rule>
<rule>
<pattern mark_from="1">
@@ -722,6 +724,15 @@
<rule>
<pattern mark_from="1">
<token regexp="yes">ho|ez|az</token>
+ <token regexp="yes" postag="[^M]*M.*:1:.*"
postag_regexp="yes">w.*</token>
+ </pattern>
+ <message>Ur c’hemmadur dre galetaat a zlefe bezañ goude ar ger «\1».
Ha fellout a rae deoc’h skrivañ <suggestion>k\2</suggestion>?</message>
+ <example type="incorrect">Ho <marker>wazh</marker>.</example>
+ <example type="correct">Ho kwazh.</example>
+ </rule>
+ <rule>
+ <pattern mark_from="1">
+ <token regexp="yes">ho|ez|az</token>
<and>
<token regexp="yes" inflected="yes">d.*</token>
<token regexp="yes">[dz].*</token>
@@ -1181,9 +1192,7 @@
<token postag="N [^M]*M.*" postag_regexp="yes" regexp="yes"
inflected="yes">b.*
<exception postag="(N f s|N m p t) M.*:1a:.*" postag_regexp="yes"/>
<exception regexp="yes" inflected="yes">[vp].*</exception>
- <!-- Evit gouzout hiroc’h diwar-benn "ar vugaligoù", sellit amañ:
-
http://arbres.iker.univ-pau.fr/index.php/Doublage_des_marques_de_pluriel_sur_les_noms
-->
- <exception
regexp="yes">(vrud|vugaligoù)(-(mañ|se|hont))?</exception>
+ <exception regexp="yes">vrud(-(mañ|se|hont))?</exception>
</token>
</pattern>
<message>Direizh eo kemmañ ar ger «\2» amañ. Ha fellout a rae deoc’h
skrivañ <suggestion><match no="2" regexp_match=".(.*)"
regexp_replace="b$1"/></suggestion>?</message>
@@ -1200,7 +1209,6 @@
<example type="incorrect">Ar <marker>vrezelioù</marker></example>
<example type="incorrect">Ar <marker>prezelioù</marker></example>
<example type="correct">Ar brezelioù</example>
- <example type="correct">Ar vugaligoù.</example>
</rule>
<rule>
<pattern mark_from="1">
@@ -1264,6 +1272,7 @@
<and>
<token postag="N m p.*" postag_regexp="yes" regexp="yes">[bf].*où
<exception inflected="yes" regexp="yes">[bf].*</exception>
+ <exception postag="N m p t M:1:.*" postag_regexp="yes"/>
</token>
<token postag="N m p.*" postag_regexp="yes" regexp="yes"
inflected="yes">p.*</token>
</and>
@@ -1274,6 +1283,8 @@
<example type="correct">Ar privezioù</example>
<example type="correct">Ar fazioù</example>
<example type="correct">Ar bodadoù</example>
+ <example type="correct">Ar baotredigoù.</example>
+ <example type="correct">Ar baotredoù.</example>
</rule>
<rule>
<pattern mark_from="1">
@@ -5252,6 +5263,33 @@
<example type="correct">Pa ne labouran ket.</example>
<example type="correct">Pa ’z on skuizh.</example>
</rule>
+
+ <rulegroup id="HEMAN_HOUMAN" name="hemañ ha houmañ">
+ <rule>
+ <pattern mark_from="4">
+ <token postag="SENT_START"/>
+ <token postag="N m s.*" postag_regexp="yes"/>
+ <token>piv</token>
+ <token>eo</token>
+ <token regexp="yes">hou?mañ|hou?nnezh</token>
+ </pattern>
+ <message>Implijit <suggestion><match no="4" regexp_match="...(.*)"
regexp_replace="hen$1"/></suggestion> gant un anv gourel.</message>
+ <example type="incorrect">Karr piv eo
<marker>hounnezh</marker>?</example>
+ <example type="correct">Karr piv eo hennezh?</example>
+ </rule>
+ <rule>
+ <pattern mark_from="4">
+ <token postag="SENT_START"/>
+ <token postag="N f s.*" postag_regexp="yes"/>
+ <token>piv</token>
+ <token>eo</token>
+ <token regexp="yes">hemañ|hennezh</token>
+ </pattern>
+ <message>Implijit <suggestion><match no="4" regexp_match="..(.*)"
regexp_replace="hou$1"/></suggestion> gant un anv benel.</message>
+ <example type="incorrect">Keniterv piv eo
<marker>hennezh</marker>?</example>
+ <example type="correct">Keniterv piv eo hounnezh?</example>
+ </rule>
+ </rulegroup>
</category>
<!-- vim: foldmethod=marker foldmarker=<category,</category:
-->
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
This SF email is sponsored by:
Try Windows Azure free for 90 days Click Here
http://p.sf.net/sfu/sfd2d-msazure
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs