Revision: 6076
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6076&view=rev
Author:   dominikoeo
Date:     2011-12-20 22:35:03 +0000 (Tue, 20 Dec 2011)
Log Message:
-----------
[br] several updates to the script which creates breton.dict
     from the Apertium dictionary.

Modified Paths:
--------------
    trunk/JLanguageTool/src/resource/br/create-lexicon.pl

Modified: trunk/JLanguageTool/src/resource/br/create-lexicon.pl
===================================================================
--- trunk/JLanguageTool/src/resource/br/create-lexicon.pl       2011-12-20 
22:34:09 UTC (rev 6075)
+++ trunk/JLanguageTool/src/resource/br/create-lexicon.pl       2011-12-20 
22:35:03 UTC (rev 6076)
@@ -50,17 +50,53 @@
   "Gallaoued",          "C’hallaoued",    "Kallaoued",
   "gallegerien",        "c’hallegerien",  "kallegerien",
   "gallegerion",        "c’hallegerion",  "kallegerion",
+  "bachelourien",       "vachelourien",   "pachelourien",
+  "bac’herien",         "vac’herien",     "pac’herien",
+  "bac’herion",         "vac’herion",     "pac’herion",
+  "bagsavourien",       "vagsavourien",   "pagsavourien",
+  "bamerien",           "vamerien",       "pamerien",
+  "bamerion",           "vamerion",       "pamerion",
   "baleerien",          "valeerien",      "paleerien",
   "baraerien",          "varaerien",      "paraerien",
+  "baraerion",          "varaerion",      "paraerion",
+  "barnerien",          "varnerien",      "parnerien",
+  "barnerion",          "varnerion",      "parnerion",
   "barzhed",            "varzhed",        "parzhed",
   "beajourien",         "veajourien",     "peajourien",
+  "bedoniourien",       "vedoniourien",   "pedoniourien",
+  "begennelourien",     "vegennelourien", "pegennelourien",
   "beleien",            "veleien",        "peleien",
+  "benerien",           "venerien",       "penerien",
+  "bevoniourien",       "vevonourien",    "pevonourien",
+  "bigrierien",         "vigrierien",     "pigrierien",
+  "biniaouerien",       "viniaouerien",   "piniaouerien",
+  "biolinourien",       "violinourien",   "piolinourien",
   "bleinerien",         "vleinerien",     "pleinerien",
   "bleinerion",         "vleinerion",     "pleinerion",
+  "bonelourien",        "vonelourien",    "ponelourien",
+  "bouloñjerien",       "vouloñjerien",   "pouloñjerien",
+  "bombarderien",       "vombarderien",   "pombarderien",
+  "braventiourien",     "vraventiourien", "praventiourien",
+  "bredklañvourien",    "vredklañvourien", "predklañvourien",
+  "bredoniourien",      "vredoniourien",  "predoniourien",
+  "bresourien",         "vresourien",     "presourien",
   "breudeur",           "vreudeur",       "preudeur",
+  "Bretoned",           "Vretoned",       "Pretoned",
+  "brezhonegerien",     "vrezhonegerien", "prezhonegerien",
+  "Brezhoned",          "Vrezhoned",      "Prezhoned",
   "breutaerien",        "vreutaerien",    "preutaerien",
+  "brezelourien",       "vrezelourien",   "prezelourien",
   "brezhonegerien",     "vrezhonegerien", "prezhonegerien",
+  "brigadennourien",    "vrigadennourien", "prigadennourien",
+  "brizhkeltiegourien", "vrizhkeltiegourien", "prizhkeltiegourien",
+  "brizhkredennourien", "vrizhkredennourien", "prizhkredennourien",
+  "broadelourien",      "vroadelourien",  "proadelourien",
+  "brogarourien",       "vrogarourien",   "progarourien",
+  "brozennourien",      "vrozennourien",  "prozennourien",
+  "brudourien",         "vrudourien",     "prudourien",
+  "buhezegezhourien",   "vuhezegezhourien", "puhezegezhourien",
   "bugale",             "vugale",         "pugale",
+  "bugulien",           "vugulien",       "pugulien",
   "butunerien",         "vutunerien",     "putunerien",
   "butunerion",         "vutunerion",     "putunerion",
   "dañserien",                            "tañserien",
@@ -131,6 +167,8 @@
   "touristed",          "douristed",                       "zouristed",
   "tredanerien",        "dredanerien",                     "zredanerien",
   "tredanerion",        "dredanerion",                     "zredanerion",
+  "tredeeged",          "dredeeged",                       "zredeeged",
+  "tredeoged",          "dredeoged",                       "zredeoged",
   "tud",                "dud",                             "zud",
 );
 my %anv_lies_tud = map { $_ => 0 } @anv_lies_tud;
@@ -383,6 +421,12 @@
     elsif ($tags eq '<vbloc><pii><p3><pl>')     { $tag = "V impl 3 p" }     # 
edont
     elsif ($tags eq '<vbloc><pii><impers><sp>') { $tag = "V impl impers" }  # 
emod
 
+    # Words that we tag as both masculine and feminine,
+    # even though Apertium does not tag them with both genders.
+    if ($lemma eq 'trubuilh' and $word =~ /^[tdz]rubuilh(où)?$/) {
+      $tag =~ s/^N m/N e/;
+    }
+
     if ($tag =~ /N m p/) {
       if (exists $anv_lies_tud{$word} or $word =~ /[A-Z].*iz$/) {
         $tag .= ' t';
@@ -391,26 +435,26 @@
     }
 
     my ($first_letter_lemma) = $lemma =~ /^(gw|[ktpgdbm]).*/i;
-    my ($first_letter_word)  = $word  =~ /^([kg]w|c’h|[gdbzfktvp]).*/i;
+    my ($first_letter_word)  = $word  =~ /^([kg]w|c’h|[gdbzfktvpw]).*/i;
     $first_letter_lemma = lc $first_letter_lemma;
     $first_letter_word  = lc $first_letter_word;
 
-    if    ($lemma eq 'kaout')  { }
+    if    ($lemma eq 'kaout' and !($word =~ '.*aout')) { }
     elsif ($word  eq 'tud')    { }
-    elsif ($word  eq 'dud')    { $tag .= "M:1:1a" }
-    elsif ($word  eq 'zud')    { $tag .= "M:2:" }
+    elsif ($word  eq 'dud')    { $tag .= " M:1:1a" }
+    elsif ($word  eq 'zud')    { $tag .= " M:2:" }
     elsif ($word  eq 'diweuz') { }
-    elsif ($word  eq 'tiweuz') { $tag .= "M:3:" }
-    elsif ($word  eq 'ziweuz') { $tag .= "M:1:1b:" }
+    elsif ($word  eq 'tiweuz') { $tag .= " M:3:" }
+    elsif ($word  eq 'ziweuz') { $tag .= " M:1:1b:" }
     elsif ($word =~ '^kezeg-?(koad|mor|blein)?$')   { }
-    elsif ($word =~ '^gezeg-?(koad|mor|blein)?$')   { $tag .= "M:1:1a:" }
-    elsif ($word =~ '^c’hezeg-?(koad|mor|blein)?$') { $tag .= "M:2:" }
+    elsif ($word =~ '^gezeg-?(koad|mor|blein)?$')   { $tag .= " M:1:1a:" }
+    elsif ($word =~ '^c’hezeg-?(koad|mor|blein)?$') { $tag .= " M:2:" }
     elsif ($word =~ '^daou(lin|lagad)$')            { }
-    elsif ($word =~ '^taou(lin|lagad)$')            { $tag .= "M:3:" }
-    elsif ($word =~ '^zaou(lin|lagad)$')            { $tag .= "M:1:1b:" }
+    elsif ($word =~ '^taou(lin|lagad)$')            { $tag .= " M:3:" }
+    elsif ($word =~ '^zaou(lin|lagad)$')            { $tag .= " M:1:1b:" }
     elsif ($word =~ '^div(c’har|esker|rec’h|ronn|orzhed|jod|skouarn)$') { }
-    elsif ($word =~ '^tiv(c’har|esker|rec’h|ronn|orzhed|jod|skouarn)$') { $tag 
.= "M:3:" }
-    elsif ($word =~ '^ziv(c’har|esker|rec’h|ronn|orzhed|jod|skouarn)$') { $tag 
.= "M:1:1b:" }
+    elsif ($word =~ '^tiv(c’har|esker|rec’h|ronn|orzhed|jod|skouarn)$') { $tag 
.= " M:3:" }
+    elsif ($word =~ '^ziv(c’har|esker|rec’h|ronn|orzhed|jod|skouarn)$') { $tag 
.= " M:1:1b:" }
     elsif   ($first_letter_lemma and 
              $first_letter_word  and 
              $first_letter_lemma ne $first_letter_word and
@@ -426,13 +470,13 @@
       } elsif ($first_letter_lemma eq 'p')   {
         if    ($first_letter_word  eq 'b')   { $tag .= " M:1:1a:" }
         elsif ($first_letter_word  eq 'f')   { $tag .= " M:2:" }
-      } elsif ($first_letter_lemma eq 'g')   {
-        if    ($first_letter_word  eq 'c’h') { $tag .= " M:1:1a:1b:4:" }
-        elsif ($first_letter_word  eq 'k')   { $tag .= " M:3:" }
       } elsif ($first_letter_lemma eq 'gw')  {
         if    ($first_letter_word  eq 'w')   { $tag .= " M:1:1a:1b:4:" }
         elsif ($first_letter_word  eq 'kw')  { $tag .= " M:3:" }
         elsif ($first_letter_word  eq 'c’h') { $tag .= " M:4:" }
+      } elsif ($first_letter_lemma eq 'g')   {
+        if    ($first_letter_word  eq 'c’h') { $tag .= " M:1:1a:1b:4:" }
+        elsif ($first_letter_word  eq 'k')   { $tag .= " M:3:" }
       } elsif ($first_letter_lemma eq 'd')   {
         if    ($first_letter_word  eq 'z')   { $tag .= " M:1:1b:4:" }
         elsif ($first_letter_word  eq 't')   { $tag .= " M:3:4:" }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Write once. Port to many.
Get the SDK and tools to simplify cross-platform app development. Create 
new or port existing apps to sell to consumers worldwide. Explore the 
Intel AppUpSM program developer opportunity. appdeveloper.intel.com/join
http://p.sf.net/sfu/intel-appdev
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to