Revision: 7387
http://languagetool.svn.sourceforge.net/languagetool/?rev=7387&view=rev
Author: dominikoeo
Date: 2012-06-17 19:00:35 +0000 (Sun, 17 Jun 2012)
Log Message:
-----------
[br] updated Breton dictionary to use Apertium svn r38896
and small changes to Perl script that creates the dictionary
for LanguageTool.
Revision Links:
--------------
http://languagetool.svn.sourceforge.net/languagetool/?rev=38896&view=rev
Modified Paths:
--------------
trunk/JLanguageTool/src/resource/br/breton.dict
trunk/JLanguageTool/src/resource/br/create-lexicon.pl
Modified: trunk/JLanguageTool/src/resource/br/breton.dict
===================================================================
(Binary files differ)
Modified: trunk/JLanguageTool/src/resource/br/create-lexicon.pl
===================================================================
--- trunk/JLanguageTool/src/resource/br/create-lexicon.pl 2012-06-17 17:44:08 UTC (rev 7386)
+++ trunk/JLanguageTool/src/resource/br/create-lexicon.pl 2012-06-17 19:00:35 UTC (rev 7387)
@@ -15,10 +15,14 @@
#
# 1) Download the Apertium Breton dictionary:
# $ svn co https://apertium.svn.sourceforge.net/svnroot/apertium/trunk/apertium-br-fr
+# $ cd apertium-br-fr/
# 2) Install Apertium tools:
# $ sudo apt-get install lttoolbox
+# 3) Download morfologik-stemming-1.4.0.zip from
+# http://sourceforge.net/projects/morfologik/files/morfologik-stemming/1.4.0/
+# $ unzip morfologik-stemming-1.4.0.zip
+# This creates morfologik-stemming-nodict-1.4.0.jar
# 3) Run the script:
-# $ cd apertium-br-fr/
# $ ./create-lexicon.pl
#
# Author: Dominique Pelle <[email protected]>
@@ -50,6 +54,7 @@
"Alamaned",
"Amerikaned",
"Angled",
+ "Barbared", "Varbared", "Parbared",
"Bretoned", "Vretoned", "Pretoned",
"Brezhoned", "Vrezhoned", "Prezhoned",
"Eskimoed",
@@ -224,6 +229,8 @@
"deuñvien", "zeuñvien", "teuñvien",
"dezvarnourien", "zezvarnourien", "tezvarnourien",
"diazezerien", "ziazezerien", "tiazezerien",
+ "diazezourien", "ziazezourien", "tiazezourien",
+ "diazezourion", "ziazezourion", "tiazezourion",
"dibaberien", "zibaberien", "tibaberien",
"dibennerien", "zibennerien", "tibennerien",
"dibunerien", "zibunerien", "tibunerien",
@@ -297,9 +304,11 @@
"gouerien", "c’houerien", "kouerien",
"gouizieien", "c’houizieien", "kouizieien",
"gourdonerien", "c’hourdonerien", "kourdonerien",
+ "gourenerien", "c’hourenerien", "kourenerien",
"goved", "c’hoved", "koved",
"gwazed", "wazed", "kwazed",
"gwenanerien", "wenanerien", "kwenanerien",
+ "gwarded", "warded", "kwarded",
"gwerzherien", "werzherien", "kwerzherien",
"gwiaderien", "wiaderien", "kwiaderien",
"gwiaderion", "wiaderion", "kwiaderion",
@@ -489,6 +498,8 @@
"mistri-skol", "vistri-skol",
"mistri-vicherour", "vistri-vicherour",
"monitourien", "vonitourien",
+ "moraerien", "voraerien",
+ "moraerion", "voraerion",
"morlaeron", "vorlaeron",
"moruteaerien", "voruteaerien",
"mouezhierien", "vouezhierien",
@@ -909,7 +920,9 @@
}
my ($first_letter_lemma) = $lemma =~ /^(gw|[ktpgdbm]).*/i;
- my ($first_letter_word) = $word =~ /^([kg]w|c’h|[gdbzfktvpw]).*/i;
+ $first_letter_lemma = "" unless (defined $first_letter_lemma);
+ my ($first_letter_word) = $word =~ /^([kg]w|c’h|[gdbzfktvpw]).*/i;
+ $first_letter_word = "" unless (defined $first_letter_word);
$first_letter_lemma = lc $first_letter_lemma;
$first_letter_word = lc $first_letter_word;
@@ -981,6 +994,14 @@
}
print "handled [$out_count] words, unhandled [$err_count] words\n";
+# Adding missing words in dictionary.
+# kiz exists only in expressions in Apertium (which is OK) but
+# for LanguageTool, it's easier to make it a normal word so we
+# don't give false positive on "war ho c'hiz", etc.
+print OUT "kiz\tkiz\tN f s\n";
+print OUT "c’hiz\tkiz\tN f s M:0a:2:\n";
+print OUT "giz\tkiz\tN f s M:1:1a:\n";
+
print "Lemma words missing from dictionary:\n";
foreach (sort keys %all_lemmas) { print "$_\n" unless (exists $all_words{$_});
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs