Revision: 7789
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=7789&view=rev
Author:   dnaber
Date:     2012-08-04 21:36:59 +0000 (Sat, 04 Aug 2012)
Log Message:
-----------
Introduced an optional file resource/<lang>/hunspell/ignore.txt with words 
that the spell checker will ignore

Modified Paths:
--------------
    trunk/JLanguageTool/CHANGES.txt
    
trunk/JLanguageTool/src/java/org/languagetool/rules/be/MorfologikBelarusianSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/de/MorfologikGermanyGermanSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/el/MorfologikGreekSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAustralianSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikCanadianSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikNewZealandSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikSouthAfricanSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/it/MorfologikItalianSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/lt/MorfologikLithuanianSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/ml/MorfologikMalayalamSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/nl/MorfologikDutchSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/pl/MorfologikPolishSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/ro/MorfologikRomanianSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/ru/MorfologikRussianSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/sk/MorfologikSlovakSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/sl/MorfologikSlovenianSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
    
trunk/JLanguageTool/src/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRule.java

Added Paths:
-----------
    trunk/JLanguageTool/src/resource/de/hunspell/ignore.txt
    trunk/JLanguageTool/src/resource/en/hunspell/ignore.txt
    trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/
    
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SpellingCheckRuleTest.java
    
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SuggestionExtractorTest.java

Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt     2012-08-04 20:27:51 UTC (rev 7788)
+++ trunk/JLanguageTool/CHANGES.txt     2012-08-04 21:36:59 UTC (rev 7789)
@@ -37,6 +37,9 @@
    -several rule updates (Marco A.G.Pinto)
 
 
+ -introduced a file resources/<lang>/hunspell/ignore.txt with words that the 
spell checker
+  will ignore
+
  -stand-alone GUI: rules can now be disabled and enabled again with a single 
click
 
  -HTTP API: "+" was incorrectly removed from input (sf bug #3543914), patch by

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/be/MorfologikBelarusianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/be/MorfologikBelarusianSpellerRule.java
 2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/be/MorfologikBelarusianSpellerRule.java
 2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.be;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
   private static final String RESOURCE_FILENAME = "/be/hunspell/be_BY.dict";
 
   public MorfologikBelarusianSpellerRule(ResourceBundle messages,
-                                     Language language) {
+                                     Language language) throws IOException {
     super(messages, language);
   }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java
     2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java
     2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.br;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 import java.util.regex.Pattern;
 
@@ -32,7 +33,7 @@
   private static final Pattern BRETON_TOKENIZING_CHARS = Pattern.compile("-");
 
   public MorfologikBretonSpellerRule(ResourceBundle messages,
-                                     Language language) {
+                                     Language language) throws IOException {
     super(messages, language);
   }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRule.java
    2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRule.java
    2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.ca;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/ca/hunspell/ca_ES.dict";
     
     public MorfologikCatalanSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/de/MorfologikGermanyGermanSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/de/MorfologikGermanyGermanSpellerRule.java
      2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/de/MorfologikGermanyGermanSpellerRule.java
      2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.de;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/de/hunspell/de_DE.dict";
     
     public MorfologikGermanyGermanSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/el/MorfologikGreekSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/el/MorfologikGreekSpellerRule.java
      2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/el/MorfologikGreekSpellerRule.java
      2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.el;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/el/hunspell/el_GR.dict";
     
     public MorfologikGreekSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java
   2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java
   2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.en;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -30,7 +31,7 @@
 
   private static final String RESOURCE_FILENAME = "/en/hunspell/en_US.dict";
 
-  public MorfologikAmericanSpellerRule(ResourceBundle messages, Language 
language) {
+  public MorfologikAmericanSpellerRule(ResourceBundle messages, Language 
language) throws IOException {
     super(messages, language);
   }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAustralianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAustralianSpellerRule.java
 2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAustralianSpellerRule.java
 2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.en;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/en/hunspell/en_AU.dict";
     
     public MorfologikAustralianSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java
    2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java
    2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.en;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -30,7 +31,7 @@
 
   private static final String RESOURCE_FILENAME = "/en/hunspell/en_GB.dict";
 
-  public MorfologikBritishSpellerRule(ResourceBundle messages, Language 
language) {
+  public MorfologikBritishSpellerRule(ResourceBundle messages, Language 
language) throws IOException {
     super(messages, language);
   }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikCanadianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikCanadianSpellerRule.java
   2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikCanadianSpellerRule.java
   2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.en;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/en/hunspell/en_CA.dict";
     
     public MorfologikCanadianSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikNewZealandSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikNewZealandSpellerRule.java
 2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikNewZealandSpellerRule.java
 2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.en;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/en/hunspell/en_NZ.dict";
     
     public MorfologikNewZealandSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikSouthAfricanSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikSouthAfricanSpellerRule.java
       2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikSouthAfricanSpellerRule.java
       2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.en;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/en/hunspell/en_ZA.dict";
     
     public MorfologikSouthAfricanSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/it/MorfologikItalianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/it/MorfologikItalianSpellerRule.java
    2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/it/MorfologikItalianSpellerRule.java
    2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.it;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/it/hunspell/it_IT.dict";
     
     public MorfologikItalianSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/lt/MorfologikLithuanianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/lt/MorfologikLithuanianSpellerRule.java
 2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/lt/MorfologikLithuanianSpellerRule.java
 2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.lt;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/lt/hunspell/lt_LT.dict";
     
     public MorfologikLithuanianSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/ml/MorfologikMalayalamSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/ml/MorfologikMalayalamSpellerRule.java
  2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/ml/MorfologikMalayalamSpellerRule.java
  2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.ml;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/ml/hunspell/ml_IN.dict";
     
     public MorfologikMalayalamSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/nl/MorfologikDutchSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/nl/MorfologikDutchSpellerRule.java
      2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/nl/MorfologikDutchSpellerRule.java
      2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.nl;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/nl/hunspell/nl_NL.dict";
     
     public MorfologikDutchSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/pl/MorfologikPolishSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/pl/MorfologikPolishSpellerRule.java
     2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/pl/MorfologikPolishSpellerRule.java
     2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.pl;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/pl/hunspell/pl_PL.dict";
     
     public MorfologikPolishSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/ro/MorfologikRomanianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/ro/MorfologikRomanianSpellerRule.java
   2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/ro/MorfologikRomanianSpellerRule.java
   2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.ro;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/ro/hunspell/ro_RO.dict";
     
     public MorfologikRomanianSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/ru/MorfologikRussianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/ru/MorfologikRussianSpellerRule.java
    2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/ru/MorfologikRussianSpellerRule.java
    2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.ru;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -30,7 +31,7 @@
 
   private static final String RESOURCE_FILENAME = "/ru/hunspell/ru_RU.dict";
 
-  public MorfologikRussianSpellerRule(ResourceBundle messages, Language 
language) {
+  public MorfologikRussianSpellerRule(ResourceBundle messages, Language 
language) throws IOException {
     super(messages, language);
   }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/sk/MorfologikSlovakSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/sk/MorfologikSlovakSpellerRule.java
     2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/sk/MorfologikSlovakSpellerRule.java
     2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.sk;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/sk/hunspell/sk_SK.dict";
     
     public MorfologikSlovakSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/sl/MorfologikSlovenianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/sl/MorfologikSlovenianSpellerRule.java
  2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/sl/MorfologikSlovenianSpellerRule.java
  2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.sl;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/sl/hunspell/sl_SI.dict";
     
     public MorfologikSlovenianSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java
 2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java
 2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,9 +19,11 @@
 package org.languagetool.rules.spelling;
 
 import java.io.IOException;
-import java.util.ResourceBundle;
+import java.io.InputStream;
+import java.util.*;
 
 import org.languagetool.AnalyzedSentence;
+import org.languagetool.JLanguageTool;
 import org.languagetool.Language;
 import org.languagetool.rules.Rule;
 import org.languagetool.rules.RuleMatch;
@@ -35,6 +37,9 @@
 
   protected final Language language;
 
+  private static final String SPELLING_IGNORE_FILE = "/hunspell/ignore.txt";
+  private final Set<String> wordsToBeIgnored = new HashSet<String>();
+
   public SpellingCheckRule(final ResourceBundle messages, final Language 
language) {
     super(messages);
     this.language = language;
@@ -58,4 +63,44 @@
   public void reset() {
   }
 
+  protected boolean ignoreWord(String word) throws IOException {
+    // TODO?: this is needed at least for German as Hunspell tokenization 
includes the dot:
+    final String cleanWord = word.endsWith(".") ? word.substring(0, 
word.length() - 1) : word;
+    return wordsToBeIgnored.contains(cleanWord);
+  }
+
+  protected void init() throws IOException {
+    loadFileIfExists(language.getShortName() + SPELLING_IGNORE_FILE);
+    loadFileIfExists(language.getShortNameWithVariant() + 
SPELLING_IGNORE_FILE);
+  }
+
+  private void loadFileIfExists(String filename) throws IOException {
+    final boolean ignoreFileExists = 
JLanguageTool.getDataBroker().resourceExists(filename);
+    if (!ignoreFileExists) {
+      return;
+    }
+    loadWordsToBeIgnored(filename);
+  }
+
+  private void loadWordsToBeIgnored(String ignoreFile) throws IOException {
+    final InputStream inputStream = 
JLanguageTool.getDataBroker().getFromResourceDirAsStream(ignoreFile);
+    try {
+      final Scanner scanner = new Scanner(inputStream);
+      try {
+        while (scanner.hasNextLine()) {
+          final String line = scanner.nextLine();
+          final boolean isComment = line.startsWith("#");
+          if (isComment) {
+            continue;
+          }
+          wordsToBeIgnored.add(line);
+        }
+      } finally {
+        scanner.close();
+      }
+    } finally {
+      inputStream.close();
+    }
+  }
+
 }

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java
     2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java
     2012-08-04 21:36:59 UTC (rev 7789)
@@ -89,6 +89,10 @@
     // starting with the first token to skip the zero-length START_SENT
     int len = text.getTokens()[1].getStartPos();
     for (final String word : tokens) {
+      if (ignoreWord(word)) {
+        len += word.length() + 1;
+        continue;
+      }
       boolean isAlphabetic = true;
       if (word.length() == 1) { // hunspell dictionaries usually do not 
contain punctuation
         isAlphabetic = StringTools.isAlphabetic(word.charAt(0));
@@ -128,7 +132,8 @@
     return sb.toString();
   }
 
-  private void init() throws IOException {
+  protected void init() throws IOException {
+    super.init();
     final String langCountry = language.getShortName()
             + "_"
             + language.getCountryVariants()[0];

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
  2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
  2012-08-04 21:36:59 UTC (rev 7789)
@@ -49,15 +49,15 @@
 
     private Locale conversionLocale = Locale.getDefault();
 
-    
     /**
      * Get the filename, e.g., <tt>/resource/pl/spelling.dict</tt>.
      */
     public abstract String getFileName();        
     
-    public MorfologikSpellerRule(ResourceBundle messages, Language language) {
+    public MorfologikSpellerRule(ResourceBundle messages, Language language) 
throws IOException {
         super(messages, language);
         super.setCategory(new Category(messages.getString("category_typo")));
+        init();
     }
 
     @Override
@@ -90,13 +90,16 @@
         }
         for (AnalyzedTokenReadings token : tokens) {
             final String word = token.getToken();
+            if (ignoreWord(word)) {
+                continue;
+            }
             if (!token.isImmunized()) {
                 if (tokenizingPattern() == null) {
                     ruleMatches.addAll(getRuleMatch(word, 
token.getStartPos()));
                 } else {
                     int index = 0;
                     final Matcher m = tokenizingPattern().matcher(word);
-                    while(m.find()) {
+                    while (m.find()) {
                         final String match = word.subSequence(index, 
m.start()).toString();                        
                         ruleMatches.addAll(getRuleMatch(match, 
token.getStartPos() + index));
                         index = m.end();
@@ -157,7 +160,7 @@
      * the words as in the source dictionary. For example,
      * it may contain a hyphen, if the words with hyphens are
      * not included in the dictionary
-     * @return A compiled {@link #Pattern} that is used to tokenize words. 
+     * @return A compiled {@link Pattern} that is used to tokenize words or 
null.
      */
     public Pattern tokenizingPattern() {
         return null;

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRule.java
===================================================================
--- 
trunk/JLanguageTool/src/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRule.java
  2012-08-04 20:27:51 UTC (rev 7788)
+++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRule.java
  2012-08-04 21:36:59 UTC (rev 7789)
@@ -19,6 +19,7 @@
 
 package org.languagetool.rules.uk;
 
+import java.io.IOException;
 import java.util.ResourceBundle;
 
 import org.languagetool.Language;
@@ -29,7 +30,7 @@
     private static final String RESOURCE_FILENAME = "/uk/hunspell/uk_UA.dict";
     
     public MorfologikUkrainianSpellerRule(ResourceBundle messages,
-            Language language) {
+            Language language) throws IOException {
         super(messages, language);
     }
 

Added: trunk/JLanguageTool/src/resource/de/hunspell/ignore.txt
===================================================================
--- trunk/JLanguageTool/src/resource/de/hunspell/ignore.txt                     
        (rev 0)
+++ trunk/JLanguageTool/src/resource/de/hunspell/ignore.txt     2012-08-04 
21:36:59 UTC (rev 7789)
@@ -0,0 +1,2 @@
+# words to be ignored by the spellchecker
+einPseudoWortFürLanguageToolTests

Added: trunk/JLanguageTool/src/resource/en/hunspell/ignore.txt
===================================================================
--- trunk/JLanguageTool/src/resource/en/hunspell/ignore.txt                     
        (rev 0)
+++ trunk/JLanguageTool/src/resource/en/hunspell/ignore.txt     2012-08-04 
21:36:59 UTC (rev 7789)
@@ -0,0 +1,2 @@
+# words to be ignored by the spellchecker
+anArtificialTestWordForLanguageTool

Added: 
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SpellingCheckRuleTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SpellingCheckRuleTest.java
                             (rev 0)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SpellingCheckRuleTest.java
     2012-08-04 21:36:59 UTC (rev 7789)
@@ -0,0 +1,53 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.spelling;
+
+import junit.framework.TestCase;
+import org.languagetool.JLanguageTool;
+import org.languagetool.Language;
+import org.languagetool.rules.RuleMatch;
+
+import java.io.IOException;
+import java.util.List;
+
+public class SpellingCheckRuleTest extends TestCase {
+
+  public void testIgnoreSuggestionsWithHunspell() throws IOException {
+    final JLanguageTool langTool = new JLanguageTool(Language.GERMANY_GERMAN);
+
+    final List<RuleMatch> matches = langTool.check("Das ist ein 
einPseudoWortFürLanguageToolTests");
+    assertEquals(0, matches.size());   // no error, as this word is in 
ignore.txt
+
+    final List<RuleMatch> matches2 = langTool.check("Das ist ein Tibbfehla");
+    assertEquals(1, matches2.size());
+    assertEquals("HUNSPELL_NO_SUGGEST_RULE", 
matches2.get(0).getRule().getId());
+  }
+
+  public void testIgnoreSuggestionsWithMorfologik() throws IOException {
+    final JLanguageTool langTool = new 
JLanguageTool(Language.AMERICAN_ENGLISH);
+
+    final List<RuleMatch> matches = langTool.check("This is 
anArtificialTestWordForLanguageTool.");
+    assertEquals(0, matches.size());   // no error, as this word is in 
ignore.txt
+
+    final List<RuleMatch> matches2 = langTool.check("This is a real typoh.");
+    assertEquals(1, matches2.size());
+    assertEquals("MORFOLOGIK_RULE_EN_US", matches2.get(0).getRule().getId());
+  }
+
+}

Added: 
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SuggestionExtractorTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SuggestionExtractorTest.java
                           (rev 0)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SuggestionExtractorTest.java
   2012-08-04 21:36:59 UTC (rev 7789)
@@ -0,0 +1,40 @@
+/* LanguageTool, a natural language style checker
+ * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de)
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
+ * USA
+ */
+package org.languagetool.rules.spelling;
+
+import junit.framework.TestCase;
+import org.languagetool.Language;
+
+import java.io.IOException;
+
+public class SuggestionExtractorTest extends TestCase {
+
+  public void testGetSuggestions() throws IOException {
+    final SuggestionExtractor extractor = new 
SuggestionExtractor(Language.ENGLISH);
+    assertEquals("[]", extractor.getSimpleSuggestions("Did you mean 
foo?").toString());
+    assertEquals("[foo bla]", extractor.getSimpleSuggestions("Did you mean 
<suggestion>foo bla</suggestion>?").toString());
+    assertEquals("[foo bla, xxx]", extractor.getSimpleSuggestions("Did you 
mean <suggestion>foo bla</suggestion> or 
<suggestion>xxx</suggestion>?").toString());
+    assertEquals("[foo bla, xxx]", extractor.getSimpleSuggestions("Did you 
mean <suggestion suppress_misspelled=\"yes\">foo bla</suggestion>" +
+            " or <suggestion>xxx</suggestion>?").toString());
+
+    assertEquals("[]", extractor.getSimpleSuggestions("Did you mean 
<suggestion>foo \\1</suggestion>?").toString());
+    assertEquals("[]", extractor.getSimpleSuggestions("Did you mean 
<suggestion>‚<match no=\"3\" 
include_skipped=\"following\"/></suggestion>?").toString());
+  }
+
+}

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and 
threat landscape has changed and how IT managers can respond. Discussions 
will include endpoint security, mobile security and the latest in malware 
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to