Revision: 7789 http://languagetool.svn.sourceforge.net/languagetool/?rev=7789&view=rev Author: dnaber Date: 2012-08-04 21:36:59 +0000 (Sat, 04 Aug 2012) Log Message: ----------- introduced an optional file resource/<lang>/hunspell/ignore.txt with words that the spell checker will ignore
Modified Paths: -------------- trunk/JLanguageTool/CHANGES.txt trunk/JLanguageTool/src/java/org/languagetool/rules/be/MorfologikBelarusianSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/de/MorfologikGermanyGermanSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/el/MorfologikGreekSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAustralianSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikCanadianSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikNewZealandSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikSouthAfricanSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/it/MorfologikItalianSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/lt/MorfologikLithuanianSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/ml/MorfologikMalayalamSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/nl/MorfologikDutchSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/pl/MorfologikPolishSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/ro/MorfologikRomanianSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/ru/MorfologikRussianSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/sk/MorfologikSlovakSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/sl/MorfologikSlovenianSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java 
trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java trunk/JLanguageTool/src/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRule.java Added Paths: ----------- trunk/JLanguageTool/src/resource/de/hunspell/ignore.txt trunk/JLanguageTool/src/resource/en/hunspell/ignore.txt trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/ trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SpellingCheckRuleTest.java trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SuggestionExtractorTest.java Modified: trunk/JLanguageTool/CHANGES.txt =================================================================== --- trunk/JLanguageTool/CHANGES.txt 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/CHANGES.txt 2012-08-04 21:36:59 UTC (rev 7789) @@ -37,6 +37,9 @@ -several rule updates (Marco A.G.Pinto) + -introduced a file resources/<lang>/hunspell/ignore.txt with words that the spell checker + will ignore + -stand-alone GUI: rules can now be disabled and enabled again with a single click -HTTP API: "+" was incorrectly removed from input (sf bug #3543914), patch by Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/be/MorfologikBelarusianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/be/MorfologikBelarusianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/be/MorfologikBelarusianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.be; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/be/hunspell/be_BY.dict"; public MorfologikBelarusianSpellerRule(ResourceBundle messages, - Language language) { + 
Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/br/MorfologikBretonSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.br; +import java.io.IOException; import java.util.ResourceBundle; import java.util.regex.Pattern; @@ -32,7 +33,7 @@ private static final Pattern BRETON_TOKENIZING_CHARS = Pattern.compile("-"); public MorfologikBretonSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.ca; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/ca/hunspell/ca_ES.dict"; public MorfologikCatalanSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/de/MorfologikGermanyGermanSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/de/MorfologikGermanyGermanSpellerRule.java 2012-08-04 20:27:51 
UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/de/MorfologikGermanyGermanSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.de; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/de/hunspell/de_DE.dict"; public MorfologikGermanyGermanSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/el/MorfologikGreekSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/el/MorfologikGreekSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/el/MorfologikGreekSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.el; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/el/hunspell/el_GR.dict"; public MorfologikGreekSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.en; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -30,7 +31,7 @@ private static final String 
RESOURCE_FILENAME = "/en/hunspell/en_US.dict"; - public MorfologikAmericanSpellerRule(ResourceBundle messages, Language language) { + public MorfologikAmericanSpellerRule(ResourceBundle messages, Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAustralianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAustralianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAustralianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.en; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/en/hunspell/en_AU.dict"; public MorfologikAustralianSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.en; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -30,7 +31,7 @@ private static final String RESOURCE_FILENAME = "/en/hunspell/en_GB.dict"; - public MorfologikBritishSpellerRule(ResourceBundle messages, Language language) { + public MorfologikBritishSpellerRule(ResourceBundle messages, Language language) throws IOException { super(messages, language); } Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikCanadianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikCanadianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikCanadianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.en; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/en/hunspell/en_CA.dict"; public MorfologikCanadianSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikNewZealandSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikNewZealandSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikNewZealandSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.en; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/en/hunspell/en_NZ.dict"; public MorfologikNewZealandSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikSouthAfricanSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikSouthAfricanSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ 
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikSouthAfricanSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.en; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/en/hunspell/en_ZA.dict"; public MorfologikSouthAfricanSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/it/MorfologikItalianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/it/MorfologikItalianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/it/MorfologikItalianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.it; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/it/hunspell/it_IT.dict"; public MorfologikItalianSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/lt/MorfologikLithuanianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/lt/MorfologikLithuanianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/lt/MorfologikLithuanianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.lt; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = 
"/lt/hunspell/lt_LT.dict"; public MorfologikLithuanianSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ml/MorfologikMalayalamSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/ml/MorfologikMalayalamSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/ml/MorfologikMalayalamSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.ml; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/ml/hunspell/ml_IN.dict"; public MorfologikMalayalamSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/nl/MorfologikDutchSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/nl/MorfologikDutchSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/nl/MorfologikDutchSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.nl; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/nl/hunspell/nl_NL.dict"; public MorfologikDutchSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/pl/MorfologikPolishSpellerRule.java =================================================================== --- 
trunk/JLanguageTool/src/java/org/languagetool/rules/pl/MorfologikPolishSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/pl/MorfologikPolishSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.pl; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/pl/hunspell/pl_PL.dict"; public MorfologikPolishSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ro/MorfologikRomanianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/ro/MorfologikRomanianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/ro/MorfologikRomanianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.ro; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/ro/hunspell/ro_RO.dict"; public MorfologikRomanianSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/ru/MorfologikRussianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/ru/MorfologikRussianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/ru/MorfologikRussianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.ru; +import java.io.IOException; import 
java.util.ResourceBundle; import org.languagetool.Language; @@ -30,7 +31,7 @@ private static final String RESOURCE_FILENAME = "/ru/hunspell/ru_RU.dict"; - public MorfologikRussianSpellerRule(ResourceBundle messages, Language language) { + public MorfologikRussianSpellerRule(ResourceBundle messages, Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/sk/MorfologikSlovakSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/sk/MorfologikSlovakSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/sk/MorfologikSlovakSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.sk; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/sk/hunspell/sk_SK.dict"; public MorfologikSlovakSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/sl/MorfologikSlovenianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/sl/MorfologikSlovenianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/sl/MorfologikSlovenianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.sl; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/sl/hunspell/sl_SI.dict"; public MorfologikSlovenianSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, 
language); } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/SpellingCheckRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,9 +19,11 @@ package org.languagetool.rules.spelling; import java.io.IOException; -import java.util.ResourceBundle; +import java.io.InputStream; +import java.util.*; import org.languagetool.AnalyzedSentence; +import org.languagetool.JLanguageTool; import org.languagetool.Language; import org.languagetool.rules.Rule; import org.languagetool.rules.RuleMatch; @@ -35,6 +37,9 @@ protected final Language language; + private static final String SPELLING_IGNORE_FILE = "/hunspell/ignore.txt"; + private final Set<String> wordsToBeIgnored = new HashSet<String>(); + public SpellingCheckRule(final ResourceBundle messages, final Language language) { super(messages); this.language = language; @@ -58,4 +63,44 @@ public void reset() { } + protected boolean ignoreWord(String word) throws IOException { + // TODO?: this is needed at least for German as Hunspell tokenization includes the dot: + final String cleanWord = word.endsWith(".") ? 
word.substring(0, word.length() - 1) : word; + return wordsToBeIgnored.contains(cleanWord); + } + + protected void init() throws IOException { + loadFileIfExists(language.getShortName() + SPELLING_IGNORE_FILE); + loadFileIfExists(language.getShortNameWithVariant() + SPELLING_IGNORE_FILE); + } + + private void loadFileIfExists(String filename) throws IOException { + final boolean ignoreFileExists = JLanguageTool.getDataBroker().resourceExists(filename); + if (!ignoreFileExists) { + return; + } + loadWordsToBeIgnored(filename); + } + + private void loadWordsToBeIgnored(String ignoreFile) throws IOException { + final InputStream inputStream = JLanguageTool.getDataBroker().getFromResourceDirAsStream(ignoreFile); + try { + final Scanner scanner = new Scanner(inputStream); + try { + while (scanner.hasNextLine()) { + final String line = scanner.nextLine(); + final boolean isComment = line.startsWith("#"); + if (isComment) { + continue; + } + wordsToBeIgnored.add(line); + } + } finally { + scanner.close(); + } + } finally { + inputStream.close(); + } + } + } Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -89,6 +89,10 @@ // starting with the first token to skip the zero-length START_SENT int len = text.getTokens()[1].getStartPos(); for (final String word : tokens) { + if (ignoreWord(word)) { + len += word.length() + 1; + continue; + } boolean isAlphabetic = true; if (word.length() == 1) { // hunspell dictionaries usually do not contain punctuation isAlphabetic = StringTools.isAlphabetic(word.charAt(0)); @@ -128,7 +132,8 @@ return sb.toString(); } - private void init() throws IOException { + protected void 
init() throws IOException { + super.init(); final String langCountry = language.getShortName() + "_" + language.getCountryVariants()[0]; Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -49,15 +49,15 @@ private Locale conversionLocale = Locale.getDefault(); - /** * Get the filename, e.g., <tt>/resource/pl/spelling.dict</tt>. */ public abstract String getFileName(); - public MorfologikSpellerRule(ResourceBundle messages, Language language) { + public MorfologikSpellerRule(ResourceBundle messages, Language language) throws IOException { super(messages, language); super.setCategory(new Category(messages.getString("category_typo"))); + init(); } @Override @@ -90,13 +90,16 @@ } for (AnalyzedTokenReadings token : tokens) { final String word = token.getToken(); + if (ignoreWord(word)) { + continue; + } if (!token.isImmunized()) { if (tokenizingPattern() == null) { ruleMatches.addAll(getRuleMatch(word, token.getStartPos())); } else { int index = 0; final Matcher m = tokenizingPattern().matcher(word); - while(m.find()) { + while (m.find()) { final String match = word.subSequence(index, m.start()).toString(); ruleMatches.addAll(getRuleMatch(match, token.getStartPos() + index)); index = m.end(); @@ -157,7 +160,7 @@ * the words as in the source dictionary. For example, * it may contain a hyphen, if the words with hyphens are * not included in the dictionary - * @return A compiled {@link #Pattern} that is used to tokenize words. + * @return A compiled {@link Pattern} that is used to tokenize words or null. 
*/ public Pattern tokenizingPattern() { return null; Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRule.java 2012-08-04 20:27:51 UTC (rev 7788) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/uk/MorfologikUkrainianSpellerRule.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -19,6 +19,7 @@ package org.languagetool.rules.uk; +import java.io.IOException; import java.util.ResourceBundle; import org.languagetool.Language; @@ -29,7 +30,7 @@ private static final String RESOURCE_FILENAME = "/uk/hunspell/uk_UA.dict"; public MorfologikUkrainianSpellerRule(ResourceBundle messages, - Language language) { + Language language) throws IOException { super(messages, language); } Added: trunk/JLanguageTool/src/resource/de/hunspell/ignore.txt =================================================================== --- trunk/JLanguageTool/src/resource/de/hunspell/ignore.txt (rev 0) +++ trunk/JLanguageTool/src/resource/de/hunspell/ignore.txt 2012-08-04 21:36:59 UTC (rev 7789) @@ -0,0 +1,2 @@ +# words to be ignored by the spellchecker +einPseudoWortFürLanguageToolTests Added: trunk/JLanguageTool/src/resource/en/hunspell/ignore.txt =================================================================== --- trunk/JLanguageTool/src/resource/en/hunspell/ignore.txt (rev 0) +++ trunk/JLanguageTool/src/resource/en/hunspell/ignore.txt 2012-08-04 21:36:59 UTC (rev 7789) @@ -0,0 +1,2 @@ +# words to be ignored by the spellchecker +anArtificialTestWordForLanguageTool Added: trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SpellingCheckRuleTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SpellingCheckRuleTest.java (rev 0) +++ 
trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SpellingCheckRuleTest.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -0,0 +1,53 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules.spelling; + +import junit.framework.TestCase; +import org.languagetool.JLanguageTool; +import org.languagetool.Language; +import org.languagetool.rules.RuleMatch; + +import java.io.IOException; +import java.util.List; + +public class SpellingCheckRuleTest extends TestCase { + + public void testIgnoreSuggestionsWithHunspell() throws IOException { + final JLanguageTool langTool = new JLanguageTool(Language.GERMANY_GERMAN); + + final List<RuleMatch> matches = langTool.check("Das ist ein einPseudoWortFürLanguageToolTests"); + assertEquals(0, matches.size()); // no error, as this word is in ignore.txt + + final List<RuleMatch> matches2 = langTool.check("Das ist ein Tibbfehla"); + assertEquals(1, matches2.size()); + assertEquals("HUNSPELL_NO_SUGGEST_RULE", matches2.get(0).getRule().getId()); + } + + public void testIgnoreSuggestionsWithMorfologik() throws IOException { + final JLanguageTool langTool = new 
JLanguageTool(Language.AMERICAN_ENGLISH); + + final List<RuleMatch> matches = langTool.check("This is anArtificialTestWordForLanguageTool."); + assertEquals(0, matches.size()); // no error, as this word is in ignore.txt + + final List<RuleMatch> matches2 = langTool.check("This is a real typoh."); + assertEquals(1, matches2.size()); + assertEquals("MORFOLOGIK_RULE_EN_US", matches2.get(0).getRule().getId()); + } + +} Added: trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SuggestionExtractorTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SuggestionExtractorTest.java (rev 0) +++ trunk/JLanguageTool/src/test/org/languagetool/rules/spelling/SuggestionExtractorTest.java 2012-08-04 21:36:59 UTC (rev 7789) @@ -0,0 +1,40 @@ +/* LanguageTool, a natural language style checker + * Copyright (C) 2012 Daniel Naber (http://www.danielnaber.de) + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 + * USA + */ +package org.languagetool.rules.spelling; + +import junit.framework.TestCase; +import org.languagetool.Language; + +import java.io.IOException; + +public class SuggestionExtractorTest extends TestCase { + + public void testGetSuggestions() throws IOException { + final SuggestionExtractor extractor = new SuggestionExtractor(Language.ENGLISH); + assertEquals("[]", extractor.getSimpleSuggestions("Did you mean foo?").toString()); + assertEquals("[foo bla]", extractor.getSimpleSuggestions("Did you mean <suggestion>foo bla</suggestion>?").toString()); + assertEquals("[foo bla, xxx]", extractor.getSimpleSuggestions("Did you mean <suggestion>foo bla</suggestion> or <suggestion>xxx</suggestion>?").toString()); + assertEquals("[foo bla, xxx]", extractor.getSimpleSuggestions("Did you mean <suggestion suppress_misspelled=\"yes\">foo bla</suggestion>" + + " or <suggestion>xxx</suggestion>?").toString()); + + assertEquals("[]", extractor.getSimpleSuggestions("Did you mean <suggestion>foo \\1</suggestion>?").toString()); + assertEquals("[]", extractor.getSimpleSuggestions("Did you mean <suggestion>‚<match no=\"3\" include_skipped=\"following\"/></suggestion>?").toString()); + } + +} This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ Live Security Virtual Conference Exclusive live event will cover all the ways today's security and threat landscape has changed and how IT managers can respond. Discussions will include endpoint security, mobile security and the latest in malware threats. 
http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/ _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs