Revision: 9208
http://languagetool.svn.sourceforge.net/languagetool/?rev=9208&view=rev
Author: dnaber
Date: 2013-01-25 13:37:28 +0000 (Fri, 25 Jan 2013)
Log Message:
-----------
add a workaround for non-perfect German suggestion by using hunspell style
replacements (now: "heisst -> heißt") - can be removed once Morfologik
speller can do it directly
Modified Paths:
--------------
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpeller.java
trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java
Modified:
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java
===================================================================
---
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java
2013-01-24 23:13:56 UTC (rev 9207)
+++
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java
2013-01-25 13:37:28 UTC (rev 9208)
@@ -91,10 +91,15 @@
partCount++;
}
filterDupes(candidates);
- final List<String> correctWords = getCorrectWords(candidates);
- return correctWords.subList(0, Math.min(MAX_SUGGESTIONS,
correctWords.size()));
+ final List<String> suggestions = getCorrectWords(candidates);
+ final List<String> sortedSuggestions = sortSuggestionByQuality(word,
suggestions);
+ return sortedSuggestions.subList(0, Math.min(MAX_SUGGESTIONS,
sortedSuggestions.size()));
}
-
+
+ protected List<String> sortSuggestionByQuality(String misspelling,
List<String> suggestions) {
+ return suggestions;
+ }
+
private void filterDupes(List<String> words) {
final Set<String> seen = new HashSet<String>();
final Iterator<String> iterator = words.iterator();
Modified:
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpeller.java
===================================================================
---
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpeller.java
2013-01-24 23:13:56 UTC (rev 9207)
+++
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpeller.java
2013-01-25 13:37:28 UTC (rev 9208)
@@ -40,6 +40,12 @@
private final Speller speller;
private final Locale conversionLocale;
+ /**
+ * Creates a speller with the given maximum edit distance.
+ *
+ * @param filename path in classpath to morfologik dictionary
+ * @param conversionLocale used when transforming the word to lowercase
+ */
public MorfologikSpeller(String filename, Locale conversionLocale, int
maxEditDistance) throws IOException {
if (maxEditDistance <= 0) {
throw new RuntimeException("maxEditDistance must be > 0: " +
maxEditDistance);
@@ -52,8 +58,8 @@
/**
* Creates a speller with a maximum edit distance of one.
*
- * @param filename path in classpath to morfologik dictionary.
- * @param conversionLocale
+ * @param filename path in classpath to morfologik dictionary
+ * @param conversionLocale used when transforming the word to lowercase
*/
public MorfologikSpeller(String filename, Locale conversionLocale) throws
IOException {
this(filename, conversionLocale, 1);
Modified:
trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java
2013-01-24 23:13:56 UTC (rev 9207)
+++
trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java
2013-01-25 13:37:28 UTC (rev 9208)
@@ -25,14 +25,48 @@
import org.languagetool.rules.spelling.morfologik.MorfologikSpeller;
import java.io.IOException;
-import java.util.Locale;
-import java.util.ResourceBundle;
+import java.util.*;
public class GermanSpellerRule extends CompoundAwareHunspellRule {
public static final String RULE_ID = "GERMAN_SPELLER_RULE";
private static final int MAX_EDIT_DISTANCE = 2;
+ private static final List<Replacement> REPL = new ArrayList<Replacement>();
+ static {
+ // see de_DE.aff:
+ REPL.add(new Replacement("f", "ph"));
+ REPL.add(new Replacement("ph", "f"));
+ REPL.add(new Replacement("ß", "ss"));
+ REPL.add(new Replacement("ss", "ß"));
+ REPL.add(new Replacement("s", "ss"));
+ REPL.add(new Replacement("ss", "s"));
+ REPL.add(new Replacement("i", "ie"));
+ REPL.add(new Replacement("ie", "i"));
+ REPL.add(new Replacement("ee", "e"));
+ REPL.add(new Replacement("o", "oh"));
+ REPL.add(new Replacement("oh", "o"));
+ REPL.add(new Replacement("a", "ah"));
+ REPL.add(new Replacement("ah", "a"));
+ REPL.add(new Replacement("e", "eh"));
+ REPL.add(new Replacement("eh", "e"));
+ REPL.add(new Replacement("ae", "ä"));
+ REPL.add(new Replacement("oe", "ö"));
+ REPL.add(new Replacement("ue", "ü"));
+ REPL.add(new Replacement("Ae", "Ä"));
+ REPL.add(new Replacement("Oe", "Ö"));
+ REPL.add(new Replacement("Ue", "Ü"));
+ REPL.add(new Replacement("d", "t"));
+ REPL.add(new Replacement("t", "d"));
+ REPL.add(new Replacement("th", "t"));
+ REPL.add(new Replacement("t", "th"));
+ REPL.add(new Replacement("r", "rh"));
+ REPL.add(new Replacement("ch", "k"));
+ REPL.add(new Replacement("k", "ch"));
+ // not in de_DE.aff (not clear what uppercase replacement we need...):
+ REPL.add(new Replacement("F", "Ph"));
+ REPL.add(new Replacement("Ph", "F"));
+ }
public GermanSpellerRule(ResourceBundle messages, Language language) {
super(messages, language, getCompoundSplitter(), getSpeller(language));
@@ -66,4 +100,37 @@
}
}
+ // Use hunspell-style replacements to get good suggestions for "heisse",
namely "heiße"
+ // TODO: remove this when the Morfologik speller can do this directly during
tree iteration:
+ @Override
+ protected List<String> sortSuggestionByQuality(String misspelling,
List<String> suggestions) {
+ final List<String> result = new ArrayList<String>();
+ for (String suggestion : suggestions) {
+ boolean moveSuggestionToTop = false;
+ for (Replacement replacement : REPL) {
+ final String modifiedMisspelling =
misspelling.replace(replacement.key, replacement.value);
+ final boolean equalsAfterReplacement =
modifiedMisspelling.equals(suggestion);
+ if (equalsAfterReplacement) {
+ moveSuggestionToTop = true;
+ break;
+ }
+ }
+ if (moveSuggestionToTop) {
+ // this should be preferred, as the replacements make it equal to the
suggestion:
+ result.add(0, suggestion);
+ } else {
+ result.add(suggestion);
+ }
+ }
+ return result;
+ }
+
+ private static class Replacement {
+ String key;
+ String value;
+ private Replacement(String key, String value) {
+ this.key = key;
+ this.value = value;
+ }
+ }
}
Modified:
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java
2013-01-24 23:13:56 UTC (rev 9207)
+++
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java
2013-01-25 13:37:28 UTC (rev 9208)
@@ -149,6 +149,23 @@
//assertCorrection(rule, "Handselvertretertreffn",
"Handelsvertretertreffen");
}
+ @Test
+ public void testGetSuggestionOrder() throws Exception {
+ final HunspellRule rule = new
GermanSpellerRule(TestTools.getMessages("German"), new GermanyGerman());
+ assertCorrectionsByOrder(rule, "heisst", "heißt", "heilst", "heimst"); //
"heißt" should be first
+ assertCorrectionsByOrder(rule, "heissen", "heißen");
+ assertCorrectionsByOrder(rule, "müßte", "müsste", "büßte"); // "müsste"
should be first
+ assertCorrectionsByOrder(rule, "schmohren", "schmoren");
+ assertCorrectionsByOrder(rule, "Fänomen", "Phänomen");
+ assertCorrectionsByOrder(rule, "homofob", "homophob");
+ assertCorrectionsByOrder(rule, "ueber", "über");
+ assertCorrectionsByOrder(rule, "uebel", "übel");
+ assertCorrectionsByOrder(rule, "Aerger", "Ärger");
+ assertCorrectionsByOrder(rule, "Walt", "Wald");
+ assertCorrectionsByOrder(rule, "Rythmus", "Rhythmus");
+ assertCorrectionsByOrder(rule, "Rytmus", "Rhythmus");
+ }
+
private void assertCorrection(HunspellRule rule, String input, String...
expectedTerms) throws IOException {
final List<String> suggestions = rule.getSuggestions(input);
for (String expectedTerm : expectedTerms) {
@@ -156,4 +173,13 @@
}
}
+ private void assertCorrectionsByOrder(HunspellRule rule, String input,
String... expectedTerms) throws IOException {
+ final List<String> suggestions = rule.getSuggestions(input);
+ int i = 0;
+ for (String expectedTerm : expectedTerms) {
+ assertTrue("Not found at position " + i + ": '" + expectedTerm + "' in:
" + suggestions, suggestions.get(i).equals(expectedTerm));
+ i++;
+ }
+ }
+
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Master Visual Studio, SharePoint, SQL, ASP.NET, C# 2012, HTML5, CSS,
MVC, Windows 8 Apps, JavaScript and much more. Keep your skills current
with LearnDevNow - 3,200 step-by-step video tutorials by Microsoft
MVPs and experts. ON SALE this month only -- learn more at:
http://p.sf.net/sfu/learnnow-d2d
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits