Revision: 9208
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=9208&view=rev
Author:   dnaber
Date:     2013-01-25 13:37:28 +0000 (Fri, 25 Jan 2013)
Log Message:
-----------
add a workaround for non-perfect German suggestions by using hunspell-style 
replacements (now: "heisst -> heißt") - can be removed once the Morfologik 
speller can do it directly

Modified Paths:
--------------
    
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java
    
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpeller.java
    
trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java
    
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java

Modified: 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java
===================================================================
--- 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java
  2013-01-24 23:13:56 UTC (rev 9207)
+++ 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java
  2013-01-25 13:37:28 UTC (rev 9208)
@@ -91,10 +91,15 @@
       partCount++;
     }
     filterDupes(candidates);
-    final List<String> correctWords = getCorrectWords(candidates);
-    return correctWords.subList(0, Math.min(MAX_SUGGESTIONS, 
correctWords.size()));
+    final List<String> suggestions = getCorrectWords(candidates);
+    final List<String> sortedSuggestions = sortSuggestionByQuality(word, 
suggestions);
+    return sortedSuggestions.subList(0, Math.min(MAX_SUGGESTIONS, 
sortedSuggestions.size()));
   }
-  
+
+  protected List<String> sortSuggestionByQuality(String misspelling, 
List<String> suggestions) {
+    return suggestions;
+  }
+
   private void filterDupes(List<String> words) {
     final Set<String> seen = new HashSet<String>();
     final Iterator<String> iterator = words.iterator();

Modified: 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpeller.java
===================================================================
--- 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpeller.java
        2013-01-24 23:13:56 UTC (rev 9207)
+++ 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpeller.java
        2013-01-25 13:37:28 UTC (rev 9208)
@@ -40,6 +40,12 @@
   private final Speller speller;
   private final Locale conversionLocale;
 
+  /**
+   * Creates a speller with the given maximum edit distance.
+   * 
+   * @param filename path in classpath to morfologik dictionary
+   * @param conversionLocale used when transforming the word to lowercase
+   */
   public MorfologikSpeller(String filename, Locale conversionLocale, int 
maxEditDistance) throws IOException {
     if (maxEditDistance <= 0) {
       throw new RuntimeException("maxEditDistance must be > 0: " + 
maxEditDistance);
@@ -52,8 +58,8 @@
   /**
    * Creates a speller with a maximum edit distance of one.
    * 
-   * @param filename path in classpath to morfologik dictionary.
-   * @param conversionLocale
+   * @param filename path in classpath to morfologik dictionary
+   * @param conversionLocale used when transforming the word to lowercase
    */
   public MorfologikSpeller(String filename, Locale conversionLocale) throws 
IOException {
     this(filename, conversionLocale, 1);

Modified: 
trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java
===================================================================
--- 
trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java
  2013-01-24 23:13:56 UTC (rev 9207)
+++ 
trunk/languagetool/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java
  2013-01-25 13:37:28 UTC (rev 9208)
@@ -25,14 +25,48 @@
 import org.languagetool.rules.spelling.morfologik.MorfologikSpeller;
 
 import java.io.IOException;
-import java.util.Locale;
-import java.util.ResourceBundle;
+import java.util.*;
 
 public class GermanSpellerRule extends CompoundAwareHunspellRule {
 
   public static final String RULE_ID = "GERMAN_SPELLER_RULE";
   
   private static final int MAX_EDIT_DISTANCE = 2;
+  private static final List<Replacement> REPL = new ArrayList<Replacement>();
+  static {
+    // see de_DE.aff:
+    REPL.add(new Replacement("f", "ph"));
+    REPL.add(new Replacement("ph", "f"));
+    REPL.add(new Replacement("ß", "ss"));
+    REPL.add(new Replacement("ss", "ß"));
+    REPL.add(new Replacement("s", "ss"));
+    REPL.add(new Replacement("ss", "s"));
+    REPL.add(new Replacement("i", "ie"));
+    REPL.add(new Replacement("ie", "i"));
+    REPL.add(new Replacement("ee", "e"));
+    REPL.add(new Replacement("o", "oh"));
+    REPL.add(new Replacement("oh", "o"));
+    REPL.add(new Replacement("a", "ah"));
+    REPL.add(new Replacement("ah", "a"));
+    REPL.add(new Replacement("e", "eh"));
+    REPL.add(new Replacement("eh", "e"));
+    REPL.add(new Replacement("ae", "ä"));
+    REPL.add(new Replacement("oe", "ö"));
+    REPL.add(new Replacement("ue", "ü"));
+    REPL.add(new Replacement("Ae", "Ä"));
+    REPL.add(new Replacement("Oe", "Ö"));
+    REPL.add(new Replacement("Ue", "Ü"));
+    REPL.add(new Replacement("d", "t"));
+    REPL.add(new Replacement("t", "d"));
+    REPL.add(new Replacement("th", "t"));
+    REPL.add(new Replacement("t", "th"));
+    REPL.add(new Replacement("r", "rh"));
+    REPL.add(new Replacement("ch", "k"));
+    REPL.add(new Replacement("k", "ch"));
+    // not in de_DE.aff (not clear what uppercase replacement we need...):
+    REPL.add(new Replacement("F", "Ph"));
+    REPL.add(new Replacement("Ph", "F"));
+  }
 
   public GermanSpellerRule(ResourceBundle messages, Language language) {
     super(messages, language, getCompoundSplitter(), getSpeller(language));
@@ -66,4 +100,37 @@
     }
   }
 
+  // Use hunspell-style replacements to get good suggestions for "heisse", 
namely "heiße"
+  // TODO: remove this when the Morfologik speller can do this directly during 
tree iteration:
+  @Override
+  protected List<String> sortSuggestionByQuality(String misspelling, 
List<String> suggestions) {
+    final List<String> result = new ArrayList<String>();
+    for (String suggestion : suggestions) {
+      boolean moveSuggestionToTop = false;
+      for (Replacement replacement : REPL) {
+        final String modifiedMisspelling = 
misspelling.replace(replacement.key, replacement.value);
+        final boolean equalsAfterReplacement = 
modifiedMisspelling.equals(suggestion);
+        if (equalsAfterReplacement) {
+          moveSuggestionToTop = true;
+          break;
+        }
+      }
+      if (moveSuggestionToTop) {
+        // this should be preferred, as the replacements make it equal to the 
suggestion:
+        result.add(0, suggestion);
+      } else {
+        result.add(suggestion);
+      }
+    }
+    return result;
+  }
+
+  private static class Replacement {
+    String key;
+    String value;
+    private Replacement(String key, String value) {
+      this.key = key;
+      this.value = value;
+    }
+  }
 }

Modified: 
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java
===================================================================
--- 
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java
      2013-01-24 23:13:56 UTC (rev 9207)
+++ 
trunk/languagetool/languagetool-language-modules/de/src/test/java/org/languagetool/rules/de/GermanSpellerRuleTest.java
      2013-01-25 13:37:28 UTC (rev 9208)
@@ -149,6 +149,23 @@
     //assertCorrection(rule, "Handselvertretertreffn", 
"Handelsvertretertreffen");
   }
 
+  @Test
+  public void testGetSuggestionOrder() throws Exception {
+    final HunspellRule rule = new 
GermanSpellerRule(TestTools.getMessages("German"), new GermanyGerman());
+    assertCorrectionsByOrder(rule, "heisst", "heißt", "heilst", "heimst");  // 
"heißt" should be first
+    assertCorrectionsByOrder(rule, "heissen", "heißen");
+    assertCorrectionsByOrder(rule, "müßte", "müsste", "büßte");  // "müsste" 
should be first
+    assertCorrectionsByOrder(rule, "schmohren", "schmoren");
+    assertCorrectionsByOrder(rule, "Fänomen", "Phänomen");
+    assertCorrectionsByOrder(rule, "homofob", "homophob");
+    assertCorrectionsByOrder(rule, "ueber", "über");
+    assertCorrectionsByOrder(rule, "uebel", "übel");
+    assertCorrectionsByOrder(rule, "Aerger", "Ärger");
+    assertCorrectionsByOrder(rule, "Walt", "Wald");
+    assertCorrectionsByOrder(rule, "Rythmus", "Rhythmus");
+    assertCorrectionsByOrder(rule, "Rytmus", "Rhythmus");
+  }
+  
   private void assertCorrection(HunspellRule rule, String input, String... 
expectedTerms) throws IOException {
     final List<String> suggestions = rule.getSuggestions(input);
     for (String expectedTerm : expectedTerms) {
@@ -156,4 +173,13 @@
     }
   }
   
+  private void assertCorrectionsByOrder(HunspellRule rule, String input, 
String... expectedTerms) throws IOException {
+    final List<String> suggestions = rule.getSuggestions(input);
+    int i = 0;
+    for (String expectedTerm : expectedTerms) {
+      assertTrue("Not found at position " + i + ": '" + expectedTerm + "' in: 
" + suggestions, suggestions.get(i).equals(expectedTerm));
+      i++;
+    }
+  }
+  
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Master Visual Studio, SharePoint, SQL, ASP.NET, C# 2012, HTML5, CSS,
MVC, Windows 8 Apps, JavaScript and much more. Keep your skills current
with LearnDevNow - 3,200 step-by-step video tutorials by Microsoft
MVPs and experts. ON SALE this month only -- learn more at:
http://p.sf.net/sfu/learnnow-d2d
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits

Reply via email to