Revision: 9893
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=9893&view=rev
Author:   jaumeortola
Date:     2013-04-07 08:36:55 +0000 (Sun, 07 Apr 2013)
Log Message:
-----------
[ca] Improve suggestions in MorfologikSpellerRule. If few suggestions are 
found, try to get more from the word without diacritics.

Modified Paths:
--------------
    
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
    
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java

Modified: 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
===================================================================
--- 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
    2013-04-07 07:36:47 UTC (rev 9892)
+++ 
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
    2013-04-07 08:36:55 UTC (rev 9893)
@@ -35,6 +35,8 @@
 import java.util.ResourceBundle;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
+import java.text.Normalizer;
+import java.text.Normalizer.Form;
 
 public abstract class MorfologikSpellerRule extends SpellingCheckRule {
 
@@ -118,11 +120,23 @@
   private List<RuleMatch> getRuleMatch(final String word, final int startPos) {
     final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
     if (speller.isMisspelled(word)) {
-      final RuleMatch ruleMatch = new RuleMatch(this,
-              startPos, startPos + word.length(),
-              messages.getString("spelling"),
-              messages.getString("desc_spelling_short"));
-      final List<String> suggestions = speller.getSuggestions(word);
+      final RuleMatch ruleMatch = new RuleMatch(this, startPos, startPos
+          + word.length(), messages.getString("spelling"),
+          messages.getString("desc_spelling_short"));
+      List<String> suggestions = speller.getSuggestions(word);
+      //If few suggestions are found, try to get more from the word without diacritics
+      final String wordWithoutDiacritics=removeAccents(word);
+      if (suggestions.size() < 5 && !word.equals(wordWithoutDiacritics)) {
+        List<String> moreSuggestions = speller.getSuggestions(wordWithoutDiacritics);
+        if (!speller.isMisspelled(wordWithoutDiacritics)) {
+          moreSuggestions.add(wordWithoutDiacritics);
+        }
+        for (int i = 0; i < moreSuggestions.size(); i++) {
+          if (!suggestions.contains(moreSuggestions.get(i))) {
+            suggestions.add(moreSuggestions.get(i));
+          }
+        }
+      }
       if (!suggestions.isEmpty()) {
         ruleMatch.setSuggestedReplacements(suggestions);
       }
@@ -145,5 +159,14 @@
   public void setIgnoreTaggedWords() {
     ignoreTaggedWords=true;
   }
+  
+  /*
+   * Remove any diacritical mark from a String
+   */
+  private static String removeAccents(String text) {
+    return text == null ? null
+        : Normalizer.normalize(text, Form.NFD)
+            .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
+  }
 
 }

Modified: 
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
===================================================================
--- 
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
   2013-04-07 07:36:47 UTC (rev 9892)
+++ 
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
   2013-04-07 08:36:55 UTC (rev 9893)
@@ -76,33 +76,63 @@
         assertEquals("Joan", matches[0].getSuggestedReplacements().get(0));
         
         matches = rule.match(langTool.getAnalyzedSentence("abatusats"));
-        // check match positions:
         assertEquals(1, matches.length);
         assertEquals(0, matches[0].getFromPos());
         assertEquals(9, matches[0].getToPos());
         assertEquals("abatussats", matches[0].getSuggestedReplacements().get(0));
         
         matches = rule.match(langTool.getAnalyzedSentence("L'statu"));
-        // check match positions:
         assertEquals(1, matches.length);
         assertEquals(2, matches[0].getFromPos());
         assertEquals(7, matches[0].getToPos());
         assertEquals("sta tu", matches[0].getSuggestedReplacements().get(0));
 
         matches = rule.match(langTool.getAnalyzedSentence("Pecra"));
-        // check match positions:
         assertEquals(1, matches.length);
         assertEquals(0, matches[0].getFromPos());
         assertEquals(5, matches[0].getToPos());
         assertEquals("Pera", matches[0].getSuggestedReplacements().get(2));
         
+        matches = rule.match(langTool.getAnalyzedSentence("argüit"));
+        assertEquals(1, matches.length);
+        assertEquals(0, matches[0].getFromPos());
+        assertEquals(6, matches[0].getToPos());
+        assertEquals("argüint", matches[0].getSuggestedReplacements().get(0));
+        assertEquals("argüir", matches[0].getSuggestedReplacements().get(1));
+        assertEquals("arguït", matches[0].getSuggestedReplacements().get(2));
+        
+        matches = rule.match(langTool.getAnalyzedSentence("ángel"));
+        assertEquals(1, matches.length);
+        assertEquals(0, matches[0].getFromPos());
+        assertEquals(5, matches[0].getToPos());
+        assertEquals("Àngel", matches[0].getSuggestedReplacements().get(0));
+        assertEquals("àngel", matches[0].getSuggestedReplacements().get(1));
+        assertEquals("angle", matches[0].getSuggestedReplacements().get(2));
+        assertEquals("anhel", matches[0].getSuggestedReplacements().get(3));
+        
+        matches = rule.match(langTool.getAnalyzedSentence("caçessim"));
+        assertEquals(1, matches.length);
+        assertEquals(0, matches[0].getFromPos());
+        assertEquals(8, matches[0].getToPos());
+        assertEquals("caçàssim", matches[0].getSuggestedReplacements().get(0));
+        assertEquals("cacessin", matches[0].getSuggestedReplacements().get(1));
+        assertEquals("cacessis", matches[0].getSuggestedReplacements().get(2));
+        assertEquals("cacéssim", matches[0].getSuggestedReplacements().get(3));
+        
+        matches = rule.match(langTool.getAnalyzedSentence("cantaríà"));
+        assertEquals(1, matches.length);
+        assertEquals(0, matches[0].getFromPos());
+        assertEquals(8, matches[0].getToPos());
+        assertEquals("cantarà", matches[0].getSuggestedReplacements().get(0));
+        assertEquals("cantaria", matches[0].getSuggestedReplacements().get(1));
+        
         //capitalized wrong words
         matches = rule.match(langTool.getAnalyzedSentence("En la Pecra"));
-        // check match positions:
         assertEquals(1, matches.length);
         assertEquals(6, matches[0].getFromPos());
         assertEquals(11, matches[0].getToPos());
-        assertEquals("Pera", matches[0].getSuggestedReplacements().get(2));
+        assertEquals("Pedra", matches[0].getSuggestedReplacements().get(0));
+        assertEquals("Peira", matches[0].getSuggestedReplacements().get(1));
         
         assertEquals(1, rule.match(langTool.getAnalyzedSentence("aõh")).length);
         assertEquals(0, rule.match(langTool.getAnalyzedSentence("a")).length); 
       

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Minimize network downtime and maximize team effectiveness.
Reduce network management and security costs. Learn how to hire 
the most talented Cisco Certified professionals. Visit the 
Employer Resources Portal
http://www.cisco.com/web/learning/employer_resources/index.html
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits

Reply via email to