Revision: 9893
http://languagetool.svn.sourceforge.net/languagetool/?rev=9893&view=rev
Author: jaumeortola
Date: 2013-04-07 08:36:55 +0000 (Sun, 07 Apr 2013)
Log Message:
-----------
[ca] Improve suggestions in MorfologikSpellerRule. If few suggestions are
found, try to get more from the word without diacritics.
Modified Paths:
--------------
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
Modified:
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
===================================================================
---
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
2013-04-07 07:36:47 UTC (rev 9892)
+++
trunk/languagetool/languagetool-core/src/main/java/org/languagetool/rules/spelling/morfologik/MorfologikSpellerRule.java
2013-04-07 08:36:55 UTC (rev 9893)
@@ -35,6 +35,8 @@
import java.util.ResourceBundle;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
+import java.text.Normalizer;
+import java.text.Normalizer.Form;
public abstract class MorfologikSpellerRule extends SpellingCheckRule {
@@ -118,11 +120,23 @@
private List<RuleMatch> getRuleMatch(final String word, final int startPos) {
final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
if (speller.isMisspelled(word)) {
- final RuleMatch ruleMatch = new RuleMatch(this,
- startPos, startPos + word.length(),
- messages.getString("spelling"),
- messages.getString("desc_spelling_short"));
- final List<String> suggestions = speller.getSuggestions(word);
+ final RuleMatch ruleMatch = new RuleMatch(this, startPos, startPos
+ + word.length(), messages.getString("spelling"),
+ messages.getString("desc_spelling_short"));
+ List<String> suggestions = speller.getSuggestions(word);
+ //If few suggestions are found, try to get more from the word without diacritics
+ final String wordWithoutDiacritics=removeAccents(word);
+ if (suggestions.size() < 5 && !word.equals(wordWithoutDiacritics)) {
+ List<String> moreSuggestions = speller.getSuggestions(wordWithoutDiacritics);
+ if (!speller.isMisspelled(wordWithoutDiacritics)) {
+ moreSuggestions.add(wordWithoutDiacritics);
+ }
+ for (int i = 0; i < moreSuggestions.size(); i++) {
+ if (!suggestions.contains(moreSuggestions.get(i))) {
+ suggestions.add(moreSuggestions.get(i));
+ }
+ }
+ }
if (!suggestions.isEmpty()) {
ruleMatch.setSuggestedReplacements(suggestions);
}
@@ -145,5 +159,14 @@
public void setIgnoreTaggedWords() {
ignoreTaggedWords=true;
}
+
+ /*
+ * Remove any diacritical mark from a String
+ */
+ private static String removeAccents(String text) {
+ return text == null ? null
+ : Normalizer.normalize(text, Form.NFD)
+ .replaceAll("\\p{InCombiningDiacriticalMarks}+", "");
+ }
}
Modified:
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
===================================================================
---
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
2013-04-07 07:36:47 UTC (rev 9892)
+++
trunk/languagetool/languagetool-language-modules/ca/src/test/java/org/languagetool/rules/ca/MorfologikCatalanSpellerRuleTest.java
2013-04-07 08:36:55 UTC (rev 9893)
@@ -76,33 +76,63 @@
assertEquals("Joan", matches[0].getSuggestedReplacements().get(0));
matches = rule.match(langTool.getAnalyzedSentence("abatusats"));
- // check match positions:
assertEquals(1, matches.length);
assertEquals(0, matches[0].getFromPos());
assertEquals(9, matches[0].getToPos());
assertEquals("abatussats",
matches[0].getSuggestedReplacements().get(0));
matches = rule.match(langTool.getAnalyzedSentence("L'statu"));
- // check match positions:
assertEquals(1, matches.length);
assertEquals(2, matches[0].getFromPos());
assertEquals(7, matches[0].getToPos());
assertEquals("sta tu", matches[0].getSuggestedReplacements().get(0));
matches = rule.match(langTool.getAnalyzedSentence("Pecra"));
- // check match positions:
assertEquals(1, matches.length);
assertEquals(0, matches[0].getFromPos());
assertEquals(5, matches[0].getToPos());
assertEquals("Pera", matches[0].getSuggestedReplacements().get(2));
+ matches = rule.match(langTool.getAnalyzedSentence("argüit"));
+ assertEquals(1, matches.length);
+ assertEquals(0, matches[0].getFromPos());
+ assertEquals(6, matches[0].getToPos());
+ assertEquals("argüint", matches[0].getSuggestedReplacements().get(0));
+ assertEquals("argüir", matches[0].getSuggestedReplacements().get(1));
+ assertEquals("arguït", matches[0].getSuggestedReplacements().get(2));
+
+ matches = rule.match(langTool.getAnalyzedSentence("ángel"));
+ assertEquals(1, matches.length);
+ assertEquals(0, matches[0].getFromPos());
+ assertEquals(5, matches[0].getToPos());
+ assertEquals("Àngel", matches[0].getSuggestedReplacements().get(0));
+ assertEquals("àngel", matches[0].getSuggestedReplacements().get(1));
+ assertEquals("angle", matches[0].getSuggestedReplacements().get(2));
+ assertEquals("anhel", matches[0].getSuggestedReplacements().get(3));
+
+ matches = rule.match(langTool.getAnalyzedSentence("caçessim"));
+ assertEquals(1, matches.length);
+ assertEquals(0, matches[0].getFromPos());
+ assertEquals(8, matches[0].getToPos());
+ assertEquals("caçàssim", matches[0].getSuggestedReplacements().get(0));
+ assertEquals("cacessin", matches[0].getSuggestedReplacements().get(1));
+ assertEquals("cacessis", matches[0].getSuggestedReplacements().get(2));
+ assertEquals("cacéssim", matches[0].getSuggestedReplacements().get(3));
+
+ matches = rule.match(langTool.getAnalyzedSentence("cantaríà"));
+ assertEquals(1, matches.length);
+ assertEquals(0, matches[0].getFromPos());
+ assertEquals(8, matches[0].getToPos());
+ assertEquals("cantarà", matches[0].getSuggestedReplacements().get(0));
+ assertEquals("cantaria", matches[0].getSuggestedReplacements().get(1));
+
//capitalized wrong words
matches = rule.match(langTool.getAnalyzedSentence("En la Pecra"));
- // check match positions:
assertEquals(1, matches.length);
assertEquals(6, matches[0].getFromPos());
assertEquals(11, matches[0].getToPos());
- assertEquals("Pera", matches[0].getSuggestedReplacements().get(2));
+ assertEquals("Pedra", matches[0].getSuggestedReplacements().get(0));
+ assertEquals("Peira", matches[0].getSuggestedReplacements().get(1));
assertEquals(1,
rule.match(langTool.getAnalyzedSentence("aõh")).length);
assertEquals(0, rule.match(langTool.getAnalyzedSentence("a")).length);
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Minimize network downtime and maximize team effectiveness.
Reduce network management and security costs. Learn how to hire
the most talented Cisco Certified professionals. Visit the
Employer Resources Portal
http://www.cisco.com/web/learning/employer_resources/index.html
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits