Revision: 7490
http://languagetool.svn.sourceforge.net/languagetool/?rev=7490&view=rev
Author: dnaber
Date: 2012-06-24 19:31:54 +0000 (Sun, 24 Jun 2012)
Log Message:
-----------
Support variants for false friends: "en" means "English and all its
variants", while "en-GB" means "British English only".
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/Language.java
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/FalseFriendRuleLoader.java
trunk/JLanguageTool/src/rules/false-friends.dtd
trunk/JLanguageTool/src/rules/false-friends.xml
trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/FalseFriendRuleTest.java
Modified: trunk/JLanguageTool/src/java/org/languagetool/Language.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/Language.java 2012-06-24
17:41:25 UTC (rev 7489)
+++ trunk/JLanguageTool/src/java/org/languagetool/Language.java 2012-06-24
19:31:54 UTC (rev 7490)
@@ -434,8 +434,8 @@
public static String getAllMaintainers(final ResourceBundle messages) {
final StringBuilder maintainersInfo = new StringBuilder();
final List<String> toSort = new ArrayList<String>();
- for (final Language lang : Language.LANGUAGES) {
- if (lang != Language.DEMO && !lang.isVariant()) {
+ for (final Language lang : Language.REAL_LANGUAGES) {
+ if (!lang.isVariant()) {
if (lang.getMaintainers() != null) {
final List<String> names = new ArrayList<String>();
for (Contributor contributor : lang.getMaintainers()) {
@@ -485,6 +485,28 @@
return false;
}
+ /**
+ * Return true if this is the same language as the given one, considering
+ * variants only if set for both languages. For example: en = en, en = en-GB, en-GB = en-GB,
+ * but en-US != en-GB
+ */
+ public boolean equalsConsiderVariantsIfSpecified(Language otherLanguage) {
+ if (getShortName().equals(otherLanguage.getShortName())) {
+ final boolean thisHasVariant = hasCountryVariant();
+ final boolean otherHasVariant = otherLanguage.hasCountryVariant();
+ if (thisHasVariant && otherHasVariant) {
+ return
getShortNameWithVariant().equals(otherLanguage.getShortNameWithVariant());
+ }
+ return true;
+ } else {
+ return false;
+ }
+ }
+
+ private boolean hasCountryVariant() {
+ return getCountryVariants().length == 1 && !(getCountryVariants().length
== 1 && getCountryVariants()[0].equals("ANY"));
+ }
+
private static String listToStringWithLineBreaks(final Collection<String> l)
{
final StringBuilder sb = new StringBuilder();
int i = 0;
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java
2012-06-24 17:41:25 UTC (rev 7489)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikAmericanSpellerRule.java
2012-06-24 19:31:54 UTC (rev 7490)
@@ -26,21 +26,21 @@
public final class MorfologikAmericanSpellerRule extends MorfologikSpellerRule
{
- private static final String RESOURCE_FILENAME = "/en/hunspell/en_US.dict";
-
- public MorfologikAmericanSpellerRule(ResourceBundle messages,
- Language language) {
- super(messages, language);
- }
+ public static final String RULE_ID = "MORFOLOGIK_RULE_EN_US";
- @Override
- public String getFileName() {
- return RESOURCE_FILENAME;
- }
-
- public final String getId() {
- return "MORFOLOGIK_RULE_EN_US";
- }
+ private static final String RESOURCE_FILENAME = "/en/hunspell/en_US.dict";
+ public MorfologikAmericanSpellerRule(ResourceBundle messages, Language
language) {
+ super(messages, language);
+ }
+ @Override
+ public String getFileName() {
+ return RESOURCE_FILENAME;
+ }
+
+ public final String getId() {
+ return RULE_ID;
+ }
+
}
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java
2012-06-24 17:41:25 UTC (rev 7489)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/en/MorfologikBritishSpellerRule.java
2012-06-24 19:31:54 UTC (rev 7490)
@@ -26,20 +26,21 @@
public final class MorfologikBritishSpellerRule extends MorfologikSpellerRule {
- private static final String RESOURCE_FILENAME = "/en/hunspell/en_GB.dict";
-
- public MorfologikBritishSpellerRule(ResourceBundle messages,
- Language language) {
- super(messages, language);
- }
+ public static final String RULE_ID = "MORFOLOGIK_RULE_EN_GB";
- @Override
- public String getFileName() {
- return RESOURCE_FILENAME;
- }
-
- public final String getId() {
- return "MORFOLOGIK_RULE_EN_GB";
- }
+ private static final String RESOURCE_FILENAME = "/en/hunspell/en_GB.dict";
+ public MorfologikBritishSpellerRule(ResourceBundle messages, Language
language) {
+ super(messages, language);
+ }
+
+ @Override
+ public String getFileName() {
+ return RESOURCE_FILENAME;
+ }
+
+ public final String getId() {
+ return RULE_ID;
+ }
+
}
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/FalseFriendRuleLoader.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/FalseFriendRuleLoader.java
2012-06-24 17:41:25 UTC (rev 7489)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/FalseFriendRuleLoader.java
2012-06-24 19:31:54 UTC (rev 7490)
@@ -141,8 +141,7 @@
private boolean inTranslation;
- public FalseFriendRuleHandler(final Language textLanguage,
- final Language motherTongue) {
+ public FalseFriendRuleHandler(final Language textLanguage, final Language
motherTongue) {
messages = ResourceBundle.getBundle(
"org.languagetool.MessagesBundle", motherTongue.getLocale());
formatter = new MessageFormat("");
@@ -186,12 +185,12 @@
inTranslation = true;
final String languageStr = attrs.getValue("lang");
final Language tmpLang = Language.getLanguageForShortName(languageStr);
+ if (tmpLang == null) {
+ throw new SAXException("Unknown language '" + languageStr + "'");
+ }
currentTranslationLanguage = tmpLang;
- if (tmpLang == motherTongue) {
+ if (tmpLang.equalsConsiderVariantsIfSpecified(motherTongue)) {
translationLanguage = tmpLang;
- if (translationLanguage == null) {
- throw new SAXException("Unknown language '" + languageStr + "'");
- }
}
} else if (qName.equals(EXAMPLE)
&& attrs.getValue(TYPE).equals("correct")) {
@@ -215,8 +214,8 @@
public void endElement(final String namespaceURI, final String sName,
final String qName) {
if (qName.equals(RULE)) {
- if (language == textLanguage && translationLanguage != null
- && translationLanguage == motherTongue && language != motherTongue
+ if (language.equalsConsiderVariantsIfSpecified(textLanguage) &&
translationLanguage != null
+ &&
translationLanguage.equalsConsiderVariantsIfSpecified(motherTongue) && language
!= motherTongue
&& !translations.isEmpty()) {
formatter.applyPattern(messages.getString("false_friend_hint"));
final String tokensAsString = StringUtils.join(elementList, "
").replace('|', '/');
@@ -248,10 +247,11 @@
} else if (qName.equals(PATTERN)) {
inPattern = false;
} else if (qName.equals(TRANSLATION)) {
- if (currentTranslationLanguage == motherTongue) {
+ if
(currentTranslationLanguage.equalsConsiderVariantsIfSpecified(motherTongue)) {
translations.add(translation);
}
- if (currentTranslationLanguage == textLanguage && language ==
motherTongue) {
+ if
(currentTranslationLanguage.equalsConsiderVariantsIfSpecified(textLanguage)
+ && language.equalsConsiderVariantsIfSpecified(motherTongue)) {
suggestions.add(translation.toString());
}
translation = new StringBuilder();
@@ -282,8 +282,7 @@
private String formatTranslations(final List<StringBuilder> translations) {
final StringBuilder sb = new StringBuilder();
- for (final Iterator<StringBuilder> iter = translations.iterator(); iter
- .hasNext();) {
+ for (final Iterator<StringBuilder> iter = translations.iterator();
iter.hasNext();) {
final StringBuilder trans = iter.next();
sb.append('"');
sb.append(trans.toString());
Modified: trunk/JLanguageTool/src/rules/false-friends.dtd
===================================================================
--- trunk/JLanguageTool/src/rules/false-friends.dtd 2012-06-24 17:41:25 UTC
(rev 7489)
+++ trunk/JLanguageTool/src/rules/false-friends.dtd 2012-06-24 19:31:54 UTC
(rev 7490)
@@ -13,7 +13,7 @@
redundancy.
-->
-<!ENTITY % Languages "(en|de|fr|es|pl|sv|gl|ca|it)">
+<!ENTITY % Languages "(en|en-GB|de|fr|es|pl|sv|gl|ca|it)">
<!ELEMENT rules (rulegroup)+>
<!ELEMENT rulegroup (rule+)>
Modified: trunk/JLanguageTool/src/rules/false-friends.xml
===================================================================
--- trunk/JLanguageTool/src/rules/false-friends.xml 2012-06-24 17:41:25 UTC
(rev 7489)
+++ trunk/JLanguageTool/src/rules/false-friends.xml 2012-06-24 19:31:54 UTC
(rev 7490)
@@ -5534,4 +5534,21 @@
<translation lang="en">department store</translation>
</rule>
</rulegroup>
+
+ <rulegroup id="DEMO_ENTRY">
+ <!-- for internal test cases only (FalseFriendRuleTest.java) -->
+ <rule>
+ <pattern lang="en-GB">
+ <token>forDemoOnly</token>
+ </pattern>
+ <translation lang="de">forDemoTranslation</translation>
+ </rule>
+ <rule>
+ <pattern lang="de">
+ <token>forDemoFalseFriend</token>
+ </pattern>
+ <translation lang="en-GB">forDemoFalseFriendTranslation</translation>
+ </rule>
+ </rulegroup>
+
</rules>
Modified: trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
===================================================================
--- trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
2012-06-24 17:41:25 UTC (rev 7489)
+++ trunk/JLanguageTool/src/test/org/languagetool/LanguageTest.java
2012-06-24 19:31:54 UTC (rev 7490)
@@ -100,4 +100,18 @@
assertEquals(Language.AMERICAN_ENGLISH,
Language.getLanguageForLocale(new Locale("zz")));
}
+ @Test
+ public void testEqualsConsiderVariantIfSpecified() {
+ // every language equals itself:
+
assertTrue(Language.GERMAN.equalsConsiderVariantsIfSpecified(Language.GERMAN));
+
assertTrue(Language.GERMANY_GERMAN.equalsConsiderVariantsIfSpecified(Language.GERMANY_GERMAN));
+
assertTrue(Language.ENGLISH.equalsConsiderVariantsIfSpecified(Language.ENGLISH));
+
assertTrue(Language.AMERICAN_ENGLISH.equalsConsiderVariantsIfSpecified(Language.AMERICAN_ENGLISH));
+ // equal if variant is the same, but only if specified:
+
assertTrue(Language.AMERICAN_ENGLISH.equalsConsiderVariantsIfSpecified(Language.ENGLISH));
+
assertTrue(Language.ENGLISH.equalsConsiderVariantsIfSpecified(Language.AMERICAN_ENGLISH));
+
+
assertFalse(Language.AMERICAN_ENGLISH.equalsConsiderVariantsIfSpecified(Language.BRITISH_ENGLISH));
+
assertFalse(Language.ENGLISH.equalsConsiderVariantsIfSpecified(Language.GERMAN));
+ }
}
Modified:
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/FalseFriendRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/FalseFriendRuleTest.java
2012-06-24 17:41:25 UTC (rev 7489)
+++
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/FalseFriendRuleTest.java
2012-06-24 19:31:54 UTC (rev 7490)
@@ -25,15 +25,14 @@
import junit.framework.TestCase;
+import org.languagetool.rules.en.MorfologikAmericanSpellerRule;
+import org.languagetool.rules.en.MorfologikBritishSpellerRule;
import org.xml.sax.SAXException;
import org.languagetool.JLanguageTool;
import org.languagetool.Language;
import org.languagetool.rules.RuleMatch;
-/**
- * @author Daniel Naber
- */
public class FalseFriendRuleTest extends TestCase {
public void testHintsForGermanSpeakers() throws IOException,
ParserConfigurationException, SAXException {
@@ -47,6 +46,36 @@
assertEquals("[boss, chief]",
matches2.get(0).getSuggestedReplacements().toString());
}
+ public void testHintsForGermanSpeakersWithVariant() throws IOException,
ParserConfigurationException, SAXException {
+ JLanguageTool langTool = new JLanguageTool(Language.BRITISH_ENGLISH,
Language.SWISS_GERMAN);
+ langTool.activateDefaultFalseFriendRules();
+ final List<RuleMatch> matches = assertErrors(1, "We will berate you.",
langTool);
+ assertEquals(matches.get(0).getSuggestedReplacements().toString(), "[to
provide advice, to give advice]");
+ assertErrors(0, "We will give you advice.", langTool);
+ assertErrors(1, "I go to high school in Berlin.", langTool);
+ final List<RuleMatch> matches2 = assertErrors(1, "The chef", langTool);
+ assertEquals("[boss, chief]",
matches2.get(0).getSuggestedReplacements().toString());
+ }
+
+ public void testHintsForDemoLanguage() throws IOException,
ParserConfigurationException, SAXException {
+ JLanguageTool langTool1 = new JLanguageTool(Language.BRITISH_ENGLISH,
Language.GERMAN);
+ langTool1.disableRule(MorfologikBritishSpellerRule.RULE_ID);
+ langTool1.activateDefaultFalseFriendRules();
+ final List<RuleMatch> matches1 = assertErrors(1, "And forDemoOnly.",
langTool1);
+ assertEquals("DEMO_ENTRY", matches1.get(0).getRule().getId());
+
+ JLanguageTool langTool2 = new JLanguageTool(Language.ENGLISH,
Language.GERMAN);
+ langTool2.disableRule(MorfologikBritishSpellerRule.RULE_ID);
+ langTool2.activateDefaultFalseFriendRules();
+ final List<RuleMatch> matches2 = assertErrors(1, "And forDemoOnly.",
langTool2);
+ assertEquals("DEMO_ENTRY", matches2.get(0).getRule().getId());
+
+ JLanguageTool langTool3 = new JLanguageTool(Language.AMERICAN_ENGLISH,
Language.GERMAN);
+ langTool3.disableRule(MorfologikAmericanSpellerRule.RULE_ID);
+ langTool3.activateDefaultFalseFriendRules();
+ assertErrors(0, "And forDemoOnly.", langTool3);
+ }
+
public void testHintsForEnglishSpeakers() throws IOException,
ParserConfigurationException, SAXException {
JLanguageTool langTool = new JLanguageTool(Language.GERMAN,
Language.ENGLISH);
langTool.activateDefaultFalseFriendRules();
@@ -68,24 +97,24 @@
private List<RuleMatch> assertErrors(int errorCount, String s, JLanguageTool
langTool) throws IOException {
List<RuleMatch> matches = langTool.check(s);
//System.err.println(matches);
- assertEquals(errorCount, matches.size());
+ assertEquals("Matches found: " + matches, errorCount, matches.size());
return matches;
}
- private void assertSuggestions(final int suggestionCount, final String s,
final JLanguageTool langTool) throws IOException {
- final List<RuleMatch> matches = langTool.check(s);
- int suggFound = 0;
+ private void assertSuggestions(final int suggestionCount, final String text,
final JLanguageTool langTool) throws IOException {
+ final List<RuleMatch> matches = langTool.check(text);
+ int suggestionsFound = 0;
for (final RuleMatch match : matches) {
int pos = 0;
while (pos != -1) {
pos = match.getMessage().indexOf("<suggestion>", pos + 1);
- suggFound ++;
+ suggestionsFound ++;
}
}
- if (suggFound > 0) {
- suggFound--;
+ if (suggestionsFound > 0) {
+ suggestionsFound--;
}
- assertEquals(suggestionCount, suggFound);
+ assertEquals(suggestionCount, suggestionsFound);
}
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Live Security Virtual Conference
Exclusive live event will cover all the ways today's security and
threat landscape has changed and how IT managers can respond. Discussions
will include endpoint security, mobile security and the latest in malware
threats. http://www.accelacomm.com/jaw/sfrnl04242012/114/50122263/
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs