Revision: 8382
http://languagetool.svn.sourceforge.net/languagetool/?rev=8382&view=rev
Author: dnaber
Date: 2012-11-12 19:55:09 +0000 (Mon, 12 Nov 2012)
Log Message:
-----------
Add a stricter test mode that warns if an incorrect example sentence doesn't
trigger its rule when sentence splitting is applied. Not activated by default
because of performance issues.
Modified Paths:
--------------
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java
2012-11-12 19:36:16 UTC (rev 8381)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleTest.java
2012-11-12 19:55:09 UTC (rev 8382)
@@ -19,12 +19,7 @@
package org.languagetool.rules.patterns;
import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
+import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
@@ -37,12 +32,19 @@
import org.languagetool.rules.IncorrectExample;
import org.languagetool.rules.Rule;
import org.languagetool.rules.RuleMatch;
+import org.languagetool.rules.spelling.SpellingCheckRule;
/**
* @author Daniel Naber
*/
public class PatternRuleTest extends TestCase {
+ // A test sentence should only be a single sentence - if that's not the case
it can
+ // happen that rules are checked as being correct that in reality will never
match.
+ // This check prints a warning for affected rules, but it's disabled by
default because
+ // it makes the tests very slow:
+ private static final boolean CHECK_WITH_SENTENCE_SPLITTING = false;
+
// The [^cfmnt123]\\.|\\.[^mvngl] part is there to consider a string as a
// regexp if and only if it is not enclosed on both sides by those
characters.
// This is to cope with Polish POS tags which contain dots without being
@@ -75,6 +77,10 @@
}
System.out.println("Running tests for " + lang.getName() + "...");
final JLanguageTool languageTool = new JLanguageTool(lang);
+ if (CHECK_WITH_SENTENCE_SPLITTING) {
+ languageTool.activateDefaultPatternRules();
+ disableSpellingRules(languageTool);
+ }
final JLanguageTool allRulesLanguageTool = new JLanguageTool(lang);
allRulesLanguageTool.activateDefaultPatternRules();
final List<PatternRule> rules = new ArrayList<PatternRule>();
@@ -86,6 +92,15 @@
}
}
+ private void disableSpellingRules(JLanguageTool languageTool) {
+ final List<Rule> allRules = languageTool.getAllRules();
+ for (Rule rule : allRules) {
+ if (rule instanceof SpellingCheckRule) {
+ languageTool.disableRule(rule.getId());
+ }
+ }
+ }
+
private void testGrammarRulesFromXML(final List<PatternRule> rules,
final JLanguageTool languageTool,
final JLanguageTool
allRulesLanguageTool, final Language lang) throws IOException {
@@ -372,17 +387,17 @@
}
final String badSentence = cleanXML(origBadSentence);
assertTrue(badSentence.trim().length() > 0);
- RuleMatch[] matches = getMatches(rule, badSentence, languageTool);
+ List<RuleMatch> matches = getMatches(rule, badSentence, languageTool);
if (!rule.isWithComplexPhrase()) {
assertTrue(lang + ": Did expect one error in: \"" + badSentence
- + "\" (Rule: " + rule + "), but found " + matches.length
- + ". Additional info:" + rule.getMessage(), matches.length == 1);
+ + "\" (Rule: " + rule + "), but found " + matches.size()
+ + ". Additional info:" + rule.getMessage() + ", Matches: " +
matches, matches.size() == 1);
assertEquals(lang
- + ": Incorrect match position markup (start) for rule " + rule +
", sentence: " + badSentence,
- expectedMatchStart, matches[0].getFromPos());
+ + ": Incorrect match position markup (start) for rule " +
rule + ", sentence: " + badSentence,
+ expectedMatchStart, matches.get(0).getFromPos());
assertEquals(lang
- + ": Incorrect match position markup (end) for rule " + rule +
", sentence: " + badSentence,
- expectedMatchEnd, matches[0].getToPos());
+ + ": Incorrect match position markup (end) for rule " + rule
+ ", sentence: " + badSentence,
+ expectedMatchEnd, matches.get(0).getToPos());
// make sure suggestion is what we expect it to be
if (suggestedCorrections != null && suggestedCorrections.size() > 0)
{
assertTrue("You specified a correction but your message has no
suggestions in rule " + rule,
@@ -390,18 +405,18 @@
);
assertTrue(lang + ": Incorrect suggestions: "
+ suggestedCorrections.toString() + " != "
- + matches[0].getSuggestedReplacements() + " for rule " + rule
+ " on input: " + badSentence,
-
suggestedCorrections.equals(matches[0].getSuggestedReplacements()));
+ + matches.get(0).getSuggestedReplacements() + " for rule " +
rule + " on input: " + badSentence,
+
suggestedCorrections.equals(matches.get(0).getSuggestedReplacements()));
}
// make sure the suggested correction doesn't produce an error:
- if (matches[0].getSuggestedReplacements().size() > 0) {
- final int fromPos = matches[0].getFromPos();
- final int toPos = matches[0].getToPos();
- for (final String replacement :
matches[0].getSuggestedReplacements()) {
+ if (matches.get(0).getSuggestedReplacements().size() > 0) {
+ final int fromPos = matches.get(0).getFromPos();
+ final int toPos = matches.get(0).getToPos();
+ for (final String replacement :
matches.get(0).getSuggestedReplacements()) {
final String fixedSentence = badSentence.substring(0, fromPos)
+ replacement + badSentence.substring(toPos);
matches = getMatches(rule, fixedSentence, languageTool);
- if (matches.length > 0) {
+ if (matches.size() > 0) {
fail("Incorrect input:\n"
+ " " + badSentence
+ "\nCorrected sentence:\n"
@@ -409,27 +424,27 @@
+ "\nBy Rule:\n"
+ " " + rule
+ "\nThe correction triggered an error itself:\n"
- + " " + matches[0] + "\n");
+ + " " + matches.get(0) + "\n");
}
}
}
} else { // for multiple rules created with complex phrases
matches = getMatches(rule, badSentence, languageTool);
- if (matches.length == 0
+ if (matches.size() == 0
&& !complexRules.containsKey(rule.getId() + badSentence)) {
complexRules.put(rule.getId() + badSentence, rule);
}
- if (matches.length != 0) {
+ if (matches.size() != 0) {
complexRules.put(rule.getId() + badSentence, null);
assertTrue(lang + ": Did expect one error in: \"" + badSentence
- + "\" (Rule: " + rule + "), got " + matches.length,
- matches.length == 1);
+ + "\" (Rule: " + rule + "), got " + matches.size(),
+ matches.size() == 1);
assertEquals(lang + ": Incorrect match position markup (start) for
rule " + rule,
- expectedMatchStart, matches[0].getFromPos());
+ expectedMatchStart, matches.get(0).getFromPos());
assertEquals(lang + ": Incorrect match position markup (end) for
rule " + rule,
- expectedMatchEnd, matches[0].getToPos());
+ expectedMatchEnd, matches.get(0).getToPos());
assertSuggestions(suggestedCorrections, lang, matches, rule);
assertSuggestionsDoNotCreateErrors(languageTool, rule,
badSentence, matches);
}
@@ -459,25 +474,25 @@
}
}
- private void assertSuggestions(List<String> suggestedCorrections, Language
lang, RuleMatch[] matches, Rule rule) {
+ private void assertSuggestions(List<String> suggestedCorrections, Language
lang, List<RuleMatch> matches, Rule rule) {
if (suggestedCorrections != null && suggestedCorrections.size() > 0) {
- final boolean isExpectedSuggestion =
suggestedCorrections.equals(matches[0].getSuggestedReplacements());
+ final boolean isExpectedSuggestion =
suggestedCorrections.equals(matches.get(0).getSuggestedReplacements());
assertTrue(lang + ": Incorrect suggestions: "
- + suggestedCorrections.toString() + " != " +
matches[0].getSuggestedReplacements()
+ + suggestedCorrections.toString() + " != " +
matches.get(0).getSuggestedReplacements()
+ " for rule " + rule, isExpectedSuggestion);
}
}
- private void assertSuggestionsDoNotCreateErrors(JLanguageTool languageTool,
PatternRule rule, String badSentence, RuleMatch[] matches) throws IOException {
- if (matches[0].getSuggestedReplacements().size() > 0) {
- final int fromPos = matches[0].getFromPos();
- final int toPos = matches[0].getToPos();
- for (final String replacement : matches[0].getSuggestedReplacements()) {
+ private void assertSuggestionsDoNotCreateErrors(JLanguageTool languageTool,
PatternRule rule, String badSentence, List<RuleMatch> matches) throws
IOException {
+ if (matches.get(0).getSuggestedReplacements().size() > 0) {
+ final int fromPos = matches.get(0).getFromPos();
+ final int toPos = matches.get(0).getToPos();
+ for (final String replacement :
matches.get(0).getSuggestedReplacements()) {
final String fixedSentence = badSentence.substring(0, fromPos)
+ replacement + badSentence.substring(toPos);
- matches = getMatches(rule, fixedSentence, languageTool);
+ List<RuleMatch> tempMatches = getMatches(rule, fixedSentence,
languageTool);
assertEquals("Corrected sentence for rule " + rule
- + " triggered error: " + fixedSentence, 0, matches.length);
+ + " triggered error: " + fixedSentence, 0, tempMatches.size());
}
}
}
@@ -514,15 +529,30 @@
return matches.length > 0;
}
- private RuleMatch[] getMatches(final Rule rule, final String sentence,
+ private List<RuleMatch> getMatches(final Rule rule, final String sentence,
final JLanguageTool languageTool) throws IOException {
final AnalyzedSentence text = languageTool.getAnalyzedSentence(sentence);
final RuleMatch[] matches = rule.match(text);
- /*
- * for (int i = 0; i < matches.length; i++) {
- * System.err.println(matches[i]); }
- */
- return matches;
+ if (CHECK_WITH_SENTENCE_SPLITTING) {
+ // "real check" with sentence splitting:
+ for (Rule r : languageTool.getAllActiveRules()) {
+ languageTool.disableRule(r.getId());
+ }
+ languageTool.enableRule(rule.getId());
+ final List<RuleMatch> realMatches = languageTool.check(sentence);
+ final List<String> realMatchRuleIds = new ArrayList<String>();
+ for (RuleMatch realMatch : realMatches) {
+ realMatchRuleIds.add(realMatch.getRule().getId());
+ }
+ for (RuleMatch match : matches) {
+ final String ruleId = match.getRule().getId();
+ if (!match.getRule().isDefaultOff() &&
!realMatchRuleIds.contains(ruleId)) {
+ System.err.println("WARNING: " + languageTool.getLanguage().getName()
+ + ": missing rule match " + ruleId + " when splitting
sentences for test sentence '" + sentence + "'");
+ }
+ }
+ }
+ return Arrays.asList(matches);
}
public void testMakeSuggestionUppercase() throws IOException {
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Monitor your physical, virtual and cloud infrastructure from a single
web console. Get in-depth insight into apps, servers, databases, vmware,
SAP, cloud infrastructure, etc. Download 30-day Free Trial.
Pricing starts from $795 for 25 servers or applications!
http://p.sf.net/sfu/zoho_dev2dev_nov
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits