Revision: 6370
http://languagetool.svn.sourceforge.net/languagetool/?rev=6370&view=rev
Author: dominikoeo
Date: 2012-01-29 11:54:14 +0000 (Sun, 29 Jan 2012)
Log Message:
-----------
- added sanity check to detect duplicate exceptions in grammar rules.
- added sanity check to detect exceptions with scope="next" yet
without skip="..." in the token (those exceptions never apply)
Modified Paths:
--------------
trunk/JLanguageTool/CHANGES.txt
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt 2012-01-29 11:33:53 UTC (rev 6369)
+++ trunk/JLanguageTool/CHANGES.txt 2012-01-29 11:54:14 UTC (rev 6370)
@@ -6,6 +6,7 @@
-several new rules
-Esperanto:
-several new rules
+ -web page http://www.languagetool.org/eo/ translated in Esperanto
-French:
-updated dictionary to use Dicollecte-4.4.1
-several new rules
@@ -16,6 +17,11 @@
-fixed some false alarms
-added a few new rules
+ -Internal changes:
+ - added sanity check to detect duplicate exceptions in grammar rules.
+ - added sanity check to detect exceptions with scope="next" yet
+ without skip="..." in grammar rules.
+
1.6 (2011-12-31)
-Breton:
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java
2012-01-29 11:33:53 UTC (rev 6369)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java
2012-01-29 11:54:14 UTC (rev 6370)
@@ -588,6 +588,16 @@
}
/**
+ * Checks whether the element has an exception for the next scope,
+ * as recorded in {@code exceptionValidNext} (only used for testing).
+ *
+ * @return true if the element has an exception for the next scope.
+ */
+ public final boolean hasNextException() {
+ return exceptionValidNext;
+ }
+
+ /**
* Negates the meaning of match().
*
* @param negation
Modified:
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
2012-01-29 11:33:53 UTC (rev 6369)
+++
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
2012-01-29 11:54:14 UTC (rev 6370)
@@ -121,7 +121,7 @@
element.isRegularExpression(),
element.getCaseSensitive(),
element.isInflected(),
- lang, rule.getId());
+ lang, rule.getId() + ":" + rule.getSubId());
// Check postag="..." is consistent with postag_regexp="..."
warnIfElementNotKosher(
@@ -129,10 +129,21 @@
element.isPOStagRegularExpression(),
element.getCaseSensitive(),
false,
- lang, rule.getId() + " (exception in POS tag) ");
+ lang, rule.getId() + ":" + rule.getSubId() + " (exception in POS
tag) ");
+ List<Element> exceptionElements = new ArrayList<Element>();
if (element.getExceptionList() != null) {
for (final Element exception: element.getExceptionList()) {
+ // Detect useless exception or missing skip="...".
+ if (exception.hasNextException() && element.getSkipNext() == 0) {
+ System.err.println("The " + lang.toString() + " rule: "
+ + rule.getId() + ":" + rule.getSubId()
+ + " in token [" + i + "]"
+ + " has no skip=\"...\" and yet contains scope=\"next\""
+ + " so the exception never applies. "
+ + " Did you forget skip=\"...\"");
+ }
+
// Check whether exception value is consistent with regexp="..."
// Don't check string "." since it is sometimes used as a regexp
// and sometimes used as non regexp.
@@ -142,7 +153,8 @@
exception.isRegularExpression(),
exception.getCaseSensitive(),
exception.isInflected(),
- lang, rule.getId() + " (exception in token [" + i + "]) ");
+ lang,
+ rule.getId() + ":" + rule.getSubId() + " (exception in token
[" + i + "]) ");
}
// Check postag="..." of exception is consistent with
postag_regexp="..."
warnIfElementNotKosher(
@@ -150,13 +162,81 @@
exception.isPOStagRegularExpression(),
exception.getCaseSensitive(),
false,
- lang, rule.getId() + " (exception in POS tag of token [" + i +
"]) ");
+ lang,
+ rule.getId() + ":" + rule.getSubId() + " (exception in POS tag
of token [" + i + "]) ");
+
+ // Search for duplicate exceptions (which are useless).
+ // Since there are 2 nested loops on the list of exceptions,
+ // this has thus a O(n^2) complexity, where n is the number
+ // of exception in a token. But n is small and it is also
+ // for testing only so that's OK.
+ for (final Element otherException: exceptionElements) {
+ if (equalException(exception, otherException)) {
+ System.err.println("The " + lang.toString() + " rule: "
+ + rule.getId() + ":" + rule.getSubId()
+ + " in token [" + i + "]"
+ + " contains duplicate exceptions with"
+ + " string=[" + exception.getString() + "]"
+ + " POS tag=[" + exception.getPOStag() + "]"
+ + " negate=[" + exception.getNegation() + "]"
+ + " POS negate=[" + exception.getPOSNegation() + "]");
+ break;
+ }
+ }
+ exceptionElements.add(exception);
}
}
}
}
}
+ /**
+  * Predicate to check whether two exceptions are identical or whether
+  * one exception always implies the other.
+  *
+  * There is no reason for a token to have two identical exceptions.
+  *
+  * Strings and POS tags are compared null-safely: two null values are
+  * considered equal, a null value never equals a non-null one.
+  *
+  * @param exception1 the first exception to compare
+  * @param exception2 the second exception to compare
+  * @return true if both exceptions have equal strings (compared ignoring
+  *         case unless both are case sensitive), equal POS tags, and the
+  *         same negation, POS negation, next-scope and previous-scope flags
+  */
+ private static boolean equalException(final Element exception1,
+     final Element exception2)
+ {
+   String string1 = exception1.getString();
+   String string2 = exception2.getString();
+   if (string1 == null || string2 == null) {
+     // Check for null before lower-casing, otherwise toLowerCase() would
+     // throw a NullPointerException. Only two null strings are equal.
+     if (string1 != null || string2 != null) {
+       return false;
+     }
+   } else {
+     if (!exception1.getCaseSensitive() || !exception2.getCaseSensitive()) {
+       // String comparison is done case insensitively if one or both
+       // exceptions are case insensitive, because the case insensitive
+       // one would imply the case sensitive one.
+       string1 = string1.toLowerCase();
+       string2 = string2.toLowerCase();
+     }
+     if (!string1.equals(string2)) {
+       return false;
+     }
+   }
+
+   final String posTag1 = exception1.getPOStag();
+   final String posTag2 = exception2.getPOStag();
+   final boolean equalPosTags = (posTag1 == null || posTag2 == null)
+       ? posTag1 == posTag2 : posTag1.equals(posTag2);
+   if (!equalPosTags) {
+     return false;
+   }
+
+   // We should not need to check for:
+   // - isCaseSensitive() since an exception without isCaseSensitive
+   //   implies the one with isCaseSensitive.
+   // - isInflected() since an exception with inflected="yes"
+   //   implies the one without inflected="yes" if they have
+   //   identical strings.
+   // - isRegularExpression() since a given string is either
+   //   a regexp or not.
+   return exception1.getNegation() == exception2.getNegation()
+       && exception1.getPOSNegation() == exception2.getPOSNegation()
+       && exception1.hasNextException() == exception2.hasNextException()
+       && exception1.hasPreviousException()
+           == exception2.hasPreviousException();
+ }
+
private void warnIfElementNotKosher(
final String stringValue,
final boolean isRegularExpression,
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Try before you buy = See our experts in action!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-dev2
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs