Revision: 6370
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=6370&view=rev
Author:   dominikoeo
Date:     2012-01-29 11:54:14 +0000 (Sun, 29 Jan 2012)
Log Message:
-----------
- added sanity check to detect duplicate exceptions in grammar rules.
- added sanity check to detect exceptions with scope="next" yet
  without skip="..." in token (those exceptions never apply)

Modified Paths:
--------------
    trunk/JLanguageTool/CHANGES.txt
    trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java
    
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java

Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt     2012-01-29 11:33:53 UTC (rev 6369)
+++ trunk/JLanguageTool/CHANGES.txt     2012-01-29 11:54:14 UTC (rev 6370)
@@ -6,6 +6,7 @@
    -several new rules
  -Esperanto:
    -several new rules
+   -web page http://www.languagetool.org/eo/ translated into Esperanto 
  -French:
    -updated dictionary to use Dicollecte-4.4.1
    -several new rules
@@ -16,6 +17,11 @@
    -fixed some false alarms
    -added a few new rules
 
+ -Internal changes:
+   - added sanity check to detect duplicate exceptions in grammar rules.
+   - added sanity check to detect exceptions with scope="next" yet
+     without skip="..." in grammar rules.
+
 1.6 (2011-12-31)
 
  -Breton:

Modified: 
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java   
2012-01-29 11:33:53 UTC (rev 6369)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/Element.java   
2012-01-29 11:54:14 UTC (rev 6370)
@@ -588,6 +588,16 @@
   }
 
   /**
+   * Checks whether the element has an exception for the next scope.
+   * (Only used for testing.)
+   *
+   * @return true if the element has an exception for the next scope.
+   */
+  public final boolean hasNextException() {
+    return exceptionValidNext;
+  }
+
+  /**
    * Negates the meaning of match().
    * 
    * @param negation

Modified: 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
   2012-01-29 11:33:53 UTC (rev 6369)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
   2012-01-29 11:54:14 UTC (rev 6370)
@@ -121,7 +121,7 @@
           element.isRegularExpression(),
           element.getCaseSensitive(),
           element.isInflected(),
-          lang, rule.getId());
+          lang, rule.getId() + ":" + rule.getSubId());
 
         // Check postag="..." is consistent with postag_regexp="..."
         warnIfElementNotKosher(
@@ -129,10 +129,21 @@
           element.isPOStagRegularExpression(),
           element.getCaseSensitive(),
           false,
-          lang, rule.getId() + " (exception in POS tag) ");
+          lang, rule.getId() + ":" + rule.getSubId() + " (exception in POS 
tag) ");
 
+        List<Element> exceptionElements = new ArrayList<Element>();
         if (element.getExceptionList() != null) {
           for (final Element exception: element.getExceptionList()) {
+            // Detect useless exception or missing skip="...".
+            if (exception.hasNextException() && element.getSkipNext() == 0) {
+              System.err.println("The " + lang.toString() + " rule: "
+                  + rule.getId() + ":" + rule.getSubId()
+                  + " in token [" + i + "]"
+                  + " has no skip=\"...\" and yet contains scope=\"next\""
+                  + " so the exception never applies. "
+                  + " Did you forget skip=\"...\"");
+            }
+
             // Check whether exception value is consistent with regexp="..."
             // Don't check string "." since it is sometimes used as a regexp
             // and sometimes used as non regexp.
@@ -142,7 +153,8 @@
                 exception.isRegularExpression(),
                 exception.getCaseSensitive(),
                 exception.isInflected(),
-                lang, rule.getId() + " (exception in token [" + i + "]) ");
+                lang,
+                rule.getId() + ":" + rule.getSubId() + " (exception in token 
[" + i + "]) ");
             }
             // Check postag="..." of exception is consistent with 
postag_regexp="..."
             warnIfElementNotKosher(
@@ -150,13 +162,81 @@
               exception.isPOStagRegularExpression(),
               exception.getCaseSensitive(),
               false,
-              lang, rule.getId() + " (exception in POS tag of token [" + i + 
"]) ");
+              lang,
+              rule.getId() + ":" + rule.getSubId() + " (exception in POS tag 
of token [" + i + "]) ");
+
+            // Search for duplicate exceptions (which are useless).
+            // Since there are 2 nested loops on the list of exceptions,
+            // this has thus a O(n^2) complexity, where n is the number
+            // of exception in a token. But n is small and it is also
+            // for testing only so that's OK.
+            for (final Element otherException: exceptionElements) {
+              if (equalException(exception, otherException)) {
+                System.err.println("The " + lang.toString() + " rule: "
+                    + rule.getId() + ":" + rule.getSubId()
+                    + " in token [" + i + "]"
+                    + " contains duplicate exceptions with"
+                    + " string=[" + exception.getString() + "]"
+                    + " POS tag=[" + exception.getPOStag() + "]"
+                    + " negate=[" + exception.getNegation() + "]"
+                    + " POS negate=[" + exception.getPOSNegation() + "]");
+                break;
+              }
+            }
+            exceptionElements.add(exception);
           }
         }
       }
     }
   }
 
+  /**
+   * Checks whether two exceptions of the same token are identical or
+   * whether one exception always implies the other. Either way one of
+   * them is useless, since a token never needs two such exceptions.
+   *
+   * @param exception1 first exception to compare
+   * @param exception2 second exception to compare
+   * @return true if the exceptions are identical or one implies the other
+   */
+  private static boolean equalException(final Element exception1,
+                                        final Element exception2)
+  {
+    final String string1 = exception1.getString();
+    final String string2 = exception2.getString();
+    final boolean equalStrings;
+    if (string1 == null || string2 == null) {
+      // Null only equals null; test before folding case to avoid an NPE.
+      equalStrings = string1 == string2;
+    } else if (exception1.getCaseSensitive() && exception2.getCaseSensitive()) {
+      equalStrings = string1.equals(string2);
+    } else {
+      // A case insensitive exception implies the case sensitive one.
+      equalStrings = string1.equalsIgnoreCase(string2);
+    }
+    if (!equalStrings) {
+      return false;
+    }
+
+    final String posTag1 = exception1.getPOStag();
+    final String posTag2 = exception2.getPOStag();
+    final boolean equalPosTags = (posTag1 == null || posTag2 == null)
+      ? posTag1 == posTag2 : posTag1.equals(posTag2);
+    if (!equalPosTags) {
+      return false;
+    }
+
+    // No need to compare isCaseSensitive() (the case insensitive exception
+    // implies the other), isInflected() (with identical strings,
+    // inflected="yes" implies the exception without it), nor
+    // isRegularExpression() (a given string is either a regexp or not).
+    return exception1.getNegation() == exception2.getNegation()
+        && exception1.getPOSNegation() == exception2.getPOSNegation()
+        && exception1.hasNextException() == exception2.hasNextException()
+        && exception1.hasPreviousException()
+            == exception2.hasPreviousException();
+  }
+
   private void warnIfElementNotKosher(
       final String stringValue,
       final boolean isRegularExpression,

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Try before you buy = See our experts in action!
The most comprehensive online learning library for Microsoft developers
is just $99.99! Visual Studio, SharePoint, SQL - plus HTML5, CSS3, MVC3,
Metro Style Apps, more. Free future releases when you subscribe now!
http://p.sf.net/sfu/learndevnow-dev2
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

Reply via email to