Revision: 8454
          
http://languagetool.svn.sourceforge.net/languagetool/?rev=8454&view=rev
Author:   dnaber
Date:     2012-11-25 21:50:56 +0000 (Sun, 25 Nov 2012)
Log Message:
-----------
Prototypical implementation of the "Localization Quality Issue Type" from the
Internationalization Tag Set (ITS) 2.0 standard, which means we categorize our errors
into standard categories, in addition to the LT ones

Modified Paths:
--------------
    trunk/JLanguageTool/CHANGES.txt
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/AbstractCompoundRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/DoublePunctuationRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
    trunk/JLanguageTool/src/main/java/org/languagetool/rules/Rule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
    trunk/JLanguageTool/src/main/java/org/languagetool/rules/WhitespaceRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatBeginningRule.java
    trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WrongWordInContextRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/DifferentLengthRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/SameTranslationRule.java
    trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/AvsAnRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/BritishReplaceRule.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java
    
trunk/JLanguageTool/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java
    trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java
    
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd
    
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/en-GB/grammar.xml
    trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
    trunk/JLanguageTool/src/main/resources/org/languagetool/rules/rules.xsd
    trunk/JLanguageTool/src/main/resources/org/languagetool/rules/xx/grammar.xml
    trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java
    
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleLoaderTest.java
    
trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java

Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt     2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/CHANGES.txt     2012-11-25 21:50:56 UTC (rev 8454)
@@ -30,7 +30,19 @@
 
  -API: Language.getLanguageForShortName() now consistently throws an exception
   if the given language code is not known
- 
+
+ -HTTP API: the XML we return now contains a new attribute 
"locqualityissuetype", which
+  is the "Localization Quality Issue Type" in the upcoming 
Internationalization Tag Set (ITS)
+  Version 2.0 standard from W3C. This means errors are now categorized 
according to
+  a standard, in addition to LanguageTool's own categories. Useful values are 
only
+  returned for English for now.
+  *** Please consider this to be a prototypical implementation for now ***
+  For the values and their meanings, please see
+  
http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues.
+  For rule developers: specify this using the new 'type' attribute. It is
+  inherited from category to rulegroup, and from rulegroup to rule. If a rule 
also
+  has a 'type' attribute, it overrides the rulegroup's and category's 'type'.
+
  -HTTP API: support for auto-detecting text language (parameter autodetect=1)
 
  -HTTP API: added HTTPSServer, a lightweight embedded HTTPS server which works 
like HTTPServer

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/AbstractCompoundRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/AbstractCompoundRule.java
  2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/AbstractCompoundRule.java
  2012-11-25 21:50:56 UTC (rev 8454)
@@ -76,6 +76,7 @@
     this.withHyphenMessage = withHyphenMessage;
     this.withoutHyphenMessage = withoutHyphenMessage;
     this.withOrWithoutHyphenMessage = withOrWithoutHyphenMessage;
+    setLocQualityIssueType("misspelling");
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
   2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
   2012-11-25 21:50:56 UTC (rev 8454)
@@ -37,6 +37,7 @@
   public CommaWhitespaceRule(final ResourceBundle messages) {
     super(messages);
     super.setCategory(new Category(messages.getString("category_misc")));
+    setLocQualityIssueType("typographical");
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/DoublePunctuationRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/DoublePunctuationRule.java
 2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/DoublePunctuationRule.java
 2012-11-25 21:50:56 UTC (rev 8454)
@@ -35,6 +35,7 @@
   public DoublePunctuationRule(final ResourceBundle messages) {
     super(messages);
     super.setCategory(new Category(messages.getString("category_misc")));
+    setLocQualityIssueType("typographical");
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
   2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
   2012-11-25 21:50:56 UTC (rev 8454)
@@ -71,6 +71,7 @@
     endSymbols = language.getUnpairedRuleEndSymbols();
     numerals = NUMERALS_EN;
     uniqueMapInit();
+    setLocQualityIssueType("typographical");
   }
 
   

Modified: trunk/JLanguageTool/src/main/java/org/languagetool/rules/Rule.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/rules/Rule.java  
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/rules/Rule.java  
2012-11-25 21:50:56 UTC (rev 8454)
@@ -40,6 +40,7 @@
 
   private List<String> correctExamples;
   private List<IncorrectExample> incorrectExamples;
+  private String locQualityIssueType = "uncategorized";
   private Category category;
   private URL url;
   /** If true, then the rule is turned off by default. */
@@ -249,4 +250,29 @@
   public void setUrl(URL url) {
     this.url = url;
   }
+
+  /**
+   * Returns the Localization Quality Issue Type, as defined
+   * at <a 
href="http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues"
+   * 
>http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues</a>.
+   *
+   * <p>Note that not all languages and not all rules actually map to a type 
yet. In those
+   * cases, <tt>uncategorized</tt> is returned.
+   *
+   * @return the Localization Quality Issue Type - <tt>uncategorized</tt> if 
no type has been assigned
+   * @since 2.0
+   */
+  public String getLocQualityIssueType() {
+    return locQualityIssueType;
+  }
+
+  /**
+   * Set the Localization Quality Issue Type.
+   * @see #getLocQualityIssueType()
+   * @since 2.0
+   */
+  public void setLocQualityIssueType(String locQualityIssueType) {
+    this.locQualityIssueType = locQualityIssueType;
+  }
+
 }

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -42,6 +42,7 @@
     super(messages);
     super.setCategory(new Category(messages.getString("category_case")));
     this.language = language;
+    setLocQualityIssueType("typographical");
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WhitespaceRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WhitespaceRule.java    
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WhitespaceRule.java    
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -39,6 +39,7 @@
   public WhitespaceRule(final ResourceBundle messages, final Language 
language) {
     super(messages);
     super.setCategory(new Category(messages.getString("category_misc")));
+    setLocQualityIssueType("typographical");
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatBeginningRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatBeginningRule.java
       2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatBeginningRule.java
       2012-11-25 21:50:56 UTC (rev 8454)
@@ -40,6 +40,7 @@
   public WordRepeatBeginningRule(final ResourceBundle messages, final Language 
language) {
     super(messages);
     super.setCategory(new Category(messages.getString("category_misc")));
+    setLocQualityIssueType("style");
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatRule.java    
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatRule.java    
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -36,6 +36,7 @@
   public WordRepeatRule(final ResourceBundle messages, final Language 
language) {
     super(messages);
     super.setCategory(new Category(messages.getString("category_misc")));
+    setLocQualityIssueType("addition");
   }
 
   /**

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WrongWordInContextRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WrongWordInContextRule.java
        2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WrongWordInContextRule.java
        2012-11-25 21:50:56 UTC (rev 8454)
@@ -48,6 +48,7 @@
     }
     final String filename = getFilename();
     contextWordsSet = 
loadContextWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(filename));
+    setLocQualityIssueType("terminology");
   }
 
   protected abstract String getFilename();

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/DifferentLengthRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/DifferentLengthRule.java
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/DifferentLengthRule.java
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -32,7 +32,11 @@
  *
  */
 public class DifferentLengthRule extends BitextRule {
-  
+
+  public DifferentLengthRule() {
+    setLocQualityIssueType("length");
+  }
+
   @Override
   public String getDescription() { 
     return "Check if translation length is similar to source length";

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/SameTranslationRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/SameTranslationRule.java
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/SameTranslationRule.java
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -33,6 +33,10 @@
  */
 public class SameTranslationRule extends BitextRule {
 
+  public SameTranslationRule() {
+    setLocQualityIssueType("untranslated");
+  }
+
   @Override
   public String getDescription() { 
     return "Check if translation is the same as source";

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/AvsAnRule.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/AvsAnRule.java  
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/AvsAnRule.java  
2012-11-25 21:50:56 UTC (rev 8454)
@@ -58,6 +58,7 @@
     }
     requiresA = 
loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILENAME_A));
     requiresAn = 
loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILENAME_AN));
+    setLocQualityIssueType("terminology");
   }
   
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/BritishReplaceRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/BritishReplaceRule.java
 2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/BritishReplaceRule.java
 2012-11-25 21:50:56 UTC (rev 8454)
@@ -45,6 +45,7 @@
 
   public BritishReplaceRule(final ResourceBundle messages) throws IOException {
     super(messages);
+    setLocQualityIssueType("terminology");
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java
   2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java
   2012-11-25 21:50:56 UTC (rev 8454)
@@ -38,6 +38,9 @@
   private boolean defaultOn;
 
   protected Category category;
+  protected String categoryIssueType;
+  protected String ruleGroupIssueType;
+  protected String ruleIssueType;
   protected String name;
   private String ruleGroupDescription;
   private int startPos = -1;
@@ -62,6 +65,9 @@
       if ("off".equals(attrs.getValue(DEFAULT))) {
         category.setDefaultOff();
       }
+      if (attrs.getValue("type") != null) {
+        categoryIssueType = attrs.getValue("type");
+      }
     } else if ("rules".equals(qName)) {
       final String languageStr = attrs.getValue("lang");
       language = Language.getLanguageForShortName(languageStr);
@@ -90,6 +96,9 @@
       if (suggestionMatches != null) {
         suggestionMatches.clear();
       }
+      if (attrs.getValue("type") != null) {
+        ruleIssueType = attrs.getValue("type");
+      }
     } else if (PATTERN.equals(qName)) {
       startPattern(attrs);
       tokenCountForMarker = 0;
@@ -138,6 +147,9 @@
       defaultOn = "on".equals(attrs.getValue(DEFAULT));
       inRuleGroup = true;
       subId = 0;
+      if (attrs.getValue("type") != null) {
+        ruleGroupIssueType = attrs.getValue("type");
+      }
     } else if ("suggestion".equals(qName) && inMessage) {      
       if (YES.equals(attrs.getValue("suppress_misspelled"))) {
         message.append("<pleasespellme/>");
@@ -172,7 +184,9 @@
   @Override
   public void endElement(final String namespaceURI, final String sName,
       final String qName) throws SAXException {
-    if (RULE.equals(qName)) {
+    if ("category".equals(qName)) {
+      categoryIssueType = null;
+    } else if (RULE.equals(qName)) {
       phraseElementInit();
       if (phraseElementList.isEmpty()) {
         final PatternRule rule = new PatternRule(id, language, elementList,
@@ -199,6 +213,7 @@
       if (phraseElementList != null) {
         phraseElementList.clear();
       }
+      ruleIssueType = null;
 
     } else if (EXCEPTION.equals(qName)) {
       finalizeExceptions();
@@ -249,6 +264,7 @@
       inMatch = false;
     } else if (RULEGROUP.equals(qName)) {
       inRuleGroup = false;
+      ruleGroupIssueType = null;
     } else if ("suggestion".equals(qName) && inMessage) {
       message.append("</suggestion>");      
       inSuggestion = false;
@@ -319,6 +335,14 @@
         throw new RuntimeException("Could not parse URL for rule: " + rule + 
": '" + url + "'", e);
       }
     }
+    // inheritance of values - if no type value is defined for a rule, take 
the rule group's value etc:
+    if (ruleIssueType != null) {
+      rule.setLocQualityIssueType(ruleIssueType);
+    } else if (ruleGroupIssueType != null) {
+      rule.setLocQualityIssueType(ruleGroupIssueType);
+    } else if (categoryIssueType != null) {
+      rule.setLocQualityIssueType(categoryIssueType);
+    }
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java
===================================================================
--- 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -45,6 +45,7 @@
   public SpellingCheckRule(final ResourceBundle messages, final Language 
language) {
     super(messages);
     this.language = language;
+    setLocQualityIssueType("misspelling");
   }
 
   @Override

Modified: 
trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java   
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java   
2012-11-25 21:50:56 UTC (rev 8454)
@@ -395,8 +395,12 @@
       }
       final Category category = match.getRule().getCategory();
       if (category != null) {
-        xml.append(" category=\"" + category.getName() + "\"");
+        xml.append(" category=\"" + escapeXMLForAPIOutput(category.getName()) 
+ "\"");
       }
+      final String type = match.getRule().getLocQualityIssueType();
+      if (type != null) {
+        xml.append(" locqualityissuetype=\"" + escapeXMLForAPIOutput(type) + 
"\"");
+      }
       xml.append("/>\n");
     }
     if (xmlMode == XmlPrintMode.END_XML || xmlMode == XmlPrintMode.NORMAL_XML) 
{

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd 
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd 
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -54,6 +54,14 @@
 <!-- The category of the match, if any (added in LanguageTool 1.9). -->
 <!ATTLIST error category CDATA #IMPLIED>
 
+<!-- Localization Quality Issue Type, according to Internationalization
+ Tag Set (ITS) Version 2.0,
+ see 
http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues
+ (added in LanguageTool 2.0).
+ *** Please consider this to be a prototypical implementation for now ***
+ -->
+<!ATTLIST error locqualityissuetype CDATA #IMPLIED>
+
 <!-- The language of the match (added in LanguageTool 2.0). -->
 <!ELEMENT language EMPTY>
 

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/en-GB/grammar.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/en-GB/grammar.xml
  2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/en-GB/grammar.xml
  2012-11-25 21:50:56 UTC (rev 8454)
@@ -30,7 +30,7 @@
        <!-- 
====================================================================== -->
        <!-- Possible typos -->
        <!-- 
====================================================================== -->
-       <category name="American English phrases">
+       <category name="American English phrases" type="terminology">
                <rule id="ZIP_CODE_POSTCODE" name="zip code/postcode">
                        <pattern>
                                <token>zip</token>

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml    
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml    
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -38,7 +38,7 @@
     <!-- 
====================================================================== -->
     <!-- Possible typos -->
     <!-- 
====================================================================== -->
-    <category name="Possible Typos">
+    <category name="Possible Typos" type="terminology">
         <rule id="FEEL_TREE_TO" name="feel tree (free) to">
             <pattern>
                 <token>feel</token>
@@ -5606,7 +5606,7 @@
     <!-- 
====================================================================== -->
     <!-- Grammar -->
     <!-- 
====================================================================== -->
-    <category name="Grammar">
+    <category name="Grammar" type="grammar">
         <rulegroup id="WANT_THAT_I" name="want that I(want me to)">
             <!-- TODO: extend this rule to cover more than personal pronouns. 
This is actually a very common error for German speakers. -->
             <rule>
@@ -8188,7 +8188,7 @@
             </rule>
         </rulegroup>
     </category>
-    <category name="Collocations">
+    <category name="Collocations" type="terminology">
         <rulegroup id="SUPERIOR_THAN" name="Wrong preposition: 
'superior/inferior than' (superior/inferior to)">
             <rule>
                 <pattern>
@@ -8628,7 +8628,7 @@
             </rule>
         </rulegroup>
     </category>
-    <category name="Punctuation Errors">
+    <category name="Punctuation Errors" type="typographical">
         <rulegroup default="off" id="EG_NO_COMMA" name="'e.g.' without a 
comma">
             <rule>
                 <pattern>
@@ -8784,7 +8784,7 @@
 
 
     </category>
-    <category name="Commonly Confused Words">
+    <category name="Commonly Confused Words" type="terminology">
         <rulegroup id="SITE_SIDE" name="site (side)">
             <rule>
                 <pattern>
@@ -9335,7 +9335,7 @@
             </rule>
         </rulegroup>
     </category>
-    <category name="Nonstandard Phrases">
+    <category name="Nonstandard Phrases" type="terminology">
         <rule id="IN_THE_MOMENT" name="in the moment (currently)">
             <!-- Typical mistake for German native speakers -->
             <pattern>
@@ -9541,7 +9541,7 @@
             <example type="correct">He is a well-known actor.</example>
         </rule>
     </category>
-    <category name="Slang">
+    <category name="Slang" type="register">
         <rule id="AN_INVITE" name="an invite (invitation)">
             <pattern>
                 <token>an</token>
@@ -9555,7 +9555,7 @@
             <example type="correct">Is that an 
<marker>invitation</marker>...?</example>
         </rule>
     </category>
-    <category name="Redundant Phrases">
+    <category name="Redundant Phrases" type="style">
         <rulegroup id="ATM_MACHINE" name="ATM machine (ATM)">
             <rule>
                 <pattern>
@@ -10967,7 +10967,7 @@
             <example type="correct">What the government decides to do depends 
on whether the bill passes.</example>
         </rule>
     </category>
-    <category name="Bad style">
+    <category name="Bad style" type="style">
         <rule default="off" id="SENT_START_NUM" name="Number starting a 
sentence">
             <pattern>
                 <token postag="SENT_START"></token>
@@ -11000,7 +11000,7 @@
             <example type="correct">The security software we offer is the 
state-of-the art achievement.</example>
         </rule>
     </category>
-    <category name="Capitalization">
+    <category name="Capitalization" type="typographical">
         <rulegroup default="off" id="EN_CAPITALIZE" name="Capitalize lowercase 
words ('i am')">
             <rule>
                 <pattern case_sensitive="yes">

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/rules.xsd
===================================================================
--- trunk/JLanguageTool/src/main/resources/org/languagetool/rules/rules.xsd     
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/main/resources/org/languagetool/rules/rules.xsd     
2012-11-25 21:50:56 UTC (rev 8454)
@@ -63,6 +63,7 @@
                                </xs:simpleType>
                        </xs:attribute>
                        <xs:attribute name="name" type="xs:string" 
use="required" />
+                       <xs:attribute ref="type" use="optional" />
                </xs:complexType>
        </xs:element>
 
@@ -89,14 +90,54 @@
                        </xs:attribute>
                        <xs:attribute name="name" type="xs:string" 
use="optional" />
                        <xs:attribute name="id" type="xs:ID" use="required" />
+                       <xs:attribute ref="type" use="optional" />
                </xs:complexType>
        </xs:element>
 
+    <xs:attribute name="type">
+        <xs:annotation>
+            <xs:documentation xml:lang="en">
+                Localization Quality Issue Type, according to 
Internationalization Tag Set (ITS) Version 2.0,
+                see 
http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues
+                (added in LanguageTool 2.0)
+            </xs:documentation>
+        </xs:annotation>
+        <xs:simpleType>
+            <xs:restriction base="xs:NMTOKEN">
+                <xs:enumeration value="terminology" />
+                <xs:enumeration value="mistranslation" />
+                <xs:enumeration value="omission" />
+                <xs:enumeration value="untranslated" />
+                <xs:enumeration value="addition" />
+                <xs:enumeration value="duplication" />
+                <xs:enumeration value="inconsistency" />
+                <xs:enumeration value="grammar" />
+                <xs:enumeration value="legal" />
+                <xs:enumeration value="register" />
+                <xs:enumeration value="locale-specific-content" />
+                <xs:enumeration value="locale-violation" />
+                <xs:enumeration value="style" />
+                <xs:enumeration value="characters" />
+                <xs:enumeration value="misspelling" />
+                <xs:enumeration value="typographical" />
+                <xs:enumeration value="formatting" />
+                <xs:enumeration value="inconsistent-entities" />
+                <xs:enumeration value="numbers" />
+                <xs:enumeration value="markup" />
+                <xs:enumeration value="pattern-problem" />
+                <xs:enumeration value="whitespace" />
+                <xs:enumeration value="internationalization" />
+                <xs:enumeration value="length" />
+                <xs:enumeration value="uncategorized" />
+                <xs:enumeration value="other" />
+            </xs:restriction>
+        </xs:simpleType>
+    </xs:attribute>
+
        <xs:annotation>
                <xs:documentation xml:lang="en"> The rule element. The
                        unique ID is
                        required only if the rule is not contained in a rule 
group.
-
                        The rule can be switched by default off (using the 
default attribute).
                </xs:documentation>
        </xs:annotation>
@@ -119,6 +160,7 @@
                        </xs:attribute>
                        <xs:attribute name="name" type="xs:string" 
use="optional" />
                        <xs:attribute name="id" type="xs:ID" use="optional" />
+                       <xs:attribute ref="type" use="optional" />
                </xs:complexType>
        </xs:element>
 

Modified: 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/xx/grammar.xml
===================================================================
--- 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/xx/grammar.xml    
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/xx/grammar.xml    
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -47,6 +47,7 @@
       <token regexp="yes">.*_sing_.*</token>
     </equivalence>
   </unification>
+
   <phrases>
     <phrase id="UNIFICATION_PHRASE">
       <unify>
@@ -110,8 +111,8 @@
   </phrases>
 
   <category name="misc">
-    <!-- a trivial demo rule that matches &quot;foo&quot; followed by 
&quot;bar&quot; -->
     <rule id="DEMO_RULE" name="Find 'foo bar'">
+      <!-- a trivial demo rule that matches "foo" followed by "bar" -->
       <pattern case_sensitive="no">
         <token>foo</token>
         <token>bar</token>
@@ -143,6 +144,9 @@
       <example type="correct">a small <marker>test</marker></example>
       <example type="incorrect">a small <marker>toast</marker></example>
     </rule>
+  </category>
+
+  <category name="otherCategory" type="addition">
     <rule id="TEST_GO" name="another test of phrases">
       <pattern>
         <token>foo</token>
@@ -154,7 +158,7 @@
       <example type="incorrect"><marker>foo go</marker> bar</example>
       <example type="incorrect"><marker>foo goa</marker> bar</example>
     </rule>
-    <rule id="TEST_PHRASES1" name="test phrases mechanism">
+    <rule id="TEST_PHRASES1" name="test phrases mechanism" 
type="uncategorized">
       <pattern>
         <phraseref idref="COMPLEX_MULTIPLE"></phraseref>
         <token>there</token>
@@ -163,7 +167,7 @@
       <example type="correct">go here</example>
       <example type="incorrect"><marker>first goes last there</marker>, 
please!</example>
     </rule>
-    <rule id="test_include" name="test two includeblocks">
+    <rule id="test_include" name="test two includeblocks" type="characters">
       <pattern>
         <phraseref idref="TEST_INCLUDE"></phraseref>
       </pattern>
@@ -352,26 +356,26 @@
     <example type="correct">This is Test1 abc Cde End.</example>
     </rule>
     <rulegroup id="test_spacebefore" name="Test spacebefore on exceptions">
-    <rule>
-       <pattern>
-               <token>blah<exception scope="previous" 
spacebefore="no">'</exception></token>
-               <token>blah</token>
-       </pattern>
-       <message>This is a dummy message.</message>
-       <example type="correct">Dogs' blah blah</example>
-       <example type="incorrect">Dogs <marker>blah blah</marker></example>
-    </rule>
-    <rule>
-       <pattern>
-               <token>blah<exception spacebefore="no">blah</exception></token>
-               <token>blah</token>
-       </pattern>
-       <message>This is a dummy message.</message>
-       <example type="correct">Dogs 'blah blah</example>
-       <example type="incorrect">Dogs <marker>blah blah</marker></example>
-    </rule>
+        <rule>
+            <pattern>
+                <token>blah<exception scope="previous" 
spacebefore="no">'</exception></token>
+                <token>blah</token>
+            </pattern>
+            <message>This is a dummy message.</message>
+            <example type="correct">Dogs' blah blah</example>
+            <example type="incorrect">Dogs <marker>blah blah</marker></example>
+        </rule>
+        <rule type="duplication">
+            <pattern>
+                <token>blah<exception spacebefore="no">blah</exception></token>
+                <token>blah</token>
+            </pattern>
+            <message>This is a dummy message.</message>
+            <example type="correct">Dogs 'blah blah</example>
+            <example type="incorrect">Dogs <marker>blah blah</marker></example>
+        </rule>
     </rulegroup>
-    <rulegroup id="test_unification_with_negation" name="Test unification with 
negation">
+    <rulegroup id="test_unification_with_negation" name="Test unification with 
negation" type="grammar">
        <rule>
                <pattern>
                        <unify negate="yes">
@@ -391,4 +395,5 @@
        </rule>
     </rulegroup>    
   </category>
+
 </rules>
\ No newline at end of file

Modified: trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java
===================================================================
--- trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java    
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java    
2012-11-25 21:50:56 UTC (rev 8454)
@@ -187,7 +187,8 @@
     String output = new String(this.out.toByteArray());
     assertTrue("Got: " + output, output.contains("<error fromy=\"4\" 
fromx=\"5\" toy=\"4\" tox=\"10\" " +
             "ruleId=\"ENGLISH_WORD_REPEAT_RULE\" msg=\"Possible typo: you 
repeated a word\" replacements=\"is\" " +
-            "context=\"This is is a test of language tool. \" 
contextoffset=\"5\" offset=\"5\" errorlength=\"5\" 
category=\"Miscellaneous\"/>"));
+            "context=\"This is is a test of language tool. \" 
contextoffset=\"5\" offset=\"5\" errorlength=\"5\" " +
+            "category=\"Miscellaneous\" locqualityissuetype=\"addition\"/>"));
     // note: the offset is relative to the sentence... this seems wrong - it 
happens because of the way
     // the command line client feeds the data into the check() methods.
   }
@@ -284,7 +285,7 @@
     assertTrue(output.contains("<error fromy=\"0\" fromx=\"8\" toy=\"0\" 
tox=\"10\" ruleId=\"EN_A_VS_AN\" " +
             "msg=\"Use 'a' instead of 'an' if the following word doesn't start 
with a vowel sound, e.g. 'a sentence', " +
             "'a university'\" replacements=\"a\" context=\"This is an test.  
This is a test of of language tool.  ...\" " +
-            "contextoffset=\"8\" offset=\"8\" errorlength=\"2\" 
category=\"Miscellaneous\"/>"));
+            "contextoffset=\"8\" offset=\"8\" errorlength=\"2\" 
category=\"Miscellaneous\" locqualityissuetype=\"terminology\"/>"));
   }
   
   public void testGermanFileWithURL() throws Exception {
@@ -318,7 +319,7 @@
     assertTrue(output.contains("<error fromy=\"0\" fromx=\"8\" toy=\"0\" 
tox=\"20\" ruleId=\"BRAK_PRZECINKA_KTORY\" subId=\"5\""));
     //This tests whether XML encoding is actually UTF-8:
     assertTrue(output.contains("msg=\"Brak przecinka w tym fragmencie zdania. 
Przecinek prawdopodobnie należy postawić tak: 'świnia, która'.\" 
replacements=\"świnia, która\" "));
-    assertTrue(output.contains("context=\"To jest świnia która się ślini. \" 
contextoffset=\"8\" offset=\"8\" errorlength=\"12\" category=\"Błędy 
interpunkcyjne\"/>"));
+    assertTrue(output.contains("context=\"To jest świnia która się ślini. \" 
contextoffset=\"8\" offset=\"8\" errorlength=\"12\" category=\"Błędy 
interpunkcyjne\""));
   }
   
   public void testPolishLineNumbers() throws Exception {

Modified: 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleLoaderTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleLoaderTest.java
        2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleLoaderTest.java
        2012-11-25 21:50:56 UTC (rev 8454)
@@ -1,6 +1,9 @@
 package org.languagetool.rules.patterns;
 
+import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 
 import junit.framework.TestCase;
 
@@ -15,16 +18,42 @@
     final String name = "/xx/grammar.xml";
     final List<PatternRule> rules = 
prg.getRules(JLanguageTool.getDataBroker().getFromRulesDirAsStream(name), name);
     assertTrue(rules.size() >= 30);
+
     final Rule demoRule1 = getRuleById("DEMO_RULE", rules);
     assertEquals("http://fake-server.org/foo-bar-error-explained", 
demoRule1.getUrl().toString());
     assertEquals("[This is <marker>fuu bah</marker>.]", 
demoRule1.getCorrectExamples().toString());
     final List<IncorrectExample> incorrectExamples = 
demoRule1.getIncorrectExamples();
     assertEquals(1, incorrectExamples.size());
     assertEquals("This is <marker>foo bar</marker>.", 
incorrectExamples.get(0).getExample());
+
     final Rule demoRule2 = getRuleById("API_OUTPUT_TEST_RULE", rules);
     assertNull(demoRule2.getUrl());
+
+    assertEquals("uncategorized", demoRule1.getLocQualityIssueType());
+    assertEquals("tag inheritance failed", "addition", getRuleById("TEST_GO", 
rules).getLocQualityIssueType());
+    assertEquals("tag inheritance overwrite failed", "uncategorized", 
getRuleById("TEST_PHRASES1", rules).getLocQualityIssueType());
+    assertEquals("tag inheritance overwrite failed", "characters", 
getRuleById("test_include", rules).getLocQualityIssueType());
+
+    final List<Rule> groupRules1 = getRulesById("test_spacebefore", rules);
+    assertEquals("tag inheritance form category failed", "addition", 
groupRules1.get(0).getLocQualityIssueType());
+    assertEquals("tag inheritance overwrite failed", "duplication", 
groupRules1.get(1).getLocQualityIssueType());
+    final List<Rule> groupRules2 = 
getRulesById("test_unification_with_negation", rules);
+    assertEquals("tag inheritance from rulegroup failed", "grammar", 
groupRules2.get(0).getLocQualityIssueType());
+
+    final Set<String> categories = getCategoryNames(rules);
+    assertEquals(2, categories.size());
+    assertTrue(categories.contains("misc"));
+    assertTrue(categories.contains("otherCategory"));
   }
-  
+
+  private Set<String> getCategoryNames(List<PatternRule> rules) {
+    final Set<String> categories = new HashSet<String>();
+    for (PatternRule rule : rules) {
+      categories.add(rule.getCategory().getName());
+    }
+    return categories;
+  }
+
   private Rule getRuleById(String id, List<PatternRule> rules) {
     for (Rule rule : rules) {
       if (rule.getId().equals(id)) {
@@ -34,4 +63,14 @@
     throw new RuntimeException("No rule found for id '" + id + "'");
   }
 
+  private List<Rule> getRulesById(String id, List<PatternRule> rules) {
+    final List<Rule> result = new ArrayList<Rule>();
+    for (Rule rule : rules) {
+      if (rule.getId().equals(id)) {
+        result.add(rule);
+      }
+    }
+    return result;
+  }
+
 }

Modified: 
trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java   
    2012-11-25 21:29:48 UTC (rev 8453)
+++ 
trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java   
    2012-11-25 21:50:56 UTC (rev 8454)
@@ -166,7 +166,8 @@
   public void testRuleMatchesToXML() throws IOException {
     final List<RuleMatch> matches = new ArrayList<RuleMatch>();
     final String text = "This is an test sentence. Here's another sentence 
with more text.";
-    final RuleMatch match = new RuleMatch(new AvsAnRule(null), 8, 10, 
"myMessage");
+    final AvsAnRule rule = new AvsAnRule(null);
+    final RuleMatch match = new RuleMatch(rule, 8, 10, "myMessage");
     match.setColumn(99);
     match.setEndColumn(100);
     match.setLine(44);
@@ -180,7 +181,8 @@
     assertTrue(matcher.matches());
     assertTrue(xml.contains(">\n" +
             "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" 
ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " +
-            "replacements=\"\" context=\"...s is an test...\" 
contextoffset=\"8\" offset=\"8\" errorlength=\"2\"/>\n" +
+            "replacements=\"\" context=\"...s is an test...\" 
contextoffset=\"8\" offset=\"8\" errorlength=\"2\" " +
+            "locqualityissuetype=\"terminology\"/>\n" +
             "</matches>\n"));
   }
 
@@ -199,7 +201,8 @@
     final String xml = StringTools.ruleMatchesToXML(matches, text, 5, 
StringTools.XmlPrintMode.NORMAL_XML);
     assertTrue(xml.contains(">\n" +
             "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" 
ruleId=\"MY_ID\" msg=\"myMessage\" " +
-            "replacements=\"\" context=\"...s is a test ...\" 
contextoffset=\"8\" offset=\"8\" errorlength=\"2\" category=\"MyCategory\"/>\n" 
+
+            "replacements=\"\" context=\"...s is a test ...\" 
contextoffset=\"8\" offset=\"8\" errorlength=\"2\" category=\"MyCategory\" " +
+            "locqualityissuetype=\"uncategorized\"/>\n" +
             "</matches>\n"));
   }
 
@@ -224,7 +227,8 @@
     final String xml = StringTools.ruleMatchesToXML(matches, text, 5, 
StringTools.XmlPrintMode.NORMAL_XML);
     assertTrue(xml.contains(">\n" +
             "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" 
ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " +
-            "replacements=\"\" context=\"...s is an test...\" 
contextoffset=\"8\" offset=\"8\" errorlength=\"2\" 
url=\"http://server.org?id=1&amp;foo=bar\"/>\n" +
+            "replacements=\"\" context=\"...s is an test...\" 
contextoffset=\"8\" offset=\"8\" errorlength=\"2\" 
url=\"http://server.org?id=1&amp;foo=bar\" " +
+            "locqualityissuetype=\"terminology\"/>\n" +
             "</matches>\n"));
   }
 
@@ -240,7 +244,8 @@
     final String xml = StringTools.ruleMatchesToXML(matches, text, 5, 
StringTools.XmlPrintMode.NORMAL_XML);
     assertTrue(xml.contains(">\n" +
             "<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\" 
ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " +
-            "replacements=\"\" context=\"... is &quot;an test...\" 
contextoffset=\"8\" offset=\"9\" errorlength=\"2\"/>\n" +
+            "replacements=\"\" context=\"... is &quot;an test...\" 
contextoffset=\"8\" offset=\"9\" errorlength=\"2\" " +
+            "locqualityissuetype=\"terminology\"/>\n" +
             "</matches>\n"));
   }
 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Monitor your physical, virtual and cloud infrastructure from a single
web console. Get in-depth insight into apps, servers, databases, vmware,
SAP, cloud infrastructure, etc. Download 30-day Free Trial.
Pricing starts from $795 for 25 servers or applications!
http://p.sf.net/sfu/zoho_dev2dev_nov
_______________________________________________
Languagetool-commits mailing list
Languagetool-commits@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-commits

Reply via email to