Revision: 8454
http://languagetool.svn.sourceforge.net/languagetool/?rev=8454&view=rev
Author: dnaber
Date: 2012-11-25 21:50:56 +0000 (Sun, 25 Nov 2012)
Log Message:
-----------
prototypical implementation of "Localization Quality Issue Type" from
Internationalization Tag Set standard 2.0, which means we categorize our errors
into standard categories, in addition to the LT ones
Modified Paths:
--------------
trunk/JLanguageTool/CHANGES.txt
trunk/JLanguageTool/src/main/java/org/languagetool/rules/AbstractCompoundRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/DoublePunctuationRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/Rule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WhitespaceRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatBeginningRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WrongWordInContextRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/DifferentLengthRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/SameTranslationRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/AvsAnRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/BritishReplaceRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java
trunk/JLanguageTool/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java
trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/en-GB/grammar.xml
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/rules.xsd
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/xx/grammar.xml
trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleLoaderTest.java
trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java
Modified: trunk/JLanguageTool/CHANGES.txt
===================================================================
--- trunk/JLanguageTool/CHANGES.txt 2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/CHANGES.txt 2012-11-25 21:50:56 UTC (rev 8454)
@@ -30,7 +30,19 @@
-API: Language.getLanguageForShortName() now consistently throws an exception
if the given language code is not known
-
+
+ -HTTP API: the XML we return now contains a new attribute
"locqualityissuetype", which
+ is the "Localization Quality Issue Type" in the upcoming
Internationalization Tag Set (ITS)
+ Version 2.0 standard from W3C. This means errors are now categorized
according to
+ a standard, in addition to LanguageTool's own categories. Useful values are
only
+ returned for English for now.
+ *** Please consider this to be a prototypical implementation for now ***
+ For the values and their meanings, please see
+
http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues.
+ For rule developers: specify this using the new 'type' attribute. It is
+ inherited from category to rulegroup, and from rulegroup to rule. If a rule
also
+ has a 'type', it overrides the rulegroup's and category's 'type'.
+
-HTTP API: support for auto-detecting text language (parameter autodetect=1)
-HTTP API: added HTTPSServer, a lightweight embedded HTTPS server which works
like HTTPServer
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/AbstractCompoundRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/AbstractCompoundRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/AbstractCompoundRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -76,6 +76,7 @@
this.withHyphenMessage = withHyphenMessage;
this.withoutHyphenMessage = withoutHyphenMessage;
this.withOrWithoutHyphenMessage = withOrWithoutHyphenMessage;
+ setLocQualityIssueType("misspelling");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/CommaWhitespaceRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -37,6 +37,7 @@
public CommaWhitespaceRule(final ResourceBundle messages) {
super(messages);
super.setCategory(new Category(messages.getString("category_misc")));
+ setLocQualityIssueType("typographical");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/DoublePunctuationRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/DoublePunctuationRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/DoublePunctuationRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -35,6 +35,7 @@
public DoublePunctuationRule(final ResourceBundle messages) {
super(messages);
super.setCategory(new Category(messages.getString("category_misc")));
+ setLocQualityIssueType("typographical");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -71,6 +71,7 @@
endSymbols = language.getUnpairedRuleEndSymbols();
numerals = NUMERALS_EN;
uniqueMapInit();
+ setLocQualityIssueType("typographical");
}
Modified: trunk/JLanguageTool/src/main/java/org/languagetool/rules/Rule.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/rules/Rule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/rules/Rule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -40,6 +40,7 @@
private List<String> correctExamples;
private List<IncorrectExample> incorrectExamples;
+ private String locQualityIssueType = "uncategorized";
private Category category;
private URL url;
/** If true, then the rule is turned off by default. */
@@ -249,4 +250,29 @@
public void setUrl(URL url) {
this.url = url;
}
+
+ /**
+ * Returns the Localization Quality Issue Type, as defined
+ * at <a
href="http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues"
+ *
>http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues</a>.
+ *
+ * <p>Note that not all languages nor all rules actually map to a type
yet. In those
+ * cases, <tt>uncategorized</tt> is returned.
+ *
+ * @return the Localization Quality Issue Type - <tt>uncategorized</tt> if
no type has been assigned
+ * @since 2.0
+ */
+ public String getLocQualityIssueType() {
+ return locQualityIssueType;
+ }
+
+ /**
+ * Set the Localization Quality Issue Type.
+ * @see #getLocQualityIssueType()
+ * @since 2.0
+ */
+ public void setLocQualityIssueType(String locQualityIssueType) {
+ this.locQualityIssueType = locQualityIssueType;
+ }
+
}
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/UppercaseSentenceStartRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -42,6 +42,7 @@
super(messages);
super.setCategory(new Category(messages.getString("category_case")));
this.language = language;
+ setLocQualityIssueType("typographical");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WhitespaceRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WhitespaceRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WhitespaceRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -39,6 +39,7 @@
public WhitespaceRule(final ResourceBundle messages, final Language
language) {
super(messages);
super.setCategory(new Category(messages.getString("category_misc")));
+ setLocQualityIssueType("typographical");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatBeginningRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatBeginningRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatBeginningRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -40,6 +40,7 @@
public WordRepeatBeginningRule(final ResourceBundle messages, final Language
language) {
super(messages);
super.setCategory(new Category(messages.getString("category_misc")));
+ setLocQualityIssueType("style");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WordRepeatRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -36,6 +36,7 @@
public WordRepeatRule(final ResourceBundle messages, final Language
language) {
super(messages);
super.setCategory(new Category(messages.getString("category_misc")));
+ setLocQualityIssueType("addition");
}
/**
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WrongWordInContextRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WrongWordInContextRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/WrongWordInContextRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -48,6 +48,7 @@
}
final String filename = getFilename();
contextWordsSet =
loadContextWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(filename));
+ setLocQualityIssueType("terminology");
}
protected abstract String getFilename();
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/DifferentLengthRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/DifferentLengthRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/DifferentLengthRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -32,7 +32,11 @@
*
*/
public class DifferentLengthRule extends BitextRule {
-
+
+ public DifferentLengthRule() {
+ setLocQualityIssueType("length");
+ }
+
@Override
public String getDescription() {
return "Check if translation length is similar to source length";
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/SameTranslationRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/SameTranslationRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/bitext/SameTranslationRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -33,6 +33,10 @@
*/
public class SameTranslationRule extends BitextRule {
+ public SameTranslationRule() {
+ setLocQualityIssueType("untranslated");
+ }
+
@Override
public String getDescription() {
return "Check if translation is the same as source";
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/AvsAnRule.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/AvsAnRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/AvsAnRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -58,6 +58,7 @@
}
requiresA =
loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILENAME_A));
requiresAn =
loadWords(JLanguageTool.getDataBroker().getFromRulesDirAsStream(FILENAME_AN));
+ setLocQualityIssueType("terminology");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/BritishReplaceRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/BritishReplaceRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/en/BritishReplaceRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -45,6 +45,7 @@
public BritishReplaceRule(final ResourceBundle messages) throws IOException {
super(messages);
+ setLocQualityIssueType("terminology");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/patterns/PatternRuleHandler.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -38,6 +38,9 @@
private boolean defaultOn;
protected Category category;
+ protected String categoryIssueType;
+ protected String ruleGroupIssueType;
+ protected String ruleIssueType;
protected String name;
private String ruleGroupDescription;
private int startPos = -1;
@@ -62,6 +65,9 @@
if ("off".equals(attrs.getValue(DEFAULT))) {
category.setDefaultOff();
}
+ if (attrs.getValue("type") != null) {
+ categoryIssueType = attrs.getValue("type");
+ }
} else if ("rules".equals(qName)) {
final String languageStr = attrs.getValue("lang");
language = Language.getLanguageForShortName(languageStr);
@@ -90,6 +96,9 @@
if (suggestionMatches != null) {
suggestionMatches.clear();
}
+ if (attrs.getValue("type") != null) {
+ ruleIssueType = attrs.getValue("type");
+ }
} else if (PATTERN.equals(qName)) {
startPattern(attrs);
tokenCountForMarker = 0;
@@ -138,6 +147,9 @@
defaultOn = "on".equals(attrs.getValue(DEFAULT));
inRuleGroup = true;
subId = 0;
+ if (attrs.getValue("type") != null) {
+ ruleGroupIssueType = attrs.getValue("type");
+ }
} else if ("suggestion".equals(qName) && inMessage) {
if (YES.equals(attrs.getValue("suppress_misspelled"))) {
message.append("<pleasespellme/>");
@@ -172,7 +184,9 @@
@Override
public void endElement(final String namespaceURI, final String sName,
final String qName) throws SAXException {
- if (RULE.equals(qName)) {
+ if ("category".equals(qName)) {
+ categoryIssueType = null;
+ } else if (RULE.equals(qName)) {
phraseElementInit();
if (phraseElementList.isEmpty()) {
final PatternRule rule = new PatternRule(id, language, elementList,
@@ -199,6 +213,7 @@
if (phraseElementList != null) {
phraseElementList.clear();
}
+ ruleIssueType = null;
} else if (EXCEPTION.equals(qName)) {
finalizeExceptions();
@@ -249,6 +264,7 @@
inMatch = false;
} else if (RULEGROUP.equals(qName)) {
inRuleGroup = false;
+ ruleGroupIssueType = null;
} else if ("suggestion".equals(qName) && inMessage) {
message.append("</suggestion>");
inSuggestion = false;
@@ -319,6 +335,14 @@
throw new RuntimeException("Could not parse URL for rule: " + rule +
": '" + url + "'", e);
}
}
+ // inheritance of values - if no type value is defined for a rule, take
the rule group's value etc:
+ if (ruleIssueType != null) {
+ rule.setLocQualityIssueType(ruleIssueType);
+ } else if (ruleGroupIssueType != null) {
+ rule.setLocQualityIssueType(ruleGroupIssueType);
+ } else if (categoryIssueType != null) {
+ rule.setLocQualityIssueType(categoryIssueType);
+ }
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java
===================================================================
---
trunk/JLanguageTool/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -45,6 +45,7 @@
public SpellingCheckRule(final ResourceBundle messages, final Language
language) {
super(messages);
this.language = language;
+ setLocQualityIssueType("misspelling");
}
@Override
Modified:
trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java
===================================================================
--- trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/main/java/org/languagetool/tools/StringTools.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -395,8 +395,12 @@
}
final Category category = match.getRule().getCategory();
if (category != null) {
- xml.append(" category=\"" + category.getName() + "\"");
+ xml.append(" category=\"" + escapeXMLForAPIOutput(category.getName())
+ "\"");
}
+ final String type = match.getRule().getLocQualityIssueType();
+ if (type != null) {
+ xml.append(" locqualityissuetype=\"" + escapeXMLForAPIOutput(type) +
"\"");
+ }
xml.append("/>\n");
}
if (xmlMode == XmlPrintMode.END_XML || xmlMode == XmlPrintMode.NORMAL_XML)
{
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd
===================================================================
---
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/resources/org/languagetool/resource/api-output.dtd
2012-11-25 21:50:56 UTC (rev 8454)
@@ -54,6 +54,14 @@
<!-- The category of the match, if any (added in LanguageTool 1.9). -->
<!ATTLIST error category CDATA #IMPLIED>
+<!-- Localization Quality Issue Type, according to Internationalization
+ Tag Set (ITS) Version 2.0,
+ see
http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues
+ (added in LanguageTool 2.0).
+ *** Please consider this to be a prototypical implementation for now ***
+ -->
+<!ATTLIST error locqualityissuetype CDATA #IMPLIED>
+
<!-- The language of the match (added in LanguageTool 2.0). -->
<!ELEMENT language EMPTY>
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/en-GB/grammar.xml
===================================================================
---
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/en-GB/grammar.xml
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/en-GB/grammar.xml
2012-11-25 21:50:56 UTC (rev 8454)
@@ -30,7 +30,7 @@
<!--
====================================================================== -->
<!-- Possible typos -->
<!--
====================================================================== -->
- <category name="American English phrases">
+ <category name="American English phrases" type="terminology">
<rule id="ZIP_CODE_POSTCODE" name="zip code/postcode">
<pattern>
<token>zip</token>
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
===================================================================
---
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/en/grammar.xml
2012-11-25 21:50:56 UTC (rev 8454)
@@ -38,7 +38,7 @@
<!--
====================================================================== -->
<!-- Possible typos -->
<!--
====================================================================== -->
- <category name="Possible Typos">
+ <category name="Possible Typos" type="terminology">
<rule id="FEEL_TREE_TO" name="feel tree (free) to">
<pattern>
<token>feel</token>
@@ -5606,7 +5606,7 @@
<!--
====================================================================== -->
<!-- Grammar -->
<!--
====================================================================== -->
- <category name="Grammar">
+ <category name="Grammar" type="grammar">
<rulegroup id="WANT_THAT_I" name="want that I(want me to)">
<!-- TODO: extend this rule to cover more than personal pronouns.
This is actually a very common error for German speakers. -->
<rule>
@@ -8188,7 +8188,7 @@
</rule>
</rulegroup>
</category>
- <category name="Collocations">
+ <category name="Collocations" type="terminology">
<rulegroup id="SUPERIOR_THAN" name="Wrong preposition:
'superior/inferior than' (superior/inferior to)">
<rule>
<pattern>
@@ -8628,7 +8628,7 @@
</rule>
</rulegroup>
</category>
- <category name="Punctuation Errors">
+ <category name="Punctuation Errors" type="typographical">
<rulegroup default="off" id="EG_NO_COMMA" name="'e.g.' without a
comma">
<rule>
<pattern>
@@ -8784,7 +8784,7 @@
</category>
- <category name="Commonly Confused Words">
+ <category name="Commonly Confused Words" type="terminology">
<rulegroup id="SITE_SIDE" name="site (side)">
<rule>
<pattern>
@@ -9335,7 +9335,7 @@
</rule>
</rulegroup>
</category>
- <category name="Nonstandard Phrases">
+ <category name="Nonstandard Phrases" type="terminology">
<rule id="IN_THE_MOMENT" name="in the moment (currently)">
<!-- Typical mistake for German native speakers -->
<pattern>
@@ -9541,7 +9541,7 @@
<example type="correct">He is a well-known actor.</example>
</rule>
</category>
- <category name="Slang">
+ <category name="Slang" type="register">
<rule id="AN_INVITE" name="an invite (invitation)">
<pattern>
<token>an</token>
@@ -9555,7 +9555,7 @@
<example type="correct">Is that an
<marker>invitation</marker>...?</example>
</rule>
</category>
- <category name="Redundant Phrases">
+ <category name="Redundant Phrases" type="style">
<rulegroup id="ATM_MACHINE" name="ATM machine (ATM)">
<rule>
<pattern>
@@ -10967,7 +10967,7 @@
<example type="correct">What the government decides to do depends
on whether the bill passes.</example>
</rule>
</category>
- <category name="Bad style">
+ <category name="Bad style" type="style">
<rule default="off" id="SENT_START_NUM" name="Number starting a
sentence">
<pattern>
<token postag="SENT_START"></token>
@@ -11000,7 +11000,7 @@
<example type="correct">The security software we offer is the
state-of-the art achievement.</example>
</rule>
</category>
- <category name="Capitalization">
+ <category name="Capitalization" type="typographical">
<rulegroup default="off" id="EN_CAPITALIZE" name="Capitalize lowercase
words ('i am')">
<rule>
<pattern case_sensitive="yes">
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/rules.xsd
===================================================================
--- trunk/JLanguageTool/src/main/resources/org/languagetool/rules/rules.xsd
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/main/resources/org/languagetool/rules/rules.xsd
2012-11-25 21:50:56 UTC (rev 8454)
@@ -63,6 +63,7 @@
</xs:simpleType>
</xs:attribute>
<xs:attribute name="name" type="xs:string"
use="required" />
+ <xs:attribute ref="type" use="optional" />
</xs:complexType>
</xs:element>
@@ -89,14 +90,54 @@
</xs:attribute>
<xs:attribute name="name" type="xs:string"
use="optional" />
<xs:attribute name="id" type="xs:ID" use="required" />
+ <xs:attribute ref="type" use="optional" />
</xs:complexType>
</xs:element>
+ <xs:attribute name="type">
+ <xs:annotation>
+ <xs:documentation xml:lang="en">
+ Localization Quality Issue Type, according to
Internationalization Tag Set (ITS) Version 2.0,
+ see
http://www.w3.org/International/multilingualweb/lt/drafts/its20/its20.html#lqissue-typevalues
+ (added in LanguageTool 2.0)
+ </xs:documentation>
+ </xs:annotation>
+ <xs:simpleType>
+ <xs:restriction base="xs:NMTOKEN">
+ <xs:enumeration value="terminology" />
+ <xs:enumeration value="mistranslation" />
+ <xs:enumeration value="omission" />
+ <xs:enumeration value="untranslated" />
+ <xs:enumeration value="addition" />
+ <xs:enumeration value="duplication" />
+ <xs:enumeration value="inconsistency" />
+ <xs:enumeration value="grammar" />
+ <xs:enumeration value="legal" />
+ <xs:enumeration value="register" />
+ <xs:enumeration value="locale-specific-content" />
+ <xs:enumeration value="locale-violation" />
+ <xs:enumeration value="style" />
+ <xs:enumeration value="characters" />
+ <xs:enumeration value="misspelling" />
+ <xs:enumeration value="typographical" />
+ <xs:enumeration value="formatting" />
+ <xs:enumeration value="inconsistent-entities" />
+ <xs:enumeration value="numbers" />
+ <xs:enumeration value="markup" />
+ <xs:enumeration value="pattern-problem" />
+ <xs:enumeration value="whitespace" />
+ <xs:enumeration value="internationalization" />
+ <xs:enumeration value="length" />
+ <xs:enumeration value="uncategorized" />
+ <xs:enumeration value="other" />
+ </xs:restriction>
+ </xs:simpleType>
+ </xs:attribute>
+
<xs:annotation>
<xs:documentation xml:lang="en"> The rule element. The
unique ID is
required only if the rule is not contained in a rule
group.
-
The rule can be switched by default off (using the
default attribute).
</xs:documentation>
</xs:annotation>
@@ -119,6 +160,7 @@
</xs:attribute>
<xs:attribute name="name" type="xs:string"
use="optional" />
<xs:attribute name="id" type="xs:ID" use="optional" />
+ <xs:attribute ref="type" use="optional" />
</xs:complexType>
</xs:element>
Modified:
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/xx/grammar.xml
===================================================================
---
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/xx/grammar.xml
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/main/resources/org/languagetool/rules/xx/grammar.xml
2012-11-25 21:50:56 UTC (rev 8454)
@@ -47,6 +47,7 @@
<token regexp="yes">.*_sing_.*</token>
</equivalence>
</unification>
+
<phrases>
<phrase id="UNIFICATION_PHRASE">
<unify>
@@ -110,8 +111,8 @@
</phrases>
<category name="misc">
- <!-- a trivial demo rule that matches "foo" followed by
"bar" -->
<rule id="DEMO_RULE" name="Find 'foo bar'">
+ <!-- a trivial demo rule that matches "foo" followed by "bar" -->
<pattern case_sensitive="no">
<token>foo</token>
<token>bar</token>
@@ -143,6 +144,9 @@
<example type="correct">a small <marker>test</marker></example>
<example type="incorrect">a small <marker>toast</marker></example>
</rule>
+ </category>
+
+ <category name="otherCategory" type="addition">
<rule id="TEST_GO" name="another test of phrases">
<pattern>
<token>foo</token>
@@ -154,7 +158,7 @@
<example type="incorrect"><marker>foo go</marker> bar</example>
<example type="incorrect"><marker>foo goa</marker> bar</example>
</rule>
- <rule id="TEST_PHRASES1" name="test phrases mechanism">
+ <rule id="TEST_PHRASES1" name="test phrases mechanism"
type="uncategorized">
<pattern>
<phraseref idref="COMPLEX_MULTIPLE"></phraseref>
<token>there</token>
@@ -163,7 +167,7 @@
<example type="correct">go here</example>
<example type="incorrect"><marker>first goes last there</marker>,
please!</example>
</rule>
- <rule id="test_include" name="test two includeblocks">
+ <rule id="test_include" name="test two includeblocks" type="characters">
<pattern>
<phraseref idref="TEST_INCLUDE"></phraseref>
</pattern>
@@ -352,26 +356,26 @@
<example type="correct">This is Test1 abc Cde End.</example>
</rule>
<rulegroup id="test_spacebefore" name="Test spacebefore on exceptions">
- <rule>
- <pattern>
- <token>blah<exception scope="previous"
spacebefore="no">'</exception></token>
- <token>blah</token>
- </pattern>
- <message>This is a dummy message.</message>
- <example type="correct">Dogs' blah blah</example>
- <example type="incorrect">Dogs <marker>blah blah</marker></example>
- </rule>
- <rule>
- <pattern>
- <token>blah<exception spacebefore="no">blah</exception></token>
- <token>blah</token>
- </pattern>
- <message>This is a dummy message.</message>
- <example type="correct">Dogs 'blah blah</example>
- <example type="incorrect">Dogs <marker>blah blah</marker></example>
- </rule>
+ <rule>
+ <pattern>
+ <token>blah<exception scope="previous"
spacebefore="no">'</exception></token>
+ <token>blah</token>
+ </pattern>
+ <message>This is a dummy message.</message>
+ <example type="correct">Dogs' blah blah</example>
+ <example type="incorrect">Dogs <marker>blah blah</marker></example>
+ </rule>
+ <rule type="duplication">
+ <pattern>
+ <token>blah<exception spacebefore="no">blah</exception></token>
+ <token>blah</token>
+ </pattern>
+ <message>This is a dummy message.</message>
+ <example type="correct">Dogs 'blah blah</example>
+ <example type="incorrect">Dogs <marker>blah blah</marker></example>
+ </rule>
</rulegroup>
- <rulegroup id="test_unification_with_negation" name="Test unification with
negation">
+ <rulegroup id="test_unification_with_negation" name="Test unification with
negation" type="grammar">
<rule>
<pattern>
<unify negate="yes">
@@ -391,4 +395,5 @@
</rule>
</rulegroup>
</category>
+
</rules>
\ No newline at end of file
Modified: trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java
===================================================================
--- trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java
2012-11-25 21:29:48 UTC (rev 8453)
+++ trunk/JLanguageTool/src/test/java/org/languagetool/MainTest.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -187,7 +187,8 @@
String output = new String(this.out.toByteArray());
assertTrue("Got: " + output, output.contains("<error fromy=\"4\"
fromx=\"5\" toy=\"4\" tox=\"10\" " +
"ruleId=\"ENGLISH_WORD_REPEAT_RULE\" msg=\"Possible typo: you
repeated a word\" replacements=\"is\" " +
- "context=\"This is is a test of language tool. \"
contextoffset=\"5\" offset=\"5\" errorlength=\"5\"
category=\"Miscellaneous\"/>"));
+ "context=\"This is is a test of language tool. \"
contextoffset=\"5\" offset=\"5\" errorlength=\"5\" " +
+ "category=\"Miscellaneous\" locqualityissuetype=\"addition\"/>"));
// note: the offset is relative to the sentence... this seems wrong - it
happens because of the way
// the command line client feeds the data into the check() methods.
}
@@ -284,7 +285,7 @@
assertTrue(output.contains("<error fromy=\"0\" fromx=\"8\" toy=\"0\"
tox=\"10\" ruleId=\"EN_A_VS_AN\" " +
"msg=\"Use 'a' instead of 'an' if the following word doesn't start
with a vowel sound, e.g. 'a sentence', " +
"'a university'\" replacements=\"a\" context=\"This is an test.
This is a test of of language tool. ...\" " +
- "contextoffset=\"8\" offset=\"8\" errorlength=\"2\"
category=\"Miscellaneous\"/>"));
+ "contextoffset=\"8\" offset=\"8\" errorlength=\"2\"
category=\"Miscellaneous\" locqualityissuetype=\"terminology\"/>"));
}
public void testGermanFileWithURL() throws Exception {
@@ -318,7 +319,7 @@
assertTrue(output.contains("<error fromy=\"0\" fromx=\"8\" toy=\"0\"
tox=\"20\" ruleId=\"BRAK_PRZECINKA_KTORY\" subId=\"5\""));
//This tests whether XML encoding is actually UTF-8:
assertTrue(output.contains("msg=\"Brak przecinka w tym fragmencie zdania.
Przecinek prawdopodobnie należy postawić tak: 'świnia, która'.\"
replacements=\"świnia, która\" "));
- assertTrue(output.contains("context=\"To jest świnia która się ślini. \"
contextoffset=\"8\" offset=\"8\" errorlength=\"12\" category=\"Błędy
interpunkcyjne\"/>"));
+ assertTrue(output.contains("context=\"To jest świnia która się ślini. \"
contextoffset=\"8\" offset=\"8\" errorlength=\"12\" category=\"Błędy
interpunkcyjne\""));
}
public void testPolishLineNumbers() throws Exception {
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleLoaderTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleLoaderTest.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/rules/patterns/PatternRuleLoaderTest.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -1,6 +1,9 @@
package org.languagetool.rules.patterns;
+import java.util.ArrayList;
+import java.util.HashSet;
import java.util.List;
+import java.util.Set;
import junit.framework.TestCase;
@@ -15,16 +18,42 @@
final String name = "/xx/grammar.xml";
final List<PatternRule> rules =
prg.getRules(JLanguageTool.getDataBroker().getFromRulesDirAsStream(name), name);
assertTrue(rules.size() >= 30);
+
final Rule demoRule1 = getRuleById("DEMO_RULE", rules);
assertEquals("http://fake-server.org/foo-bar-error-explained",
demoRule1.getUrl().toString());
assertEquals("[This is <marker>fuu bah</marker>.]",
demoRule1.getCorrectExamples().toString());
final List<IncorrectExample> incorrectExamples =
demoRule1.getIncorrectExamples();
assertEquals(1, incorrectExamples.size());
assertEquals("This is <marker>foo bar</marker>.",
incorrectExamples.get(0).getExample());
+
final Rule demoRule2 = getRuleById("API_OUTPUT_TEST_RULE", rules);
assertNull(demoRule2.getUrl());
+
+ assertEquals("uncategorized", demoRule1.getLocQualityIssueType());
+ assertEquals("tag inheritance failed", "addition", getRuleById("TEST_GO",
rules).getLocQualityIssueType());
+ assertEquals("tag inheritance overwrite failed", "uncategorized",
getRuleById("TEST_PHRASES1", rules).getLocQualityIssueType());
+ assertEquals("tag inheritance overwrite failed", "characters",
getRuleById("test_include", rules).getLocQualityIssueType());
+
+ final List<Rule> groupRules1 = getRulesById("test_spacebefore", rules);
+ assertEquals("tag inheritance form category failed", "addition",
groupRules1.get(0).getLocQualityIssueType());
+ assertEquals("tag inheritance overwrite failed", "duplication",
groupRules1.get(1).getLocQualityIssueType());
+ final List<Rule> groupRules2 =
getRulesById("test_unification_with_negation", rules);
+ assertEquals("tag inheritance from rulegroup failed", "grammar",
groupRules2.get(0).getLocQualityIssueType());
+
+ final Set<String> categories = getCategoryNames(rules);
+ assertEquals(2, categories.size());
+ assertTrue(categories.contains("misc"));
+ assertTrue(categories.contains("otherCategory"));
}
-
+
+ private Set<String> getCategoryNames(List<PatternRule> rules) {
+ final Set<String> categories = new HashSet<String>();
+ for (PatternRule rule : rules) {
+ categories.add(rule.getCategory().getName());
+ }
+ return categories;
+ }
+
private Rule getRuleById(String id, List<PatternRule> rules) {
for (Rule rule : rules) {
if (rule.getId().equals(id)) {
@@ -34,4 +63,14 @@
throw new RuntimeException("No rule found for id '" + id + "'");
}
+ private List<Rule> getRulesById(String id, List<PatternRule> rules) {
+ final List<Rule> result = new ArrayList<Rule>();
+ for (Rule rule : rules) {
+ if (rule.getId().equals(id)) {
+ result.add(rule);
+ }
+ }
+ return result;
+ }
+
}
Modified:
trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java
===================================================================
---
trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java
2012-11-25 21:29:48 UTC (rev 8453)
+++
trunk/JLanguageTool/src/test/java/org/languagetool/tools/StringToolsTest.java
2012-11-25 21:50:56 UTC (rev 8454)
@@ -166,7 +166,8 @@
public void testRuleMatchesToXML() throws IOException {
final List<RuleMatch> matches = new ArrayList<RuleMatch>();
final String text = "This is an test sentence. Here's another sentence
with more text.";
- final RuleMatch match = new RuleMatch(new AvsAnRule(null), 8, 10,
"myMessage");
+ final AvsAnRule rule = new AvsAnRule(null);
+ final RuleMatch match = new RuleMatch(rule, 8, 10, "myMessage");
match.setColumn(99);
match.setEndColumn(100);
match.setLine(44);
@@ -180,7 +181,8 @@
assertTrue(matcher.matches());
assertTrue(xml.contains(">\n" +
"<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\"
ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " +
- "replacements=\"\" context=\"...s is an test...\"
contextoffset=\"8\" offset=\"8\" errorlength=\"2\"/>\n" +
+ "replacements=\"\" context=\"...s is an test...\"
contextoffset=\"8\" offset=\"8\" errorlength=\"2\" " +
+ "locqualityissuetype=\"terminology\"/>\n" +
"</matches>\n"));
}
@@ -199,7 +201,8 @@
final String xml = StringTools.ruleMatchesToXML(matches, text, 5,
StringTools.XmlPrintMode.NORMAL_XML);
assertTrue(xml.contains(">\n" +
"<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\"
ruleId=\"MY_ID\" msg=\"myMessage\" " +
- "replacements=\"\" context=\"...s is a test ...\"
contextoffset=\"8\" offset=\"8\" errorlength=\"2\" category=\"MyCategory\"/>\n"
+
+ "replacements=\"\" context=\"...s is a test ...\"
contextoffset=\"8\" offset=\"8\" errorlength=\"2\" category=\"MyCategory\" " +
+ "locqualityissuetype=\"uncategorized\"/>\n" +
"</matches>\n"));
}
@@ -224,7 +227,8 @@
final String xml = StringTools.ruleMatchesToXML(matches, text, 5,
StringTools.XmlPrintMode.NORMAL_XML);
assertTrue(xml.contains(">\n" +
"<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\"
ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " +
- "replacements=\"\" context=\"...s is an test...\"
contextoffset=\"8\" offset=\"8\" errorlength=\"2\"
url=\"http://server.org?id=1&amp;foo=bar\"/>\n" +
+ "replacements=\"\" context=\"...s is an test...\"
contextoffset=\"8\" offset=\"8\" errorlength=\"2\"
url=\"http://server.org?id=1&amp;foo=bar\" " +
+ "locqualityissuetype=\"terminology\"/>\n" +
"</matches>\n"));
}
@@ -240,7 +244,8 @@
final String xml = StringTools.ruleMatchesToXML(matches, text, 5,
StringTools.XmlPrintMode.NORMAL_XML);
assertTrue(xml.contains(">\n" +
"<error fromy=\"44\" fromx=\"98\" toy=\"45\" tox=\"99\"
ruleId=\"EN_A_VS_AN\" msg=\"myMessage\" " +
- "replacements=\"\" context=\"... is &quot;an test...\"
contextoffset=\"8\" offset=\"9\" errorlength=\"2\"/>\n" +
+ "replacements=\"\" context=\"... is &quot;an test...\"
contextoffset=\"8\" offset=\"9\" errorlength=\"2\" " +
+ "locqualityissuetype=\"terminology\"/>\n" +
"</matches>\n"));
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Monitor your physical, virtual and cloud infrastructure from a single
web console. Get in-depth insight into apps, servers, databases, vmware,
SAP, cloud infrastructure, etc. Download 30-day Free Trial.
Pricing starts from $795 for 25 servers or applications!
http://p.sf.net/sfu/zoho_dev2dev_nov
_______________________________________________
Languagetool-commits mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-commits