Revision: 6648
http://languagetool.svn.sourceforge.net/languagetool/?rev=6648&view=rev
Author: dnaber
Date: 2012-03-25 11:28:52 +0000 (Sun, 25 Mar 2012)
Log Message:
-----------
small code cleanup
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishUnpairedBracketsRule.java
trunk/JLanguageTool/src/java/org/languagetool/rules/ru/RussianUnpairedBracketsRule.java
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
2012-03-25 11:15:47 UTC (rev 6647)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/GenericUnpairedBracketsRule.java
2012-03-25 11:28:52 UTC (rev 6648)
@@ -39,6 +39,13 @@
*/
public class GenericUnpairedBracketsRule extends Rule {
+ private static final Pattern NUMERALS_EN =
+
Pattern.compile("(?i)\\d{1,2}?[a-z']*|M*(D?C{0,3}|C[DM])(L?X{0,3}|X[LC])(V?I{0,3}|I[VX])$");
+ private static final Pattern PUNCTUATION = Pattern.compile("\\p{Punct}");
+ private static final Pattern PUNCTUATION_NO_DOT =
+ Pattern.compile("[\\p{Punct}&&[^\\.]]");
+
+ protected Pattern numerals;
protected String[] startSymbols;
protected String[] endSymbols;
@@ -47,37 +54,22 @@
*/
protected final UnsyncStack<SymbolLocator> symbolStack = new
UnsyncStack<SymbolLocator>();
- /**
- * Stack of rule matches.
- */
+ // Stack of rule matches.
private final UnsyncStack<RuleMatchLocator> ruleMatchStack = new
UnsyncStack<RuleMatchLocator>();
private boolean endOfParagraph;
-
- private static final Pattern PUNCTUATION = Pattern.compile("\\p{Punct}");
- private static final Pattern PUNCTUATION_NO_DOT = Pattern
- .compile("[\\p{Punct}&&[^\\.]]");
-
- public static Pattern NUMERALS;
-
- private static final Pattern NUMERALS_EN = Pattern
-
.compile("(?i)\\d{1,2}?[a-z']*|M*(D?C{0,3}|C[DM])(L?X{0,3}|X[LC])(V?I{0,3}|I[VX])$");
-
private int ruleMatchIndex;
private List<RuleMatch> ruleMatches;
-
private Map<String,Boolean> uniqueMap;
public GenericUnpairedBracketsRule(final ResourceBundle messages,
final Language language) {
super(messages);
super.setCategory(new Category(messages.getString("category_misc")));
-
setParagraphBackTrack(true);
startSymbols = language.getUnpairedRuleStartSymbols();
endSymbols = language.getUnpairedRuleEndSymbols();
-
- NUMERALS=NUMERALS_EN;
+ numerals = NUMERALS_EN;
uniqueMapInit();
}
@@ -98,7 +90,7 @@
int found = 0;
for (String endSymbol1 : endSymbols) {
if (endSymbol1.equals(endSymbol)) {
- found++;
+ found++;
}
}
uniqueMap.put(endSymbol, found == 1);
@@ -108,16 +100,11 @@
/**
* Generic method to specify an exception. For unspecified
* language, it simply returns true, which means no exception.
- * @param token
- * String token
- * @param tokens
- * Sentence tokens
- * @param i
- * Current token index
- * @param precSpace
- * boolean: is preceded with space
- * @param follSpace
- * boolean: is followed with space
+ * @param token String token
+ * @param tokens Sentence tokens
+ * @param i Current token index
+ * @param precSpace is preceded with space
+ * @param follSpace is followed with space
* @return
*/
protected boolean isNoException(final String token,
@@ -167,7 +154,7 @@
} else if (noException && followedByWhitespace
&& token.equals(endSymbols[j])) {
if (i > 1 && endSymbols[j].equals(")")
- && (NUMERALS.matcher(tokens[i - 1].getToken()).matches()
+ && (numerals.matcher(tokens[i - 1].getToken()).matches()
&& !(!symbolStack.empty()
&& "(".equals(symbolStack.peek().symbol)))) {
} else {
@@ -224,7 +211,6 @@
ruleMatches.remove(rLoc.myIndex);
ruleMatchStack.pop();
return null;
- // if (ruleMatches.get(rLoc.myIndex).getFromPos())
}
if (isInMatches(rLoc.index)) {
setAsDeleted(rLoc.index);
@@ -234,8 +220,7 @@
}
}
}
- ruleMatchStack.push(new RuleMatchLocator(symbol, ruleMatchIndex,
- ruleMatches.size()));
+ ruleMatchStack.push(new RuleMatchLocator(symbol, ruleMatchIndex,
ruleMatches.size()));
ruleMatchIndex++;
return new RuleMatch(this, startPos, startPos + symbol.length(), messages
.getString("unpaired_brackets"));
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishUnpairedBracketsRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishUnpairedBracketsRule.java
2012-03-25 11:15:47 UTC (rev 6647)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/en/EnglishUnpairedBracketsRule.java
2012-03-25 11:28:52 UTC (rev 6648)
@@ -47,14 +47,13 @@
}
@Override
- protected boolean isNoException(final String token,
+ protected boolean isNoException(final String tokenStr,
final AnalyzedTokenReadings[] tokens, final int i, final int j, final
boolean precSpace,
final boolean follSpace) {
-
-//TODO: add an', o', 'till, 'tain't, 'cept, 'fore in the disambiguator
-//and mark up as contractions somehow
-// add exception for dates like '52
+ //TODO: add an', o', 'till, 'tain't, 'cept, 'fore in the disambiguator
+ //and mark up as contractions somehow
+ // add exception for dates like '52
if (i <= 1) {
return true;
@@ -62,25 +61,26 @@
if (!precSpace && follSpace) {
// exception for English inches, e.g., 20"
- if ("\"".equals(token)
- && NUMBER.matcher(tokens[i - 1].getToken()).matches()) {
+ final AnalyzedTokenReadings prevToken = tokens[i - 1];
+ if ("\"".equals(tokenStr)
+ && NUMBER.matcher(prevToken.getToken()).matches()) {
return false;
}
- // Exception for English plural Saxon genetive
+ // Exception for English plural Saxon genitive
// current disambiguation scheme is a bit too greedy
// for adjectives
- if ("'".equals(token) && tokens[i].hasPosTag("POS")) {
+ if ("'".equals(tokenStr) && tokens[i].hasPosTag("POS")) {
return false;
}
// puttin' on the Ritz
- if ("'".equals(token) && tokens[i - 1].hasPosTag("VBG")
- && tokens[i - 1].getToken().endsWith("in")) {
+ if ("'".equals(tokenStr) && prevToken.hasPosTag("VBG")
+ && prevToken.getToken().endsWith("in")) {
return false;
}
}
if (precSpace && !follSpace) {
// hold 'em!
- if ("'".equals(token) && i + 1 < tokens.length
+ if ("'".equals(tokenStr) && i + 1 < tokens.length
&& "em".equals(tokens[i + 1].getToken())) {
return false;
}
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/ru/RussianUnpairedBracketsRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/ru/RussianUnpairedBracketsRule.java
2012-03-25 11:15:47 UTC (rev 6647)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/ru/RussianUnpairedBracketsRule.java
2012-03-25 11:28:52 UTC (rev 6648)
@@ -21,7 +21,6 @@
import java.util.ResourceBundle;
import java.util.regex.Pattern;
-import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.rules.GenericUnpairedBracketsRule;
@@ -31,14 +30,12 @@
private static final String[] RU_END_SYMBOLS = {")", "}", "“", "»", "\"",
"'"};
private static final Pattern NUMERALS_RU =
Pattern.compile("(?i)\\d{1,2}?[а-я]*|[а-я]|[А-Я]|[а-я][а-я]|[А-Я][А-Я]|(?i)\\d{1,2}?[a-z']*|M*(D?C{0,3}|C[DM])(L?X{0,3}|X[LC])(V?I{0,3}|I[VX])$");
-
-
public RussianUnpairedBracketsRule(final ResourceBundle messages,
final Language language) {
super(messages, language);
startSymbols = RU_START_SYMBOLS;
endSymbols = RU_END_SYMBOLS;
- NUMERALS=NUMERALS_RU;
+ numerals = NUMERALS_RU;
uniqueMapInit();
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
This SF email is sponsored by:
Try Windows Azure free for 90 days Click Here
http://p.sf.net/sfu/sfd2d-msazure
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs