Revision: 6676
http://languagetool.svn.sourceforge.net/languagetool/?rev=6676&view=rev
Author: dnaber
Date: 2012-04-01 13:33:36 +0000 (Sun, 01 Apr 2012)
Log Message:
-----------
break up PatternRule into two classes
Modified Paths:
--------------
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
Added Paths:
-----------
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
Modified:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java
2012-04-01 10:46:03 UTC (rev 6675)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java
2012-04-01 13:33:36 UTC (rev 6676)
@@ -18,17 +18,15 @@
*/
package org.languagetool.rules.patterns;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
import org.languagetool.AnalyzedSentence;
-import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.Language;
import org.languagetool.rules.RuleMatch;
import org.languagetool.tools.StringTools;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
/**
* A Rule that describes a language error as a simple pattern of words or of
* part-of-speech tags.
@@ -37,24 +35,21 @@
*/
public class PatternRule extends AbstractPatternRule {
- private static final String SUGG_TAG = "<suggestion>";
- private static final String END_SUGG_TAG = "</suggestion>";
+ private final String shortMessage;
- private String subId; // because there can be more than one rule in a rule
group
+ /**
+ * A list of elements as they appear in XML file (phrases count as single
+ * tokens in case of matches or skipping).
+ */
+ private final List<Integer> elementNo;
+ private String subId; // because there can be more than one rule in a rule
group
private String message;
- private String shortMessage;
/** Formatted suggestion elements. **/
private List<Match> suggestionMatches;
/**
- * A list of elements as they appear in XML file (phrases count as single
- * tokens in case of matches or skipping).
- */
- private List<Integer> elementNo;
-
- /**
* This property is used for short-circuiting evaluation of the elementNo
list
* order.
*/
@@ -85,19 +80,10 @@
final List<Element> elements, final String description,
final String message, final String shortMessage) {
super(id, description, language, elements, false);
- if (id == null) {
- throw new NullPointerException("id cannot be null");
- }
- if (language == null) {
- throw new NullPointerException("language cannot be null");
- }
- if (elements == null) {
- throw new NullPointerException("elements cannot be null");
- }
- if (description == null) {
- throw new NullPointerException("description/name cannot be null");
- }
-
+ if (id == null) throw new NullPointerException("id cannot be null");
+ if (language == null) throw new NullPointerException("language cannot be
null");
+ if (elements == null) throw new NullPointerException("elements cannot be
null");
+ if (description == null) throw new NullPointerException("description/name
cannot be null");
this.message = message;
this.shortMessage = shortMessage;
this.elementNo = new ArrayList<Integer>();
@@ -113,7 +99,6 @@
useList = true;
} else {
elementNo.add(cnt);
- prevName = "";
curName = "";
cnt = 0;
}
@@ -194,141 +179,11 @@
}
@Override
- public final RuleMatch[] match(final AnalyzedSentence text)
- throws IOException {
- final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
- final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
- final int[] tokenPositions = new int[tokens.length + 1];
- final int patternSize = patternElements.size();
- final int limit = Math.max(0, tokens.length - patternSize + 1);
- Element elem = null;
- int i = 0;
- while (i < limit && !(sentStart && i > 0)) {
- boolean allElementsMatch = false;
- int firstMatchToken = -1;
- int lastMatchToken = -1;
- int matchingTokens = 0;
- int prevSkipNext = 0;
- // this variable keeps the total number
- // of tokens skipped
- int skipShiftTotal = 0;
- if (testUnification) {
- unifier.reset();
- }
- for (int k = 0; k < patternSize; k++) {
- final Element prevElement = elem;
- elem = patternElements.get(k);
- setupRef(firstMatchToken, elem, tokens);
- final int nextPos = i + k + skipShiftTotal;
- prevMatched = false;
- if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) { //
SENT_END?
- prevSkipNext = tokens.length - (nextPos + 1);
- }
- final int maxTok = Math.min(nextPos + prevSkipNext, tokens.length -
(patternSize - k));
- for (int m = nextPos; m <= maxTok; m++) {
- allElementsMatch = !tokens[m].isImmunized() &&
testAllReadings(tokens, elem, prevElement, m,
- firstMatchToken, prevSkipNext);
- if (allElementsMatch) {
- lastMatchToken = m;
- final int skipShift = lastMatchToken - nextPos;
- tokenPositions[matchingTokens] = skipShift + 1;
- prevSkipNext = translateElementNo(elem.getSkipNext());
- matchingTokens++;
- skipShiftTotal += skipShift;
- if (firstMatchToken == -1) {
- firstMatchToken = lastMatchToken;
- }
- break;
- }
- }
- if (!allElementsMatch) {
- break;
- }
- }
-
- if (allElementsMatch && matchingTokens == patternSize) {
- final RuleMatch ruleMatch = createRuleMatch(tokenPositions, tokens,
- firstMatchToken, lastMatchToken, matchingTokens);
- if (ruleMatch != null) {
- ruleMatches.add(ruleMatch);
- }
- }
- i++;
- }
- return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]);
+ public final RuleMatch[] match(final AnalyzedSentence text) throws
IOException {
+ final PatternRuleMatcher matcher = new PatternRuleMatcher(this, useList);
+ return matcher.match(text);
}
- private RuleMatch createRuleMatch(final int[] tokenPositions,
- final AnalyzedTokenReadings[] tokens, final int firstMatchToken,
- final int lastMatchToken, final int matchingTokens) throws IOException {
- final String errMessage = formatMatches(tokens, tokenPositions,
- firstMatchToken, message);
- int correctedStPos = 0;
- if (startPositionCorrection > 0) {
- for (int l = 0; l <= startPositionCorrection; l++) {
- correctedStPos += tokenPositions[l];
- }
- correctedStPos--;
- }
- int correctedEndPos = 0;
- if (endPositionCorrection < 0) {
- int l = 0;
- while (l > endPositionCorrection) {
- correctedEndPos -= tokenPositions[matchingTokens + l - 1];
- l--;
- }
- }
- AnalyzedTokenReadings firstMatchTokenObj = tokens[firstMatchToken
- + correctedStPos];
- boolean startsWithUppercase = StringTools
- .startsWithUppercase(firstMatchTokenObj.getToken())
- && !matchConvertsCase();
-
- if (firstMatchTokenObj.isSentStart()
- && tokens.length > firstMatchToken + correctedStPos + 1) {
- // make uppercasing work also at sentence start:
- firstMatchTokenObj = tokens[firstMatchToken + correctedStPos + 1];
- startsWithUppercase = StringTools.startsWithUppercase(firstMatchTokenObj
- .getToken());
- }
- int fromPos = tokens[firstMatchToken + correctedStPos].getStartPos();
- // FIXME: this is fishy, assumes that comma should always come before
- // whitespace
- if (errMessage.contains(SUGG_TAG + ",")
- && firstMatchToken + correctedStPos >= 1) {
- fromPos = tokens[firstMatchToken + correctedStPos - 1].getStartPos()
- + tokens[firstMatchToken + correctedStPos - 1].getToken().length();
- }
-
- final int toPos = tokens[lastMatchToken + correctedEndPos].getStartPos()
- + tokens[lastMatchToken + correctedEndPos].getToken().length();
- if (fromPos < toPos) { // this can happen with some skip="-1" when the last
- // token is not matched
- return new RuleMatch(this, fromPos, toPos,
- errMessage, shortMessage, startsWithUppercase);
- } // failed to create any rule match...
- return null;
- }
-
- /**
- * Checks if the suggestion starts with a match that is supposed to convert
- * case. If it does, stop the default conversion to uppercase.
- *
- * @return true, if the match converts the case of the token.
- */
- private boolean matchConvertsCase() {
- if (suggestionMatches != null && !suggestionMatches.isEmpty()) {
- final int sugStart = message.indexOf(SUGG_TAG) + SUGG_TAG.length();
- for (Match sMatch : suggestionMatches) {
- if (!sMatch.isInMessageOnly() && sMatch.convertsCase()
- && message.charAt(sugStart) == '\\') {
- return true;
- }
- }
- }
- return false;
- }
-
public final void addSuggestionMatch(final Match m) {
if (suggestionMatches == null) {
suggestionMatches = new ArrayList<Match>();
@@ -337,253 +192,23 @@
}
/**
- * Gets the index of the element indexed by i, adding any offsets because of
- * the phrases in the rule.
- *
- * @param i
- * Current element index.
- * @return int Index translated into XML element no.
+ * For testing only.
*/
- private int translateElementNo(final int i) {
- if (!useList || i < 0) {
- return i;
- }
- int j = 0;
- for (int k = 0; k < i; k++) {
- j += elementNo.get(k);
- }
- return j;
+ public final List<Element> getElements() {
+ return patternElements;
}
- /**
- * Returns true when the token in the rule references a phrase composed of
- * many tokens.
- *
- * @param i
- * The index of the token.
- * @return true if the phrase is under the index, false otherwise.
- **/
- private int phraseLen(final int i) {
- if (!useList || i > (elementNo.size() - 1)) {
- return 1;
- }
- return elementNo.get(i);
+ List<Integer> getElementNo() {
+ return elementNo;
}
- /**
- * Creates a Cartesian product of the arrays stored in the input array.
- *
- * @param input
- * Array of string arrays to combine.
- * @param output
- * Work array of strings.
- * @param r
- * Starting parameter (use 0 to get all combinations).
- * @param lang
- * Text language for adding spaces in some languages.
- * @return Combined array of @String.
- */
- private static String[] combineLists(final String[][] input,
- final String[] output, final int r, final Language lang) {
- final List<String> outputList = new ArrayList<String>();
- if (r == input.length) {
- final StringBuilder sb = new StringBuilder();
- for (int k = 0; k < output.length; k++) {
- sb.append(output[k]);
- if (k < output.length - 1) {
- sb.append(StringTools.addSpace(output[k + 1], lang));
- }
- }
- outputList.add(sb.toString());
- } else {
- for (int c = 0; c < input[r].length; c++) {
- output[r] = input[r][c];
- final String[] sList = combineLists(input, output, r + 1, lang);
- outputList.addAll(Arrays.asList(sList));
- }
- }
- return outputList.toArray(new String[outputList.size()]);
+ String getShortMessage() {
+ return shortMessage;
}
-
- /**
- * Concatenates the matches, and takes care of phrases (including inflection
- * using synthesis).
- *
- * @param start
- * Position of the element as referenced by match element in the
- * rule.
- * @param index
- * The index of the element found in the matching sentence.
- * @param tokenIndex
- * The position of the token in the AnalyzedTokenReadings array.
- * @param tokens
- * Array of @AnalyzedTokenReadings
- * @return @String[] Array of concatenated strings
- * @throws IOException
- * in case disk operations (used in synthesizer) go wrong.
- */
- private String[] concatMatches(final int start, final int index,
- final int tokenIndex, final AnalyzedTokenReadings[] tokens,
- final int nextTokenPos)
- throws IOException {
- String[] finalMatch = null;
- if (suggestionMatches.get(start) != null) {
- final int len = phraseLen(index);
- if (len == 1) {
- final int skippedTokens = nextTokenPos - tokenIndex;
- suggestionMatches.get(start).setToken(tokens, tokenIndex - 1,
skippedTokens);
- suggestionMatches.get(start).setSynthesizer(language.getSynthesizer());
- finalMatch = suggestionMatches.get(start).toFinalString();
- } else {
- final List<String[]> matchList = new ArrayList<String[]>();
- for (int i = 0; i < len; i++) {
- final int skippedTokens = nextTokenPos - (tokenIndex + i);
- suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i,
skippedTokens);
- suggestionMatches.get(start)
- .setSynthesizer(language.getSynthesizer());
- matchList.add(suggestionMatches.get(start).toFinalString());
- }
- return combineLists(matchList.toArray(new String[matchList.size()][]),
- new String[matchList.size()], 0, language);
- }
- }
- return finalMatch;
+
+ List<Match> getSuggestionMatches() {
+ return suggestionMatches;
}
- /**
- * Replace back references generated with <match> and \\1 in message
- * using Match class, and take care of skipping.
- *
- * @param tokenReadings
- * Array of AnalyzedTokenReadings that were matched against the
- * pattern
- * @param positions
- * Array of relative positions of matched tokens
- * @param firstMatchTok
- * Position of the first matched token
- * @param errorMsg
- * String containing suggestion markup
- * @return String Formatted message.
- * @throws IOException
- */
- private String formatMatches(final AnalyzedTokenReadings[] tokenReadings,
- final int[] positions, final int firstMatchTok, final String errorMsg)
- throws IOException {
- String errorMessage = errorMsg;
- int matchCounter = 0;
- final int[] numbersToMatches = new int[errorMsg.length()];
- boolean newWay = false;
- int errLen = errorMessage.length();
- int errMarker = errorMessage.indexOf('\\');
- boolean numberFollows = false;
- if (errMarker >= 0 && errMarker < errLen - 1) {
- numberFollows = StringTools.isPositiveNumber(errorMessage
- .charAt(errMarker + 1));
- }
- while (errMarker >= 0 && numberFollows) {
- final int backslashPos = errorMessage.indexOf('\\');
- if (backslashPos >= 0 &&
StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + 1))) {
- int numLen = 1;
- while (backslashPos + numLen < errorMessage.length()
- && StringTools.isPositiveNumber(errorMessage.charAt(backslashPos +
numLen))) {
- numLen++;
- }
- final int j = Integer.parseInt(errorMessage.substring(backslashPos +
1, backslashPos
- + numLen)) - 1;
- int repTokenPos = 0;
- int nextTokenPos = 0;
- for (int l = 0; l <= j; l++) {
- repTokenPos += positions[l];
- }
- if (j <= positions.length) {
- nextTokenPos = firstMatchTok + repTokenPos + positions[j + 1];
- }
- if (suggestionMatches != null) {
- if (matchCounter < suggestionMatches.size()) {
- numbersToMatches[j] = matchCounter;
- if (suggestionMatches.get(matchCounter) != null) {
- final String[] matches = concatMatches(matchCounter, j,
- firstMatchTok + repTokenPos, tokenReadings, nextTokenPos);
- final String leftSide = errorMessage.substring(0, backslashPos);
- final String rightSide = errorMessage.substring(backslashPos +
numLen);
- if (matches.length == 1) {
- errorMessage = leftSide + matches[0] + rightSide;
- } else {
- errorMessage = formatMultipleSynthesis(matches, leftSide,
- rightSide);
- }
- matchCounter++;
- newWay = true;
- }
- } else {
- // FIXME: is this correct? this is how we deal with multiple
matches
- suggestionMatches.add(suggestionMatches.get(numbersToMatches[j]));
- }
- }
-
- if (!newWay) {
- // in case <match> elements weren't used (yet)
- errorMessage = errorMessage.replace("\\" + (j + 1),
- tokenReadings[firstMatchTok + repTokenPos - 1].getToken());
- }
- }
- errMarker = errorMessage.indexOf('\\');
- numberFollows = false;
- errLen = errorMessage.length();
- if (errMarker >= 0 && errMarker < errLen - 1) {
- numberFollows = StringTools.isPositiveNumber(errorMessage
- .charAt(errMarker + 1));
- }
- }
- return errorMessage;
- }
-
- private static String formatMultipleSynthesis(final String[] matches,
- final String leftSide, final String rightSide) {
- String errorMessage = "";
- String suggestionLeft = "";
- String suggestionRight = "";
- String rightSideNew = rightSide;
- final int sPos = leftSide.lastIndexOf(SUGG_TAG);
- if (sPos > 0) {
- suggestionLeft = leftSide.substring(sPos + SUGG_TAG.length());
- }
- if (StringTools.isEmpty(suggestionLeft)) {
- errorMessage = leftSide;
- } else {
- errorMessage = leftSide.substring(0, leftSide.lastIndexOf(SUGG_TAG))
- + SUGG_TAG;
- }
- final int rPos = rightSide.indexOf(END_SUGG_TAG);
- if (rPos > 0) {
- suggestionRight = rightSide.substring(0, rPos);
- }
- if (!StringTools.isEmpty(suggestionRight)) {
- rightSideNew = rightSide.substring(rightSide.indexOf(END_SUGG_TAG));
- }
- final int lastLeftSugEnd = leftSide.indexOf(END_SUGG_TAG);
- final int lastLeftSugStart = leftSide.lastIndexOf(SUGG_TAG);
- final StringBuilder sb = new StringBuilder();
- sb.append(errorMessage);
- for (int z = 0; z < matches.length; z++) {
- sb.append(suggestionLeft);
- sb.append(matches[z]);
- sb.append(suggestionRight);
- if ((z < matches.length - 1) && lastLeftSugEnd < lastLeftSugStart) {
- sb.append(END_SUGG_TAG);
- sb.append(", ");
- sb.append(SUGG_TAG);
- }
- }
- sb.append(rightSideNew);
- return sb.toString();
- }
-
- /**
- * For testing only.
- */
- public final List<Element> getElements() {
- return patternElements;
- }
-
+
}
Added:
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
===================================================================
---
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
(rev 0)
+++
trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
2012-04-01 13:33:36 UTC (rev 6676)
@@ -0,0 +1,413 @@
+package org.languagetool.rules.patterns;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+import org.languagetool.rules.RuleMatch;
+import org.languagetool.tools.StringTools;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Matches a pattern rule against plain text.
+ */
+class PatternRuleMatcher {
+
+ private static final String SUGGESTION_START_TAG = "<suggestion>";
+ private static final String SUGGESTION_END_TAG = "</suggestion>";
+
+ private final PatternRule rule;
+ private final boolean useList;
+
+ PatternRuleMatcher(PatternRule rule, boolean useList) {
+ this.rule = rule;
+ this.useList = useList;
+ }
+
+ final RuleMatch[] match(final AnalyzedSentence text)
+ throws IOException {
+ final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+ final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+ final int[] tokenPositions = new int[tokens.length + 1];
+ final int patternSize = rule.patternElements.size();
+ final int limit = Math.max(0, tokens.length - patternSize + 1);
+ Element elem = null;
+ int i = 0;
+ while (i < limit && !(rule.sentStart && i > 0)) {
+ boolean allElementsMatch = false;
+ int firstMatchToken = -1;
+ int lastMatchToken = -1;
+ int matchingTokens = 0;
+ int prevSkipNext = 0;
+ // this variable keeps the total number
+ // of tokens skipped
+ int skipShiftTotal = 0;
+ if (rule.testUnification) {
+ rule.unifier.reset();
+ }
+ for (int k = 0; k < patternSize; k++) {
+ final Element prevElement = elem;
+ elem = rule.patternElements.get(k);
+ rule.setupRef(firstMatchToken, elem, tokens);
+ final int nextPos = i + k + skipShiftTotal;
+ rule.prevMatched = false;
+ if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) {
// SENT_END?
+ prevSkipNext = tokens.length - (nextPos + 1);
+ }
+ final int maxTok = Math.min(nextPos + prevSkipNext, tokens.length -
(patternSize - k));
+ for (int m = nextPos; m <= maxTok; m++) {
+ allElementsMatch = !tokens[m].isImmunized() &&
rule.testAllReadings(tokens, elem, prevElement, m,
+ firstMatchToken, prevSkipNext);
+ if (allElementsMatch) {
+ lastMatchToken = m;
+ final int skipShift = lastMatchToken - nextPos;
+ tokenPositions[matchingTokens] = skipShift + 1;
+ prevSkipNext = translateElementNo(elem.getSkipNext());
+ matchingTokens++;
+ skipShiftTotal += skipShift;
+ if (firstMatchToken == -1) {
+ firstMatchToken = lastMatchToken;
+ }
+ break;
+ }
+ }
+ if (!allElementsMatch) {
+ break;
+ }
+ }
+
+ if (allElementsMatch && matchingTokens == patternSize) {
+ final RuleMatch ruleMatch = createRuleMatch(tokenPositions, tokens,
+ firstMatchToken, lastMatchToken, matchingTokens);
+ if (ruleMatch != null) {
+ ruleMatches.add(ruleMatch);
+ }
+ }
+ i++;
+ }
+ return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]);
+ }
+
+ private RuleMatch createRuleMatch(final int[] tokenPositions,
+ final AnalyzedTokenReadings[] tokens, final int firstMatchToken,
+ final int lastMatchToken, final int matchingTokens) throws IOException
{
+ final String errMessage = formatMatches(tokens, tokenPositions,
+ firstMatchToken, rule.getMessage());
+ int correctedStPos = 0;
+ if (rule.startPositionCorrection > 0) {
+ for (int l = 0; l <= rule.startPositionCorrection; l++) {
+ correctedStPos += tokenPositions[l];
+ }
+ correctedStPos--;
+ }
+ int correctedEndPos = 0;
+ if (rule.endPositionCorrection < 0) {
+ int l = 0;
+ while (l > rule.endPositionCorrection) {
+ correctedEndPos -= tokenPositions[matchingTokens + l - 1];
+ l--;
+ }
+ }
+ AnalyzedTokenReadings firstMatchTokenObj = tokens[firstMatchToken
+ + correctedStPos];
+ boolean startsWithUppercase = StringTools
+ .startsWithUppercase(firstMatchTokenObj.getToken())
+ && !matchConvertsCase();
+
+ if (firstMatchTokenObj.isSentStart()
+ && tokens.length > firstMatchToken + correctedStPos + 1) {
+ // make uppercasing work also at sentence start:
+ firstMatchTokenObj = tokens[firstMatchToken + correctedStPos + 1];
+ startsWithUppercase =
StringTools.startsWithUppercase(firstMatchTokenObj
+ .getToken());
+ }
+ int fromPos = tokens[firstMatchToken + correctedStPos].getStartPos();
+ // FIXME: this is fishy, assumes that comma should always come before
+ // whitespace
+ if (errMessage.contains(SUGGESTION_START_TAG + ",")
+ && firstMatchToken + correctedStPos >= 1) {
+ fromPos = tokens[firstMatchToken + correctedStPos - 1].getStartPos()
+ + tokens[firstMatchToken + correctedStPos - 1].getToken().length();
+ }
+
+ final int toPos = tokens[lastMatchToken + correctedEndPos].getStartPos()
+ + tokens[lastMatchToken + correctedEndPos].getToken().length();
+ if (fromPos < toPos) { // this can happen with some skip="-1" when the
last
+ // token is not matched
+ return new RuleMatch(rule, fromPos, toPos,
+ errMessage, rule.getShortMessage(), startsWithUppercase);
+ } // failed to create any rule match...
+ return null;
+ }
+
+ /**
+ * Checks if the suggestion starts with a match that is supposed to convert
+ * case. If it does, stop the default conversion to uppercase.
+ *
+ * @return true, if the match converts the case of the token.
+ */
+ private boolean matchConvertsCase() {
+ final List<Match> suggestionMatches = rule.getSuggestionMatches();
+ if (suggestionMatches != null && !suggestionMatches.isEmpty()) {
+ final int sugStart = rule.getMessage().indexOf(SUGGESTION_START_TAG) +
SUGGESTION_START_TAG.length();
+ for (Match sMatch : suggestionMatches) {
+ if (!sMatch.isInMessageOnly() && sMatch.convertsCase()
+ && rule.getMessage().charAt(sugStart) == '\\') {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ /**
+ * Gets the index of the element indexed by i, adding any offsets because of
+ * the phrases in the rule.
+ *
+ * @param i
+ * Current element index.
+ * @return int Index translated into XML element no.
+ */
+ private int translateElementNo(final int i) {
+ if (!useList || i < 0) {
+ return i;
+ }
+ int j = 0;
+ for (int k = 0; k < i; k++) {
+ j += rule.getElementNo().get(k);
+ }
+ return j;
+ }
+
+ /**
+ * Replace back references generated with <match> and \\1 in message
+ * using Match class, and take care of skipping.
+ *
+ * @param tokenReadings
+ * Array of AnalyzedTokenReadings that were matched against the
+ * pattern
+ * @param positions
+ * Array of relative positions of matched tokens
+ * @param firstMatchTok
+ * Position of the first matched token
+ * @param errorMsg
+ * String containing suggestion markup
+ * @return String Formatted message.
+ * @throws IOException
+ */
+ private String formatMatches(final AnalyzedTokenReadings[] tokenReadings,
+ final int[] positions, final int firstMatchTok, final String errorMsg)
+ throws IOException {
+ String errorMessage = errorMsg;
+ int matchCounter = 0;
+ final int[] numbersToMatches = new int[errorMsg.length()];
+ boolean newWay = false;
+ int errLen = errorMessage.length();
+ int errMarker = errorMessage.indexOf('\\');
+ boolean numberFollows = false;
+ if (errMarker >= 0 && errMarker < errLen - 1) {
+ numberFollows = StringTools.isPositiveNumber(errorMessage
+ .charAt(errMarker + 1));
+ }
+ while (errMarker >= 0 && numberFollows) {
+ final int backslashPos = errorMessage.indexOf('\\');
+ if (backslashPos >= 0 &&
StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + 1))) {
+ int numLen = 1;
+ while (backslashPos + numLen < errorMessage.length()
+ && StringTools.isPositiveNumber(errorMessage.charAt(backslashPos +
numLen))) {
+ numLen++;
+ }
+ final int j = Integer.parseInt(errorMessage.substring(backslashPos +
1, backslashPos
+ + numLen)) - 1;
+ int repTokenPos = 0;
+ int nextTokenPos = 0;
+ for (int l = 0; l <= j; l++) {
+ repTokenPos += positions[l];
+ }
+ if (j <= positions.length) {
+ nextTokenPos = firstMatchTok + repTokenPos + positions[j + 1];
+ }
+ final List<Match> suggestionMatches = rule.getSuggestionMatches();
+ if (suggestionMatches != null) {
+ if (matchCounter < suggestionMatches.size()) {
+ numbersToMatches[j] = matchCounter;
+ if (suggestionMatches.get(matchCounter) != null) {
+ final String[] matches = concatMatches(matchCounter, j,
+ firstMatchTok + repTokenPos, tokenReadings, nextTokenPos);
+ final String leftSide = errorMessage.substring(0, backslashPos);
+ final String rightSide = errorMessage.substring(backslashPos +
numLen);
+ if (matches.length == 1) {
+ errorMessage = leftSide + matches[0] + rightSide;
+ } else {
+ errorMessage = formatMultipleSynthesis(matches, leftSide,
+ rightSide);
+ }
+ matchCounter++;
+ newWay = true;
+ }
+ } else {
+ // FIXME: is this correct? this is how we deal with multiple
matches
+ suggestionMatches.add(suggestionMatches.get(numbersToMatches[j]));
+ }
+ }
+
+ if (!newWay) {
+ // in case <match> elements weren't used (yet)
+ errorMessage = errorMessage.replace("\\" + (j + 1),
+ tokenReadings[firstMatchTok + repTokenPos - 1].getToken());
+ }
+ }
+ errMarker = errorMessage.indexOf('\\');
+ numberFollows = false;
+ errLen = errorMessage.length();
+ if (errMarker >= 0 && errMarker < errLen - 1) {
+ numberFollows = StringTools.isPositiveNumber(errorMessage
+ .charAt(errMarker + 1));
+ }
+ }
+ return errorMessage;
+ }
+
+ private static String formatMultipleSynthesis(final String[] matches,
+ final String leftSide, final String rightSide) {
+ final String errorMessage;
+ String suggestionLeft = "";
+ String suggestionRight = "";
+ String rightSideNew = rightSide;
+ final int sPos = leftSide.lastIndexOf(SUGGESTION_START_TAG);
+ if (sPos > 0) {
+ suggestionLeft = leftSide.substring(sPos +
SUGGESTION_START_TAG.length());
+ }
+ if (StringTools.isEmpty(suggestionLeft)) {
+ errorMessage = leftSide;
+ } else {
+ errorMessage = leftSide.substring(0,
leftSide.lastIndexOf(SUGGESTION_START_TAG))
+ + SUGGESTION_START_TAG;
+ }
+ final int rPos = rightSide.indexOf(SUGGESTION_END_TAG);
+ if (rPos > 0) {
+ suggestionRight = rightSide.substring(0, rPos);
+ }
+ if (!StringTools.isEmpty(suggestionRight)) {
+ rightSideNew =
rightSide.substring(rightSide.indexOf(SUGGESTION_END_TAG));
+ }
+ final int lastLeftSugEnd = leftSide.indexOf(SUGGESTION_END_TAG);
+ final int lastLeftSugStart = leftSide.lastIndexOf(SUGGESTION_START_TAG);
+ final StringBuilder sb = new StringBuilder();
+ sb.append(errorMessage);
+ for (int z = 0; z < matches.length; z++) {
+ sb.append(suggestionLeft);
+ sb.append(matches[z]);
+ sb.append(suggestionRight);
+ if ((z < matches.length - 1) && lastLeftSugEnd < lastLeftSugStart) {
+ sb.append(SUGGESTION_END_TAG);
+ sb.append(", ");
+ sb.append(SUGGESTION_START_TAG);
+ }
+ }
+ sb.append(rightSideNew);
+ return sb.toString();
+ }
+
+ /**
+ * Concatenates the matches, and takes care of phrases (including inflection
+ * using synthesis).
+ *
+ * @param start
+ * Position of the element as referenced by match element in the
+ * rule.
+ * @param index
+ * The index of the element found in the matching sentence.
+ * @param tokenIndex
+ * The position of the token in the AnalyzedTokenReadings array.
+ * @param tokens
+ * Array of @AnalyzedTokenReadings
+ * @return @String[] Array of concatenated strings
+ * @throws IOException
+ * in case disk operations (used in synthesizer) go wrong.
+ */
+ private String[] concatMatches(final int start, final int index,
+ final int tokenIndex, final AnalyzedTokenReadings[] tokens,
+ final int nextTokenPos)
+ throws IOException {
+ String[] finalMatch = null;
+ final List<Match> suggestionMatches = rule.getSuggestionMatches();
+ if (suggestionMatches.get(start) != null) {
+ final int len = phraseLen(index);
+ final Language language = rule.language;
+ if (len == 1) {
+ final int skippedTokens = nextTokenPos - tokenIndex;
+ suggestionMatches.get(start).setToken(tokens, tokenIndex - 1,
skippedTokens);
+ suggestionMatches.get(start).setSynthesizer(language.getSynthesizer());
+ finalMatch = suggestionMatches.get(start).toFinalString();
+ } else {
+ final List<String[]> matchList = new ArrayList<String[]>();
+ for (int i = 0; i < len; i++) {
+ final int skippedTokens = nextTokenPos - (tokenIndex + i);
+ suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i,
skippedTokens);
+ suggestionMatches.get(start)
+ .setSynthesizer(language.getSynthesizer());
+ matchList.add(suggestionMatches.get(start).toFinalString());
+ }
+ return combineLists(matchList.toArray(new String[matchList.size()][]),
+ new String[matchList.size()], 0, language);
+ }
+ }
+ return finalMatch;
+ }
+
+ /**
+ * Returns true when the token in the rule references a phrase composed of
+ * many tokens.
+ *
+ * @param i
+ * The index of the token.
+ * @return true if the phrase is under the index, false otherwise.
+ **/
+ private int phraseLen(final int i) {
+ final List<Integer> elementNo = rule.getElementNo();
+ if (!useList || i > (elementNo.size() - 1)) {
+ return 1;
+ }
+ return elementNo.get(i);
+ }
+
+ /**
+ * Creates a Cartesian product of the arrays stored in the input array.
+ *
+ * @param input
+ * Array of string arrays to combine.
+ * @param output
+ * Work array of strings.
+ * @param r
+ * Starting parameter (use 0 to get all combinations).
+ * @param lang
+ * Text language for adding spaces in some languages.
+ * @return Combined array of @String.
+ */
+ private static String[] combineLists(final String[][] input,
+ final String[] output, final int r, final Language lang) {
+ final List<String> outputList = new ArrayList<String>();
+ if (r == input.length) {
+ final StringBuilder sb = new StringBuilder();
+ for (int k = 0; k < output.length; k++) {
+ sb.append(output[k]);
+ if (k < output.length - 1) {
+ sb.append(StringTools.addSpace(output[k + 1], lang));
+ }
+ }
+ outputList.add(sb.toString());
+ } else {
+ for (int c = 0; c < input[r].length; c++) {
+ output[r] = input[r][c];
+ final String[] sList = combineLists(input, output, r + 1, lang);
+ outputList.addAll(Arrays.asList(sList));
+ }
+ }
+ return outputList.toArray(new String[outputList.size()]);
+ }
+
+}
Modified:
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
===================================================================
---
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
2012-04-01 10:46:03 UTC (rev 6675)
+++
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
2012-04-01 13:33:36 UTC (rev 6676)
@@ -626,7 +626,7 @@
final String left, final String right) throws Exception {
final Class[] argClasses = { String[].class, String.class, String.class };
final Object[] argObjects = { suggestions, left, right };
- return TestTools.callStringStaticMethod(PatternRule.class,
+ return TestTools.callStringStaticMethod(PatternRuleMatcher.class,
"formatMultipleSynthesis", argClasses, argObjects);
}
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
This SF email is sponsored by:
Try Windows Azure free for 90 days Click Here
http://p.sf.net/sfu/sfd2d-msazure
_______________________________________________
Languagetool-cvs mailing list
Languagetool-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs