Revision: 6676 http://languagetool.svn.sourceforge.net/languagetool/?rev=6676&view=rev Author: dnaber Date: 2012-04-01 13:33:36 +0000 (Sun, 01 Apr 2012) Log Message: ----------- break up PatternRule into two classes
Modified Paths: -------------- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java Added Paths: ----------- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java 2012-04-01 10:46:03 UTC (rev 6675) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java 2012-04-01 13:33:36 UTC (rev 6676) @@ -18,17 +18,15 @@ */ package org.languagetool.rules.patterns; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.List; - import org.languagetool.AnalyzedSentence; -import org.languagetool.AnalyzedTokenReadings; import org.languagetool.Language; import org.languagetool.rules.RuleMatch; import org.languagetool.tools.StringTools; +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + /** * A Rule that describes a language error as a simple pattern of words or of * part-of-speech tags. @@ -37,24 +35,21 @@ */ public class PatternRule extends AbstractPatternRule { - private static final String SUGG_TAG = "<suggestion>"; - private static final String END_SUGG_TAG = "</suggestion>"; + private final String shortMessage; - private String subId; // because there can be more than one rule in a rule group + /** + * A list of elements as they appear in XML file (phrases count as single + * tokens in case of matches or skipping). + */ + private final List<Integer> elementNo; + private String subId; // because there can be more than one rule in a rule group private String message; - private String shortMessage; /** Formatted suggestion elements. 
**/ private List<Match> suggestionMatches; /** - * A list of elements as they appear in XML file (phrases count as single - * tokens in case of matches or skipping). - */ - private List<Integer> elementNo; - - /** * This property is used for short-circuiting evaluation of the elementNo list * order. */ @@ -85,19 +80,10 @@ final List<Element> elements, final String description, final String message, final String shortMessage) { super(id, description, language, elements, false); - if (id == null) { - throw new NullPointerException("id cannot be null"); - } - if (language == null) { - throw new NullPointerException("language cannot be null"); - } - if (elements == null) { - throw new NullPointerException("elements cannot be null"); - } - if (description == null) { - throw new NullPointerException("description/name cannot be null"); - } - + if (id == null) throw new NullPointerException("id cannot be null"); + if (language == null) throw new NullPointerException("language cannot be null"); + if (elements == null) throw new NullPointerException("elements cannot be null"); + if (description == null) throw new NullPointerException("description/name cannot be null"); this.message = message; this.shortMessage = shortMessage; this.elementNo = new ArrayList<Integer>(); @@ -113,7 +99,6 @@ useList = true; } else { elementNo.add(cnt); - prevName = ""; curName = ""; cnt = 0; } @@ -194,141 +179,11 @@ } @Override - public final RuleMatch[] match(final AnalyzedSentence text) - throws IOException { - final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(); - final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace(); - final int[] tokenPositions = new int[tokens.length + 1]; - final int patternSize = patternElements.size(); - final int limit = Math.max(0, tokens.length - patternSize + 1); - Element elem = null; - int i = 0; - while (i < limit && !(sentStart && i > 0)) { - boolean allElementsMatch = false; - int firstMatchToken = -1; - int lastMatchToken = -1; - 
int matchingTokens = 0; - int prevSkipNext = 0; - // this variable keeps the total number - // of tokens skipped - int skipShiftTotal = 0; - if (testUnification) { - unifier.reset(); - } - for (int k = 0; k < patternSize; k++) { - final Element prevElement = elem; - elem = patternElements.get(k); - setupRef(firstMatchToken, elem, tokens); - final int nextPos = i + k + skipShiftTotal; - prevMatched = false; - if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) { // SENT_END? - prevSkipNext = tokens.length - (nextPos + 1); - } - final int maxTok = Math.min(nextPos + prevSkipNext, tokens.length - (patternSize - k)); - for (int m = nextPos; m <= maxTok; m++) { - allElementsMatch = !tokens[m].isImmunized() && testAllReadings(tokens, elem, prevElement, m, - firstMatchToken, prevSkipNext); - if (allElementsMatch) { - lastMatchToken = m; - final int skipShift = lastMatchToken - nextPos; - tokenPositions[matchingTokens] = skipShift + 1; - prevSkipNext = translateElementNo(elem.getSkipNext()); - matchingTokens++; - skipShiftTotal += skipShift; - if (firstMatchToken == -1) { - firstMatchToken = lastMatchToken; - } - break; - } - } - if (!allElementsMatch) { - break; - } - } - - if (allElementsMatch && matchingTokens == patternSize) { - final RuleMatch ruleMatch = createRuleMatch(tokenPositions, tokens, - firstMatchToken, lastMatchToken, matchingTokens); - if (ruleMatch != null) { - ruleMatches.add(ruleMatch); - } - } - i++; - } - return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]); + public final RuleMatch[] match(final AnalyzedSentence text) throws IOException { + final PatternRuleMatcher matcher = new PatternRuleMatcher(this, useList); + return matcher.match(text); } - private RuleMatch createRuleMatch(final int[] tokenPositions, - final AnalyzedTokenReadings[] tokens, final int firstMatchToken, - final int lastMatchToken, final int matchingTokens) throws IOException { - final String errMessage = formatMatches(tokens, tokenPositions, - 
firstMatchToken, message); - int correctedStPos = 0; - if (startPositionCorrection > 0) { - for (int l = 0; l <= startPositionCorrection; l++) { - correctedStPos += tokenPositions[l]; - } - correctedStPos--; - } - int correctedEndPos = 0; - if (endPositionCorrection < 0) { - int l = 0; - while (l > endPositionCorrection) { - correctedEndPos -= tokenPositions[matchingTokens + l - 1]; - l--; - } - } - AnalyzedTokenReadings firstMatchTokenObj = tokens[firstMatchToken - + correctedStPos]; - boolean startsWithUppercase = StringTools - .startsWithUppercase(firstMatchTokenObj.getToken()) - && !matchConvertsCase(); - - if (firstMatchTokenObj.isSentStart() - && tokens.length > firstMatchToken + correctedStPos + 1) { - // make uppercasing work also at sentence start: - firstMatchTokenObj = tokens[firstMatchToken + correctedStPos + 1]; - startsWithUppercase = StringTools.startsWithUppercase(firstMatchTokenObj - .getToken()); - } - int fromPos = tokens[firstMatchToken + correctedStPos].getStartPos(); - // FIXME: this is fishy, assumes that comma should always come before - // whitespace - if (errMessage.contains(SUGG_TAG + ",") - && firstMatchToken + correctedStPos >= 1) { - fromPos = tokens[firstMatchToken + correctedStPos - 1].getStartPos() - + tokens[firstMatchToken + correctedStPos - 1].getToken().length(); - } - - final int toPos = tokens[lastMatchToken + correctedEndPos].getStartPos() - + tokens[lastMatchToken + correctedEndPos].getToken().length(); - if (fromPos < toPos) { // this can happen with some skip="-1" when the last - // token is not matched - return new RuleMatch(this, fromPos, toPos, - errMessage, shortMessage, startsWithUppercase); - } // failed to create any rule match... - return null; - } - - /** - * Checks if the suggestion starts with a match that is supposed to convert - * case. If it does, stop the default conversion to uppercase. - * - * @return true, if the match converts the case of the token. 
- */ - private boolean matchConvertsCase() { - if (suggestionMatches != null && !suggestionMatches.isEmpty()) { - final int sugStart = message.indexOf(SUGG_TAG) + SUGG_TAG.length(); - for (Match sMatch : suggestionMatches) { - if (!sMatch.isInMessageOnly() && sMatch.convertsCase() - && message.charAt(sugStart) == '\\') { - return true; - } - } - } - return false; - } - public final void addSuggestionMatch(final Match m) { if (suggestionMatches == null) { suggestionMatches = new ArrayList<Match>(); @@ -337,253 +192,23 @@ } /** - * Gets the index of the element indexed by i, adding any offsets because of - * the phrases in the rule. - * - * @param i - * Current element index. - * @return int Index translated into XML element no. + * For testing only. */ - private int translateElementNo(final int i) { - if (!useList || i < 0) { - return i; - } - int j = 0; - for (int k = 0; k < i; k++) { - j += elementNo.get(k); - } - return j; + public final List<Element> getElements() { + return patternElements; } - /** - * Returns true when the token in the rule references a phrase composed of - * many tokens. - * - * @param i - * The index of the token. - * @return true if the phrase is under the index, false otherwise. - **/ - private int phraseLen(final int i) { - if (!useList || i > (elementNo.size() - 1)) { - return 1; - } - return elementNo.get(i); + List<Integer> getElementNo() { + return elementNo; } - /** - * Creates a Cartesian product of the arrays stored in the input array. - * - * @param input - * Array of string arrays to combine. - * @param output - * Work array of strings. - * @param r - * Starting parameter (use 0 to get all combinations). - * @param lang - * Text language for adding spaces in some languages. - * @return Combined array of @String. 
- */ - private static String[] combineLists(final String[][] input, - final String[] output, final int r, final Language lang) { - final List<String> outputList = new ArrayList<String>(); - if (r == input.length) { - final StringBuilder sb = new StringBuilder(); - for (int k = 0; k < output.length; k++) { - sb.append(output[k]); - if (k < output.length - 1) { - sb.append(StringTools.addSpace(output[k + 1], lang)); - } - } - outputList.add(sb.toString()); - } else { - for (int c = 0; c < input[r].length; c++) { - output[r] = input[r][c]; - final String[] sList = combineLists(input, output, r + 1, lang); - outputList.addAll(Arrays.asList(sList)); - } - } - return outputList.toArray(new String[outputList.size()]); + String getShortMessage() { + return shortMessage; } - - /** - * Concatenates the matches, and takes care of phrases (including inflection - * using synthesis). - * - * @param start - * Position of the element as referenced by match element in the - * rule. - * @param index - * The index of the element found in the matching sentence. - * @param tokenIndex - * The position of the token in the AnalyzedTokenReadings array. - * @param tokens - * Array of @AnalyzedTokenReadings - * @return @String[] Array of concatenated strings - * @throws IOException - * in case disk operations (used in synthesizer) go wrong. 
- */ - private String[] concatMatches(final int start, final int index, - final int tokenIndex, final AnalyzedTokenReadings[] tokens, - final int nextTokenPos) - throws IOException { - String[] finalMatch = null; - if (suggestionMatches.get(start) != null) { - final int len = phraseLen(index); - if (len == 1) { - final int skippedTokens = nextTokenPos - tokenIndex; - suggestionMatches.get(start).setToken(tokens, tokenIndex - 1, skippedTokens); - suggestionMatches.get(start).setSynthesizer(language.getSynthesizer()); - finalMatch = suggestionMatches.get(start).toFinalString(); - } else { - final List<String[]> matchList = new ArrayList<String[]>(); - for (int i = 0; i < len; i++) { - final int skippedTokens = nextTokenPos - (tokenIndex + i); - suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i, skippedTokens); - suggestionMatches.get(start) - .setSynthesizer(language.getSynthesizer()); - matchList.add(suggestionMatches.get(start).toFinalString()); - } - return combineLists(matchList.toArray(new String[matchList.size()][]), - new String[matchList.size()], 0, language); - } - } - return finalMatch; + + List<Match> getSuggestionMatches() { + return suggestionMatches; } - /** - * Replace back references generated with <match> and \\1 in message - * using Match class, and take care of skipping. - * - * @param tokenReadings - * Array of AnalyzedTokenReadings that were matched against the - * pattern - * @param positions - * Array of relative positions of matched tokens - * @param firstMatchTok - * Position of the first matched token - * @param errorMsg - * String containing suggestion markup - * @return String Formatted message. 
- * @throws IOException - */ - private String formatMatches(final AnalyzedTokenReadings[] tokenReadings, - final int[] positions, final int firstMatchTok, final String errorMsg) - throws IOException { - String errorMessage = errorMsg; - int matchCounter = 0; - final int[] numbersToMatches = new int[errorMsg.length()]; - boolean newWay = false; - int errLen = errorMessage.length(); - int errMarker = errorMessage.indexOf('\\'); - boolean numberFollows = false; - if (errMarker >= 0 && errMarker < errLen - 1) { - numberFollows = StringTools.isPositiveNumber(errorMessage - .charAt(errMarker + 1)); - } - while (errMarker >= 0 && numberFollows) { - final int backslashPos = errorMessage.indexOf('\\'); - if (backslashPos >= 0 && StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + 1))) { - int numLen = 1; - while (backslashPos + numLen < errorMessage.length() - && StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + numLen))) { - numLen++; - } - final int j = Integer.parseInt(errorMessage.substring(backslashPos + 1, backslashPos - + numLen)) - 1; - int repTokenPos = 0; - int nextTokenPos = 0; - for (int l = 0; l <= j; l++) { - repTokenPos += positions[l]; - } - if (j <= positions.length) { - nextTokenPos = firstMatchTok + repTokenPos + positions[j + 1]; - } - if (suggestionMatches != null) { - if (matchCounter < suggestionMatches.size()) { - numbersToMatches[j] = matchCounter; - if (suggestionMatches.get(matchCounter) != null) { - final String[] matches = concatMatches(matchCounter, j, - firstMatchTok + repTokenPos, tokenReadings, nextTokenPos); - final String leftSide = errorMessage.substring(0, backslashPos); - final String rightSide = errorMessage.substring(backslashPos + numLen); - if (matches.length == 1) { - errorMessage = leftSide + matches[0] + rightSide; - } else { - errorMessage = formatMultipleSynthesis(matches, leftSide, - rightSide); - } - matchCounter++; - newWay = true; - } - } else { - // FIXME: is this correct? 
this is how we deal with multiple matches - suggestionMatches.add(suggestionMatches.get(numbersToMatches[j])); - } - } - - if (!newWay) { - // in case <match> elements weren't used (yet) - errorMessage = errorMessage.replace("\\" + (j + 1), - tokenReadings[firstMatchTok + repTokenPos - 1].getToken()); - } - } - errMarker = errorMessage.indexOf('\\'); - numberFollows = false; - errLen = errorMessage.length(); - if (errMarker >= 0 && errMarker < errLen - 1) { - numberFollows = StringTools.isPositiveNumber(errorMessage - .charAt(errMarker + 1)); - } - } - return errorMessage; - } - - private static String formatMultipleSynthesis(final String[] matches, - final String leftSide, final String rightSide) { - String errorMessage = ""; - String suggestionLeft = ""; - String suggestionRight = ""; - String rightSideNew = rightSide; - final int sPos = leftSide.lastIndexOf(SUGG_TAG); - if (sPos > 0) { - suggestionLeft = leftSide.substring(sPos + SUGG_TAG.length()); - } - if (StringTools.isEmpty(suggestionLeft)) { - errorMessage = leftSide; - } else { - errorMessage = leftSide.substring(0, leftSide.lastIndexOf(SUGG_TAG)) - + SUGG_TAG; - } - final int rPos = rightSide.indexOf(END_SUGG_TAG); - if (rPos > 0) { - suggestionRight = rightSide.substring(0, rPos); - } - if (!StringTools.isEmpty(suggestionRight)) { - rightSideNew = rightSide.substring(rightSide.indexOf(END_SUGG_TAG)); - } - final int lastLeftSugEnd = leftSide.indexOf(END_SUGG_TAG); - final int lastLeftSugStart = leftSide.lastIndexOf(SUGG_TAG); - final StringBuilder sb = new StringBuilder(); - sb.append(errorMessage); - for (int z = 0; z < matches.length; z++) { - sb.append(suggestionLeft); - sb.append(matches[z]); - sb.append(suggestionRight); - if ((z < matches.length - 1) && lastLeftSugEnd < lastLeftSugStart) { - sb.append(END_SUGG_TAG); - sb.append(", "); - sb.append(SUGG_TAG); - } - } - sb.append(rightSideNew); - return sb.toString(); - } - - /** - * For testing only. 
- */ - public final List<Element> getElements() { - return patternElements; - } - + } Added: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java =================================================================== --- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java (rev 0) +++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java 2012-04-01 13:33:36 UTC (rev 6676) @@ -0,0 +1,413 @@ +package org.languagetool.rules.patterns; + +import org.languagetool.AnalyzedSentence; +import org.languagetool.AnalyzedTokenReadings; +import org.languagetool.Language; +import org.languagetool.rules.RuleMatch; +import org.languagetool.tools.StringTools; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +/** + * Matches a pattern rule against plain text. + */ +class PatternRuleMatcher { + + private static final String SUGGESTION_START_TAG = "<suggestion>"; + private static final String SUGGESTION_END_TAG = "</suggestion>"; + + private final PatternRule rule; + private final boolean useList; + + PatternRuleMatcher(PatternRule rule, boolean useList) { + this.rule = rule; + this.useList = useList; + } + + final RuleMatch[] match(final AnalyzedSentence text) + throws IOException { + final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>(); + final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace(); + final int[] tokenPositions = new int[tokens.length + 1]; + final int patternSize = rule.patternElements.size(); + final int limit = Math.max(0, tokens.length - patternSize + 1); + Element elem = null; + int i = 0; + while (i < limit && !(rule.sentStart && i > 0)) { + boolean allElementsMatch = false; + int firstMatchToken = -1; + int lastMatchToken = -1; + int matchingTokens = 0; + int prevSkipNext = 0; + // this variable keeps the total number + // of tokens skipped + int skipShiftTotal = 0; + if 
(rule.testUnification) { + rule.unifier.reset(); + } + for (int k = 0; k < patternSize; k++) { + final Element prevElement = elem; + elem = rule.patternElements.get(k); + rule.setupRef(firstMatchToken, elem, tokens); + final int nextPos = i + k + skipShiftTotal; + rule.prevMatched = false; + if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) { // SENT_END? + prevSkipNext = tokens.length - (nextPos + 1); + } + final int maxTok = Math.min(nextPos + prevSkipNext, tokens.length - (patternSize - k)); + for (int m = nextPos; m <= maxTok; m++) { + allElementsMatch = !tokens[m].isImmunized() && rule.testAllReadings(tokens, elem, prevElement, m, + firstMatchToken, prevSkipNext); + if (allElementsMatch) { + lastMatchToken = m; + final int skipShift = lastMatchToken - nextPos; + tokenPositions[matchingTokens] = skipShift + 1; + prevSkipNext = translateElementNo(elem.getSkipNext()); + matchingTokens++; + skipShiftTotal += skipShift; + if (firstMatchToken == -1) { + firstMatchToken = lastMatchToken; + } + break; + } + } + if (!allElementsMatch) { + break; + } + } + + if (allElementsMatch && matchingTokens == patternSize) { + final RuleMatch ruleMatch = createRuleMatch(tokenPositions, tokens, + firstMatchToken, lastMatchToken, matchingTokens); + if (ruleMatch != null) { + ruleMatches.add(ruleMatch); + } + } + i++; + } + return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]); + } + + private RuleMatch createRuleMatch(final int[] tokenPositions, + final AnalyzedTokenReadings[] tokens, final int firstMatchToken, + final int lastMatchToken, final int matchingTokens) throws IOException { + final String errMessage = formatMatches(tokens, tokenPositions, + firstMatchToken, rule.getMessage()); + int correctedStPos = 0; + if (rule.startPositionCorrection > 0) { + for (int l = 0; l <= rule.startPositionCorrection; l++) { + correctedStPos += tokenPositions[l]; + } + correctedStPos--; + } + int correctedEndPos = 0; + if (rule.endPositionCorrection < 0) { + int l = 0; + 
while (l > rule.endPositionCorrection) { + correctedEndPos -= tokenPositions[matchingTokens + l - 1]; + l--; + } + } + AnalyzedTokenReadings firstMatchTokenObj = tokens[firstMatchToken + + correctedStPos]; + boolean startsWithUppercase = StringTools + .startsWithUppercase(firstMatchTokenObj.getToken()) + && !matchConvertsCase(); + + if (firstMatchTokenObj.isSentStart() + && tokens.length > firstMatchToken + correctedStPos + 1) { + // make uppercasing work also at sentence start: + firstMatchTokenObj = tokens[firstMatchToken + correctedStPos + 1]; + startsWithUppercase = StringTools.startsWithUppercase(firstMatchTokenObj + .getToken()); + } + int fromPos = tokens[firstMatchToken + correctedStPos].getStartPos(); + // FIXME: this is fishy, assumes that comma should always come before + // whitespace + if (errMessage.contains(SUGGESTION_START_TAG + ",") + && firstMatchToken + correctedStPos >= 1) { + fromPos = tokens[firstMatchToken + correctedStPos - 1].getStartPos() + + tokens[firstMatchToken + correctedStPos - 1].getToken().length(); + } + + final int toPos = tokens[lastMatchToken + correctedEndPos].getStartPos() + + tokens[lastMatchToken + correctedEndPos].getToken().length(); + if (fromPos < toPos) { // this can happen with some skip="-1" when the last + // token is not matched + return new RuleMatch(rule, fromPos, toPos, + errMessage, rule.getShortMessage(), startsWithUppercase); + } // failed to create any rule match... + return null; + } + + /** + * Checks if the suggestion starts with a match that is supposed to convert + * case. If it does, stop the default conversion to uppercase. + * + * @return true, if the match converts the case of the token. 
+ */ + private boolean matchConvertsCase() { + final List<Match> suggestionMatches = rule.getSuggestionMatches(); + if (suggestionMatches != null && !suggestionMatches.isEmpty()) { + final int sugStart = rule.getMessage().indexOf(SUGGESTION_START_TAG) + SUGGESTION_START_TAG.length(); + for (Match sMatch : suggestionMatches) { + if (!sMatch.isInMessageOnly() && sMatch.convertsCase() + && rule.getMessage().charAt(sugStart) == '\\') { + return true; + } + } + } + return false; + } + + /** + * Gets the index of the element indexed by i, adding any offsets because of + * the phrases in the rule. + * + * @param i + * Current element index. + * @return int Index translated into XML element no. + */ + private int translateElementNo(final int i) { + if (!useList || i < 0) { + return i; + } + int j = 0; + for (int k = 0; k < i; k++) { + j += rule.getElementNo().get(k); + } + return j; + } + + /** + * Replace back references generated with <match> and \\1 in message + * using Match class, and take care of skipping. + * + * @param tokenReadings + * Array of AnalyzedTokenReadings that were matched against the + * pattern + * @param positions + * Array of relative positions of matched tokens + * @param firstMatchTok + * Position of the first matched token + * @param errorMsg + * String containing suggestion markup + * @return String Formatted message. 
+ * @throws IOException + */ + private String formatMatches(final AnalyzedTokenReadings[] tokenReadings, + final int[] positions, final int firstMatchTok, final String errorMsg) + throws IOException { + String errorMessage = errorMsg; + int matchCounter = 0; + final int[] numbersToMatches = new int[errorMsg.length()]; + boolean newWay = false; + int errLen = errorMessage.length(); + int errMarker = errorMessage.indexOf('\\'); + boolean numberFollows = false; + if (errMarker >= 0 && errMarker < errLen - 1) { + numberFollows = StringTools.isPositiveNumber(errorMessage + .charAt(errMarker + 1)); + } + while (errMarker >= 0 && numberFollows) { + final int backslashPos = errorMessage.indexOf('\\'); + if (backslashPos >= 0 && StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + 1))) { + int numLen = 1; + while (backslashPos + numLen < errorMessage.length() + && StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + numLen))) { + numLen++; + } + final int j = Integer.parseInt(errorMessage.substring(backslashPos + 1, backslashPos + + numLen)) - 1; + int repTokenPos = 0; + int nextTokenPos = 0; + for (int l = 0; l <= j; l++) { + repTokenPos += positions[l]; + } + if (j <= positions.length) { + nextTokenPos = firstMatchTok + repTokenPos + positions[j + 1]; + } + final List<Match> suggestionMatches = rule.getSuggestionMatches(); + if (suggestionMatches != null) { + if (matchCounter < suggestionMatches.size()) { + numbersToMatches[j] = matchCounter; + if (suggestionMatches.get(matchCounter) != null) { + final String[] matches = concatMatches(matchCounter, j, + firstMatchTok + repTokenPos, tokenReadings, nextTokenPos); + final String leftSide = errorMessage.substring(0, backslashPos); + final String rightSide = errorMessage.substring(backslashPos + numLen); + if (matches.length == 1) { + errorMessage = leftSide + matches[0] + rightSide; + } else { + errorMessage = formatMultipleSynthesis(matches, leftSide, + rightSide); + } + matchCounter++; + newWay = true; 
+ } + } else { + // FIXME: is this correct? this is how we deal with multiple matches + suggestionMatches.add(suggestionMatches.get(numbersToMatches[j])); + } + } + + if (!newWay) { + // in case <match> elements weren't used (yet) + errorMessage = errorMessage.replace("\\" + (j + 1), + tokenReadings[firstMatchTok + repTokenPos - 1].getToken()); + } + } + errMarker = errorMessage.indexOf('\\'); + numberFollows = false; + errLen = errorMessage.length(); + if (errMarker >= 0 && errMarker < errLen - 1) { + numberFollows = StringTools.isPositiveNumber(errorMessage + .charAt(errMarker + 1)); + } + } + return errorMessage; + } + + private static String formatMultipleSynthesis(final String[] matches, + final String leftSide, final String rightSide) { + final String errorMessage; + String suggestionLeft = ""; + String suggestionRight = ""; + String rightSideNew = rightSide; + final int sPos = leftSide.lastIndexOf(SUGGESTION_START_TAG); + if (sPos > 0) { + suggestionLeft = leftSide.substring(sPos + SUGGESTION_START_TAG.length()); + } + if (StringTools.isEmpty(suggestionLeft)) { + errorMessage = leftSide; + } else { + errorMessage = leftSide.substring(0, leftSide.lastIndexOf(SUGGESTION_START_TAG)) + + SUGGESTION_START_TAG; + } + final int rPos = rightSide.indexOf(SUGGESTION_END_TAG); + if (rPos > 0) { + suggestionRight = rightSide.substring(0, rPos); + } + if (!StringTools.isEmpty(suggestionRight)) { + rightSideNew = rightSide.substring(rightSide.indexOf(SUGGESTION_END_TAG)); + } + final int lastLeftSugEnd = leftSide.indexOf(SUGGESTION_END_TAG); + final int lastLeftSugStart = leftSide.lastIndexOf(SUGGESTION_START_TAG); + final StringBuilder sb = new StringBuilder(); + sb.append(errorMessage); + for (int z = 0; z < matches.length; z++) { + sb.append(suggestionLeft); + sb.append(matches[z]); + sb.append(suggestionRight); + if ((z < matches.length - 1) && lastLeftSugEnd < lastLeftSugStart) { + sb.append(SUGGESTION_END_TAG); + sb.append(", "); + sb.append(SUGGESTION_START_TAG); + 
} + } + sb.append(rightSideNew); + return sb.toString(); + } + + /** + * Concatenates the matches, and takes care of phrases (including inflection + * using synthesis). + * + * @param start + * Position of the element as referenced by match element in the + * rule. + * @param index + * The index of the element found in the matching sentence. + * @param tokenIndex + * The position of the token in the AnalyzedTokenReadings array. + * @param tokens + * Array of @AnalyzedTokenReadings + * @return @String[] Array of concatenated strings + * @throws IOException + * in case disk operations (used in synthesizer) go wrong. + */ + private String[] concatMatches(final int start, final int index, + final int tokenIndex, final AnalyzedTokenReadings[] tokens, + final int nextTokenPos) + throws IOException { + String[] finalMatch = null; + final List<Match> suggestionMatches = rule.getSuggestionMatches(); + if (suggestionMatches.get(start) != null) { + final int len = phraseLen(index); + final Language language = rule.language; + if (len == 1) { + final int skippedTokens = nextTokenPos - tokenIndex; + suggestionMatches.get(start).setToken(tokens, tokenIndex - 1, skippedTokens); + suggestionMatches.get(start).setSynthesizer(language.getSynthesizer()); + finalMatch = suggestionMatches.get(start).toFinalString(); + } else { + final List<String[]> matchList = new ArrayList<String[]>(); + for (int i = 0; i < len; i++) { + final int skippedTokens = nextTokenPos - (tokenIndex + i); + suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i, skippedTokens); + suggestionMatches.get(start) + .setSynthesizer(language.getSynthesizer()); + matchList.add(suggestionMatches.get(start).toFinalString()); + } + return combineLists(matchList.toArray(new String[matchList.size()][]), + new String[matchList.size()], 0, language); + } + } + return finalMatch; + } + + /** + * Returns true when the token in the rule references a phrase composed of + * many tokens. 
+ * + * @param i + * The index of the token. + * @return true if the phrase is under the index, false otherwise. + **/ + private int phraseLen(final int i) { + final List<Integer> elementNo = rule.getElementNo(); + if (!useList || i > (elementNo.size() - 1)) { + return 1; + } + return elementNo.get(i); + } + + /** + * Creates a Cartesian product of the arrays stored in the input array. + * + * @param input + * Array of string arrays to combine. + * @param output + * Work array of strings. + * @param r + * Starting parameter (use 0 to get all combinations). + * @param lang + * Text language for adding spaces in some languages. + * @return Combined array of @String. + */ + private static String[] combineLists(final String[][] input, + final String[] output, final int r, final Language lang) { + final List<String> outputList = new ArrayList<String>(); + if (r == input.length) { + final StringBuilder sb = new StringBuilder(); + for (int k = 0; k < output.length; k++) { + sb.append(output[k]); + if (k < output.length - 1) { + sb.append(StringTools.addSpace(output[k + 1], lang)); + } + } + outputList.add(sb.toString()); + } else { + for (int c = 0; c < input[r].length; c++) { + output[r] = input[r][c]; + final String[] sList = combineLists(input, output, r + 1, lang); + outputList.addAll(Arrays.asList(sList)); + } + } + return outputList.toArray(new String[outputList.size()]); + } + +} Modified: trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java =================================================================== --- trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java 2012-04-01 10:46:03 UTC (rev 6675) +++ trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java 2012-04-01 13:33:36 UTC (rev 6676) @@ -626,7 +626,7 @@ final String left, final String right) throws Exception { final Class[] argClasses = { String[].class, String.class, String.class }; final Object[] argObjects = { 
suggestions, left, right }; - return TestTools.callStringStaticMethod(PatternRule.class, + return TestTools.callStringStaticMethod(PatternRuleMatcher.class, "formatMultipleSynthesis", argClasses, argObjects); } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ This SF email is sponsored by: Try Windows Azure free for 90 days Click Here http://p.sf.net/sfu/sfd2d-msazure _______________________________________________ Languagetool-cvs mailing list Languagetool-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/languagetool-cvs