[LanguageTool] SF.net SVN: languagetool:[6676] trunk/JLanguageTool/src

dnaber Sun, 01 Apr 2012 06:33:49 -0700

Revision: 6676
          http://languagetool.svn.sourceforge.net/languagetool/?rev=6676&view=rev
Author:   dnaber
Date:     2012-04-01 13:33:36 +0000 (Sun, 01 Apr 2012)
Log Message:
-----------
break up PatternRule into two classes


Modified Paths:
--------------
    trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java
    trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java

Added Paths:
-----------
    trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java

Modified: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java       2012-04-01 10:46:03 UTC (rev 6675)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRule.java       2012-04-01 13:33:36 UTC (rev 6676)
@@ -18,17 +18,15 @@
  */
 package org.languagetool.rules.patterns;
 
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
-
 import org.languagetool.AnalyzedSentence;
-import org.languagetool.AnalyzedTokenReadings;
 import org.languagetool.Language;
 import org.languagetool.rules.RuleMatch;
 import org.languagetool.tools.StringTools;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
 /**
  * A Rule that describes a language error as a simple pattern of words or of
  * part-of-speech tags.
@@ -37,24 +35,21 @@
  */
 public class PatternRule extends AbstractPatternRule {
 
-  private static final String SUGG_TAG = "<suggestion>";
-  private static final String END_SUGG_TAG = "</suggestion>";
+  private final String shortMessage;
 
-  private String subId; // because there can be more than one rule in a rule 
group
+  /**
+   * A list of elements as they appear in XML file (phrases count as single
+   * tokens in case of matches or skipping).
+   */
+  private final List<Integer> elementNo;
 
+  private String subId; // because there can be more than one rule in a rule 
group
   private String message;
-  private String shortMessage;
 
   /** Formatted suggestion elements. **/
   private List<Match> suggestionMatches;
 
   /**
-   * A list of elements as they appear in XML file (phrases count as single
-   * tokens in case of matches or skipping).
-   */
-  private List<Integer> elementNo;
-
-  /**
    * This property is used for short-circuiting evaluation of the elementNo list
    * order.
    */
@@ -85,19 +80,10 @@
       final List<Element> elements, final String description,
       final String message, final String shortMessage) {
     super(id, description, language, elements, false);
-    if (id == null) {
-      throw new NullPointerException("id cannot be null");
-    }
-    if (language == null) {
-      throw new NullPointerException("language cannot be null");
-    }
-    if (elements == null) {
-      throw new NullPointerException("elements cannot be null");
-    }
-    if (description == null) {
-      throw new NullPointerException("description/name cannot be null");
-    }
-
+    if (id == null) throw new NullPointerException("id cannot be null");
+    if (language == null) throw new NullPointerException("language cannot be 
null");
+    if (elements == null) throw new NullPointerException("elements cannot be 
null");
+    if (description == null) throw new NullPointerException("description/name 
cannot be null");
     this.message = message;
     this.shortMessage = shortMessage;
     this.elementNo = new ArrayList<Integer>();
@@ -113,7 +99,6 @@
           useList = true;
         } else {
           elementNo.add(cnt);
-          prevName = "";
           curName = "";
           cnt = 0;
         }
@@ -194,141 +179,11 @@
   }
 
   @Override
-  public final RuleMatch[] match(final AnalyzedSentence text)
-  throws IOException {
-    final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
-    final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
-    final int[] tokenPositions = new int[tokens.length + 1];
-    final int patternSize = patternElements.size();
-    final int limit = Math.max(0, tokens.length - patternSize + 1);
-    Element elem = null;
-    int i = 0;
-    while (i < limit && !(sentStart && i > 0)) {
-      boolean allElementsMatch = false;
-      int firstMatchToken = -1;
-      int lastMatchToken = -1;
-      int matchingTokens = 0;
-      int prevSkipNext = 0;
-      // this variable keeps the total number
-      // of tokens skipped
-      int skipShiftTotal = 0;
-      if (testUnification) {
-        unifier.reset();
-      }
-      for (int k = 0; k < patternSize; k++) {
-        final Element prevElement = elem;
-        elem = patternElements.get(k);
-        setupRef(firstMatchToken, elem, tokens);
-        final int nextPos = i + k + skipShiftTotal;
-        prevMatched = false;
-        if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) { // 
SENT_END?
-          prevSkipNext = tokens.length - (nextPos + 1);
-        }        
-        final int maxTok = Math.min(nextPos + prevSkipNext, tokens.length - 
(patternSize - k));
-        for (int m = nextPos; m <= maxTok; m++) {          
-          allElementsMatch = !tokens[m].isImmunized() && 
testAllReadings(tokens, elem, prevElement, m,
-              firstMatchToken, prevSkipNext);
-          if (allElementsMatch) {
-            lastMatchToken = m;
-            final int skipShift = lastMatchToken - nextPos;
-            tokenPositions[matchingTokens] = skipShift + 1;
-            prevSkipNext = translateElementNo(elem.getSkipNext());
-            matchingTokens++;
-            skipShiftTotal += skipShift;
-            if (firstMatchToken == -1) {
-              firstMatchToken = lastMatchToken;              
-            }
-            break;
-          }          
-        }
-        if (!allElementsMatch) {
-          break;
-        }
-      }
-
-      if (allElementsMatch && matchingTokens == patternSize) {
-        final RuleMatch ruleMatch = createRuleMatch(tokenPositions, tokens,
-            firstMatchToken, lastMatchToken, matchingTokens);
-        if (ruleMatch != null) {
-          ruleMatches.add(ruleMatch);
-        }
-      }
-      i++;
-    }
-    return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]);
+  public final RuleMatch[] match(final AnalyzedSentence text) throws 
IOException {
+    final PatternRuleMatcher matcher = new PatternRuleMatcher(this, useList);
+    return matcher.match(text);
   }
 
-  private RuleMatch createRuleMatch(final int[] tokenPositions,
-      final AnalyzedTokenReadings[] tokens, final int firstMatchToken,
-      final int lastMatchToken, final int matchingTokens) throws IOException {
-    final String errMessage = formatMatches(tokens, tokenPositions,
-        firstMatchToken, message);
-    int correctedStPos = 0;
-    if (startPositionCorrection > 0) {
-      for (int l = 0; l <= startPositionCorrection; l++) {
-        correctedStPos += tokenPositions[l];
-      }
-      correctedStPos--;
-    }
-    int correctedEndPos = 0;
-    if (endPositionCorrection < 0) {
-      int l = 0;
-      while (l > endPositionCorrection) {
-        correctedEndPos -= tokenPositions[matchingTokens + l - 1];
-        l--;
-      }
-    }
-    AnalyzedTokenReadings firstMatchTokenObj = tokens[firstMatchToken
-                                                      + correctedStPos];
-    boolean startsWithUppercase = StringTools
-    .startsWithUppercase(firstMatchTokenObj.getToken())
-    && !matchConvertsCase();
-
-    if (firstMatchTokenObj.isSentStart()
-        && tokens.length > firstMatchToken + correctedStPos + 1) {
-      // make uppercasing work also at sentence start:
-      firstMatchTokenObj = tokens[firstMatchToken + correctedStPos + 1];
-      startsWithUppercase = StringTools.startsWithUppercase(firstMatchTokenObj
-          .getToken());
-    }
-    int fromPos = tokens[firstMatchToken + correctedStPos].getStartPos();
-    // FIXME: this is fishy, assumes that comma should always come before
-    // whitespace
-    if (errMessage.contains(SUGG_TAG + ",")
-        && firstMatchToken + correctedStPos >= 1) {
-      fromPos = tokens[firstMatchToken + correctedStPos - 1].getStartPos()
-      + tokens[firstMatchToken + correctedStPos - 1].getToken().length();
-    }
-
-    final int toPos = tokens[lastMatchToken + correctedEndPos].getStartPos()
-    + tokens[lastMatchToken + correctedEndPos].getToken().length();
-    if (fromPos < toPos) { // this can happen with some skip="-1" when the last
-      // token is not matched
-      return new RuleMatch(this, fromPos, toPos,
-          errMessage, shortMessage, startsWithUppercase);
-    } // failed to create any rule match...
-    return null;
-  }
-
-  /**
-   * Checks if the suggestion starts with a match that is supposed to convert
-   * case. If it does, stop the default conversion to uppercase.
-   * 
-   * @return true, if the match converts the case of the token.
-   */
-  private boolean matchConvertsCase() {
-    if (suggestionMatches != null && !suggestionMatches.isEmpty()) {
-      final int sugStart = message.indexOf(SUGG_TAG) + SUGG_TAG.length();
-      for (Match sMatch : suggestionMatches) {
-        if (!sMatch.isInMessageOnly() && sMatch.convertsCase()
-                && message.charAt(sugStart) == '\\') {
-          return true;
-        }
-      }
-    }
-    return false;
-  }
-
   public final void addSuggestionMatch(final Match m) {
     if (suggestionMatches == null) {
       suggestionMatches = new ArrayList<Match>();
@@ -337,253 +192,23 @@
   }
 
   /**
-   * Gets the index of the element indexed by i, adding any offsets because of
-   * the phrases in the rule.
-   * 
-   * @param i
-   *          Current element index.
-   * @return int Index translated into XML element no.
+   * For testing only.
    */
-  private int translateElementNo(final int i) {
-    if (!useList || i < 0) {
-      return i;
-    }
-    int j = 0;
-    for (int k = 0; k < i; k++) {
-      j += elementNo.get(k);
-    }
-    return j;
+  public final List<Element> getElements() {
+    return patternElements;
   }
 
-  /**
-   * Returns true when the token in the rule references a phrase composed of
-   * many tokens.
-   * 
-   * @param i
-   *          The index of the token.
-   * @return true if the phrase is under the index, false otherwise.
-   **/
-  private int phraseLen(final int i) {
-    if (!useList || i > (elementNo.size() - 1)) {
-      return 1;
-    }
-    return elementNo.get(i);
+  List<Integer> getElementNo() {
+    return elementNo;
   }
 
-  /**
-   * Creates a Cartesian product of the arrays stored in the input array.
-   * 
-   * @param input
-   *          Array of string arrays to combine.
-   * @param output
-   *          Work array of strings.
-   * @param r
-   *          Starting parameter (use 0 to get all combinations).
-   * @param lang
-   *          Text language for adding spaces in some languages.
-   * @return Combined array of @String.
-   */
-  private static String[] combineLists(final String[][] input,
-      final String[] output, final int r, final Language lang) {
-    final List<String> outputList = new ArrayList<String>();
-    if (r == input.length) {
-      final StringBuilder sb = new StringBuilder();
-      for (int k = 0; k < output.length; k++) {
-        sb.append(output[k]);
-        if (k < output.length - 1) {
-          sb.append(StringTools.addSpace(output[k + 1], lang));
-        }
-      }
-      outputList.add(sb.toString());
-    } else {
-      for (int c = 0; c < input[r].length; c++) {
-        output[r] = input[r][c];
-        final String[] sList = combineLists(input, output, r + 1, lang);
-        outputList.addAll(Arrays.asList(sList));
-      }
-    }
-    return outputList.toArray(new String[outputList.size()]);
+  String getShortMessage() {
+    return shortMessage;
   }
-
-  /**
-   * Concatenates the matches, and takes care of phrases (including inflection
-   * using synthesis).
-   * 
-   * @param start
-   *          Position of the element as referenced by match element in the
-   *          rule.
-   * @param index
-   *          The index of the element found in the matching sentence.
-   * @param tokenIndex
-   *          The position of the token in the AnalyzedTokenReadings array.
-   * @param tokens
-   *          Array of @AnalyzedTokenReadings
-   * @return @String[] Array of concatenated strings
-   * @throws IOException
-   *           in case disk operations (used in synthesizer) go wrong.
-   */
-  private String[] concatMatches(final int start, final int index,
-      final int tokenIndex, final AnalyzedTokenReadings[] tokens,
-      final int nextTokenPos)
-  throws IOException {
-    String[] finalMatch = null;
-    if (suggestionMatches.get(start) != null) {
-      final int len = phraseLen(index);
-      if (len == 1) {
-        final int skippedTokens = nextTokenPos - tokenIndex;
-        suggestionMatches.get(start).setToken(tokens, tokenIndex - 1, 
skippedTokens);
-        suggestionMatches.get(start).setSynthesizer(language.getSynthesizer());
-        finalMatch = suggestionMatches.get(start).toFinalString();
-      } else {
-        final List<String[]> matchList = new ArrayList<String[]>();
-        for (int i = 0; i < len; i++) {
-          final int skippedTokens = nextTokenPos - (tokenIndex + i);
-          suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i, 
skippedTokens);          
-          suggestionMatches.get(start)
-          .setSynthesizer(language.getSynthesizer());
-          matchList.add(suggestionMatches.get(start).toFinalString());
-        }
-        return combineLists(matchList.toArray(new String[matchList.size()][]),
-            new String[matchList.size()], 0, language);
-      }
-    }
-    return finalMatch;
+  
+  List<Match> getSuggestionMatches() {
+    return suggestionMatches;
   }
 
-  /**
-   * Replace back references generated with &lt;match&gt; and \\1 in message
-   * using Match class, and take care of skipping.
-   * 
-   * @param tokenReadings
-   *          Array of AnalyzedTokenReadings that were matched against the
-   *          pattern
-   * @param positions
-   *          Array of relative positions of matched tokens
-   * @param firstMatchTok
-   *          Position of the first matched token
-   * @param errorMsg
-   *          String containing suggestion markup
-   * @return String Formatted message.
-   * @throws IOException
-   */
-  private String formatMatches(final AnalyzedTokenReadings[] tokenReadings,
-      final int[] positions, final int firstMatchTok, final String errorMsg)
-  throws IOException {
-    String errorMessage = errorMsg;
-    int matchCounter = 0;
-    final int[] numbersToMatches = new int[errorMsg.length()];
-    boolean newWay = false;
-    int errLen = errorMessage.length();
-    int errMarker = errorMessage.indexOf('\\');
-    boolean numberFollows = false;
-    if (errMarker >= 0 && errMarker < errLen - 1) {
-      numberFollows = StringTools.isPositiveNumber(errorMessage
-          .charAt(errMarker + 1));
-    }
-    while (errMarker >= 0 && numberFollows) {
-      final int backslashPos = errorMessage.indexOf('\\');
-      if (backslashPos >= 0 && 
StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + 1))) {
-        int numLen = 1;
-        while (backslashPos + numLen < errorMessage.length()
-            && StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + 
numLen))) {
-          numLen++;
-        }
-        final int j = Integer.parseInt(errorMessage.substring(backslashPos + 
1, backslashPos
-            + numLen)) - 1;
-        int repTokenPos = 0;
-        int nextTokenPos = 0;
-        for (int l = 0; l <= j; l++) {
-          repTokenPos += positions[l];
-        }
-        if (j <= positions.length) {
-          nextTokenPos = firstMatchTok + repTokenPos + positions[j + 1];
-        }
-        if (suggestionMatches != null) {
-          if (matchCounter < suggestionMatches.size()) {
-            numbersToMatches[j] = matchCounter;
-            if (suggestionMatches.get(matchCounter) != null) {
-              final String[] matches = concatMatches(matchCounter, j,
-                  firstMatchTok + repTokenPos, tokenReadings, nextTokenPos);
-              final String leftSide = errorMessage.substring(0, backslashPos);
-              final String rightSide = errorMessage.substring(backslashPos + 
numLen);
-              if (matches.length == 1) {
-                errorMessage = leftSide + matches[0] + rightSide;
-              } else {
-                errorMessage = formatMultipleSynthesis(matches, leftSide,
-                    rightSide);
-              }
-              matchCounter++;
-              newWay = true;
-            }
-          } else {
-            // FIXME: is this correct? this is how we deal with multiple matches
-            suggestionMatches.add(suggestionMatches.get(numbersToMatches[j]));
-          }
-        }
-
-        if (!newWay) {
-          // in case <match> elements weren't used (yet)
-          errorMessage = errorMessage.replace("\\" + (j + 1),
-              tokenReadings[firstMatchTok + repTokenPos - 1].getToken());
-        }
-      }
-      errMarker = errorMessage.indexOf('\\');
-      numberFollows = false;
-      errLen = errorMessage.length();
-      if (errMarker >= 0 && errMarker < errLen - 1) {
-        numberFollows = StringTools.isPositiveNumber(errorMessage
-            .charAt(errMarker + 1));
-      }
-    }
-    return errorMessage;
-  }
-
-  private static String formatMultipleSynthesis(final String[] matches,
-      final String leftSide, final String rightSide) {
-    String errorMessage = "";
-    String suggestionLeft = "";
-    String suggestionRight = "";
-    String rightSideNew = rightSide;
-    final int sPos = leftSide.lastIndexOf(SUGG_TAG);
-    if (sPos > 0) {
-      suggestionLeft = leftSide.substring(sPos + SUGG_TAG.length());
-    }
-    if (StringTools.isEmpty(suggestionLeft)) {
-      errorMessage = leftSide;
-    } else {
-      errorMessage = leftSide.substring(0, leftSide.lastIndexOf(SUGG_TAG))
-      + SUGG_TAG;
-    }
-    final int rPos = rightSide.indexOf(END_SUGG_TAG);
-    if (rPos > 0) {
-      suggestionRight = rightSide.substring(0, rPos);
-    }
-    if (!StringTools.isEmpty(suggestionRight)) {
-      rightSideNew = rightSide.substring(rightSide.indexOf(END_SUGG_TAG));
-    }
-    final int lastLeftSugEnd = leftSide.indexOf(END_SUGG_TAG);
-    final int lastLeftSugStart = leftSide.lastIndexOf(SUGG_TAG);
-    final StringBuilder sb = new StringBuilder();
-    sb.append(errorMessage);
-    for (int z = 0; z < matches.length; z++) {
-      sb.append(suggestionLeft);
-      sb.append(matches[z]);
-      sb.append(suggestionRight);
-      if ((z < matches.length - 1) && lastLeftSugEnd < lastLeftSugStart) {
-        sb.append(END_SUGG_TAG);
-        sb.append(", ");
-        sb.append(SUGG_TAG);
-      }
-    }
-    sb.append(rightSideNew);
-    return sb.toString();
-  }
-
-  /**
-   * For testing only.
-   */
-  public final List<Element> getElements() {
-    return patternElements;
-  }
-
+  
 }

Added: trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java
===================================================================
--- trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java                                (rev 0)
+++ trunk/JLanguageTool/src/java/org/languagetool/rules/patterns/PatternRuleMatcher.java        2012-04-01 13:33:36 UTC (rev 6676)
@@ -0,0 +1,413 @@
+package org.languagetool.rules.patterns;
+
+import org.languagetool.AnalyzedSentence;
+import org.languagetool.AnalyzedTokenReadings;
+import org.languagetool.Language;
+import org.languagetool.rules.RuleMatch;
+import org.languagetool.tools.StringTools;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
+/**
+ * Matches a pattern rule against plain text.
+ */
+class PatternRuleMatcher {
+
+  private static final String SUGGESTION_START_TAG = "<suggestion>";
+  private static final String SUGGESTION_END_TAG = "</suggestion>";
+  
+  private final PatternRule rule;
+  private final boolean useList;
+
+  PatternRuleMatcher(PatternRule rule, boolean useList) {
+    this.rule = rule;
+    this.useList = useList;
+  }
+
+  final RuleMatch[] match(final AnalyzedSentence text)
+    throws IOException {
+      final List<RuleMatch> ruleMatches = new ArrayList<RuleMatch>();
+      final AnalyzedTokenReadings[] tokens = text.getTokensWithoutWhitespace();
+      final int[] tokenPositions = new int[tokens.length + 1];
+      final int patternSize = rule.patternElements.size();
+      final int limit = Math.max(0, tokens.length - patternSize + 1);
+      Element elem = null;
+      int i = 0;
+      while (i < limit && !(rule.sentStart && i > 0)) {
+        boolean allElementsMatch = false;
+        int firstMatchToken = -1;
+        int lastMatchToken = -1;
+        int matchingTokens = 0;
+        int prevSkipNext = 0;
+        // this variable keeps the total number
+        // of tokens skipped
+        int skipShiftTotal = 0;
+        if (rule.testUnification) {
+          rule.unifier.reset();
+        }
+        for (int k = 0; k < patternSize; k++) {
+          final Element prevElement = elem;
+          elem = rule.patternElements.get(k);
+          rule.setupRef(firstMatchToken, elem, tokens);
+          final int nextPos = i + k + skipShiftTotal;
+          rule.prevMatched = false;
+          if (prevSkipNext + nextPos >= tokens.length || prevSkipNext < 0) { 
// SENT_END?
+            prevSkipNext = tokens.length - (nextPos + 1);
+          }
+          final int maxTok = Math.min(nextPos + prevSkipNext, tokens.length - 
(patternSize - k));
+          for (int m = nextPos; m <= maxTok; m++) {
+            allElementsMatch = !tokens[m].isImmunized() && 
rule.testAllReadings(tokens, elem, prevElement, m,
+                    firstMatchToken, prevSkipNext);
+            if (allElementsMatch) {
+              lastMatchToken = m;
+              final int skipShift = lastMatchToken - nextPos;
+              tokenPositions[matchingTokens] = skipShift + 1;
+              prevSkipNext = translateElementNo(elem.getSkipNext());
+              matchingTokens++;
+              skipShiftTotal += skipShift;
+              if (firstMatchToken == -1) {
+                firstMatchToken = lastMatchToken;
+              }
+              break;
+            }
+          }
+          if (!allElementsMatch) {
+            break;
+          }
+        }
+
+        if (allElementsMatch && matchingTokens == patternSize) {
+          final RuleMatch ruleMatch = createRuleMatch(tokenPositions, tokens,
+              firstMatchToken, lastMatchToken, matchingTokens);
+          if (ruleMatch != null) {
+            ruleMatches.add(ruleMatch);
+          }
+        }
+        i++;
+      }
+      return ruleMatches.toArray(new RuleMatch[ruleMatches.size()]);
+    }
+
+  private RuleMatch createRuleMatch(final int[] tokenPositions,
+        final AnalyzedTokenReadings[] tokens, final int firstMatchToken,
+        final int lastMatchToken, final int matchingTokens) throws IOException 
{
+      final String errMessage = formatMatches(tokens, tokenPositions,
+          firstMatchToken, rule.getMessage());
+      int correctedStPos = 0;
+      if (rule.startPositionCorrection > 0) {
+        for (int l = 0; l <= rule.startPositionCorrection; l++) {
+          correctedStPos += tokenPositions[l];
+        }
+        correctedStPos--;
+      }
+      int correctedEndPos = 0;
+      if (rule.endPositionCorrection < 0) {
+        int l = 0;
+        while (l > rule.endPositionCorrection) {
+          correctedEndPos -= tokenPositions[matchingTokens + l - 1];
+          l--;
+        }
+      }
+      AnalyzedTokenReadings firstMatchTokenObj = tokens[firstMatchToken
+                                                        + correctedStPos];
+      boolean startsWithUppercase = StringTools
+      .startsWithUppercase(firstMatchTokenObj.getToken())
+      && !matchConvertsCase();
+
+      if (firstMatchTokenObj.isSentStart()
+          && tokens.length > firstMatchToken + correctedStPos + 1) {
+        // make uppercasing work also at sentence start:
+        firstMatchTokenObj = tokens[firstMatchToken + correctedStPos + 1];
+        startsWithUppercase = 
StringTools.startsWithUppercase(firstMatchTokenObj
+            .getToken());
+      }
+      int fromPos = tokens[firstMatchToken + correctedStPos].getStartPos();
+      // FIXME: this is fishy, assumes that comma should always come before
+      // whitespace
+      if (errMessage.contains(SUGGESTION_START_TAG + ",")
+          && firstMatchToken + correctedStPos >= 1) {
+        fromPos = tokens[firstMatchToken + correctedStPos - 1].getStartPos()
+        + tokens[firstMatchToken + correctedStPos - 1].getToken().length();
+      }
+
+      final int toPos = tokens[lastMatchToken + correctedEndPos].getStartPos()
+      + tokens[lastMatchToken + correctedEndPos].getToken().length();
+      if (fromPos < toPos) { // this can happen with some skip="-1" when the 
last
+        // token is not matched
+        return new RuleMatch(rule, fromPos, toPos,
+            errMessage, rule.getShortMessage(), startsWithUppercase);
+      } // failed to create any rule match...
+      return null;
+    }
+
+  /**
+   * Checks if the suggestion starts with a match that is supposed to convert
+   * case. If it does, stop the default conversion to uppercase.
+   *
+   * @return true, if the match converts the case of the token.
+   */
+  private boolean matchConvertsCase() {
+    final List<Match> suggestionMatches = rule.getSuggestionMatches();
+    if (suggestionMatches != null && !suggestionMatches.isEmpty()) {
+      final int sugStart = rule.getMessage().indexOf(SUGGESTION_START_TAG) + 
SUGGESTION_START_TAG.length();
+      for (Match sMatch : suggestionMatches) {
+        if (!sMatch.isInMessageOnly() && sMatch.convertsCase()
+                && rule.getMessage().charAt(sugStart) == '\\') {
+          return true;
+        }
+      }
+    }
+    return false;
+  }
+
+  /**
+   * Gets the index of the element indexed by i, adding any offsets because of
+   * the phrases in the rule.
+   *
+   * @param i
+   *          Current element index.
+   * @return int Index translated into XML element no.
+   */
+  private int translateElementNo(final int i) {
+    if (!useList || i < 0) {
+      return i;
+    }
+    int j = 0;
+    for (int k = 0; k < i; k++) {
+      j += rule.getElementNo().get(k);
+    }
+    return j;
+  }
+
+  /**
+   * Replace back references generated with &lt;match&gt; and \\1 in message
+   * using Match class, and take care of skipping.
+   *
+   * @param tokenReadings
+   *          Array of AnalyzedTokenReadings that were matched against the
+   *          pattern
+   * @param positions
+   *          Array of relative positions of matched tokens
+   * @param firstMatchTok
+   *          Position of the first matched token
+   * @param errorMsg
+   *          String containing suggestion markup
+   * @return String Formatted message.
+   * @throws IOException
+   */
+  private String formatMatches(final AnalyzedTokenReadings[] tokenReadings,
+      final int[] positions, final int firstMatchTok, final String errorMsg)
+  throws IOException {
+    String errorMessage = errorMsg;
+    int matchCounter = 0;
+    final int[] numbersToMatches = new int[errorMsg.length()];
+    boolean newWay = false;
+    int errLen = errorMessage.length();
+    int errMarker = errorMessage.indexOf('\\');
+    boolean numberFollows = false;
+    if (errMarker >= 0 && errMarker < errLen - 1) {
+      numberFollows = StringTools.isPositiveNumber(errorMessage
+          .charAt(errMarker + 1));
+    }
+    while (errMarker >= 0 && numberFollows) {
+      final int backslashPos = errorMessage.indexOf('\\');
+      if (backslashPos >= 0 && 
StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + 1))) {
+        int numLen = 1;
+        while (backslashPos + numLen < errorMessage.length()
+            && StringTools.isPositiveNumber(errorMessage.charAt(backslashPos + 
numLen))) {
+          numLen++;
+        }
+        final int j = Integer.parseInt(errorMessage.substring(backslashPos + 
1, backslashPos
+            + numLen)) - 1;
+        int repTokenPos = 0;
+        int nextTokenPos = 0;
+        for (int l = 0; l <= j; l++) {
+          repTokenPos += positions[l];
+        }
+        if (j <= positions.length) {
+          nextTokenPos = firstMatchTok + repTokenPos + positions[j + 1];
+        }
+        final List<Match> suggestionMatches = rule.getSuggestionMatches();
+        if (suggestionMatches != null) {
+          if (matchCounter < suggestionMatches.size()) {
+            numbersToMatches[j] = matchCounter;
+            if (suggestionMatches.get(matchCounter) != null) {
+              final String[] matches = concatMatches(matchCounter, j,
+                  firstMatchTok + repTokenPos, tokenReadings, nextTokenPos);
+              final String leftSide = errorMessage.substring(0, backslashPos);
+              final String rightSide = errorMessage.substring(backslashPos + 
numLen);
+              if (matches.length == 1) {
+                errorMessage = leftSide + matches[0] + rightSide;
+              } else {
+                errorMessage = formatMultipleSynthesis(matches, leftSide,
+                    rightSide);
+              }
+              matchCounter++;
+              newWay = true;
+            }
+          } else {
+            // FIXME: is this correct? this is how we deal with multiple matches
+            suggestionMatches.add(suggestionMatches.get(numbersToMatches[j]));
+          }
+        }
+
+        if (!newWay) {
+          // in case <match> elements weren't used (yet)
+          errorMessage = errorMessage.replace("\\" + (j + 1),
+              tokenReadings[firstMatchTok + repTokenPos - 1].getToken());
+        }
+      }
+      errMarker = errorMessage.indexOf('\\');
+      numberFollows = false;
+      errLen = errorMessage.length();
+      if (errMarker >= 0 && errMarker < errLen - 1) {
+        numberFollows = StringTools.isPositiveNumber(errorMessage
+            .charAt(errMarker + 1));
+      }
+    }
+    return errorMessage;
+  }
+
+  private static String formatMultipleSynthesis(final String[] matches, // one repeated suggestion per entry of matches
+      final String leftSide, final String rightSide) { // presumably the message text before / after the match reference - confirm against caller
+    final String errorMessage;
+    String suggestionLeft = "";
+    String suggestionRight = "";
+    String rightSideNew = rightSide;
+    final int sPos = leftSide.lastIndexOf(SUGGESTION_START_TAG);
+    if (sPos > 0) { // strict '>': a start tag at index 0 is not picked up here
+      suggestionLeft = leftSide.substring(sPos + 
SUGGESTION_START_TAG.length()); // text following the last start tag
+    }
+    if (StringTools.isEmpty(suggestionLeft)) {
+      errorMessage = leftSide; // nothing to repeat on the left: keep leftSide unchanged
+    } else {
+      errorMessage = leftSide.substring(0, 
leftSide.lastIndexOf(SUGGESTION_START_TAG))
+      + SUGGESTION_START_TAG; // leftSide up to and including its last start tag
+    }
+    final int rPos = rightSide.indexOf(SUGGESTION_END_TAG);
+    if (rPos > 0) { // strict '>': an end tag at index 0 leaves suggestionRight empty
+      suggestionRight = rightSide.substring(0, rPos); // text preceding the first end tag
+    }
+    if (!StringTools.isEmpty(suggestionRight)) {
+      rightSideNew = 
rightSide.substring(rightSide.indexOf(SUGGESTION_END_TAG)); // drop the part already captured in suggestionRight
+    }
+    final int lastLeftSugEnd = leftSide.indexOf(SUGGESTION_END_TAG); // NOTE(review): first occurrence (indexOf) despite the "last" in the name
+    final int lastLeftSugStart = leftSide.lastIndexOf(SUGGESTION_START_TAG);
+    final StringBuilder sb = new StringBuilder();
+    sb.append(errorMessage);
+    for (int z = 0; z < matches.length; z++) { // emit suggestionLeft + match + suggestionRight for every entry
+      sb.append(suggestionLeft);
+      sb.append(matches[z]);
+      sb.append(suggestionRight);
+      if ((z < matches.length - 1) && lastLeftSugEnd < lastLeftSugStart) { // separator only between entries; presumably the index test means the last start tag is still open - TODO confirm
+        sb.append(SUGGESTION_END_TAG);
+        sb.append(", ");
+        sb.append(SUGGESTION_START_TAG);
+      }
+    }
+    sb.append(rightSideNew);
+    return sb.toString();
+  }
+
+  /**
+   * Concatenates the matches, and takes care of phrases (including inflection
+   * using synthesis).
+   * <p>
+   * When the rule holds no suggestion match at {@code start} the method
+   * returns {@code null}. When {@code phraseLen(index)} is 1 the result of the
+   * match's {@code toFinalString()} is returned directly. Otherwise one result
+   * array is collected per token of the phrase and the arrays are merged with
+   * {@code combineLists}.
+   * <p>
+   * Note that the method calls {@code setToken} and {@code setSynthesizer} on
+   * the {@code Match} object obtained from the rule.
+   *
+   * @param start
+   *          Position of the element as referenced by match element in the
+   *          rule.
+   * @param index
+   *          The index of the element found in the matching sentence.
+   * @param tokenIndex
+   *          The position of the token in the AnalyzedTokenReadings array.
+   *          The value handed to the match is {@code tokenIndex - 1}
+   *          (plus the offset inside the phrase).
+   * @param tokens
+   *          Array of {@link AnalyzedTokenReadings}.
+   * @param nextTokenPos
+   *          Only used to compute the skipped-token count
+   *          ({@code nextTokenPos - tokenIndex}) passed to the match;
+   *          presumably the position of the next matched token (TODO confirm
+   *          against the caller).
+   * @return Array of concatenated strings, or {@code null} if there is no
+   *         suggestion match at {@code start}.
+   * @throws IOException
+   *           in case disk operations (used in synthesizer) go wrong.
+   */
+  private String[] concatMatches(final int start, final int index,
+      final int tokenIndex, final AnalyzedTokenReadings[] tokens,
+      final int nextTokenPos)
+  throws IOException {
+    String[] finalMatch = null; // stays null when no match is stored at 'start'
+    final List<Match> suggestionMatches = rule.getSuggestionMatches();
+    if (suggestionMatches.get(start) != null) {
+      final int len = phraseLen(index);
+      final Language language = rule.language;
+      if (len == 1) { // single token: one setToken/toFinalString round is enough
+        final int skippedTokens = nextTokenPos - tokenIndex;
+        suggestionMatches.get(start).setToken(tokens, tokenIndex - 1, 
skippedTokens);
+        suggestionMatches.get(start).setSynthesizer(language.getSynthesizer());
+        finalMatch = suggestionMatches.get(start).toFinalString();
+      } else { // phrase: collect the alternatives of every token, then combine them
+        final List<String[]> matchList = new ArrayList<String[]>();
+        for (int i = 0; i < len; i++) {
+          final int skippedTokens = nextTokenPos - (tokenIndex + i);
+          suggestionMatches.get(start).setToken(tokens, tokenIndex - 1 + i, 
skippedTokens);
+          suggestionMatches.get(start)
+                  .setSynthesizer(language.getSynthesizer());
+          matchList.add(suggestionMatches.get(start).toFinalString());
+        }
+        return combineLists(matchList.toArray(new String[matchList.size()][]),
+            new String[matchList.size()], 0, language); // fresh work array, start at depth 0
+      }
+    }
+    return finalMatch;
+  }
+
+  /**
+   * Returns the length recorded for the rule element at the given index, as
+   * stored in the list returned by {@code rule.getElementNo()}. The result is
+   * an {@code int}, not a boolean; presumably a value greater than 1 means the
+   * element references a phrase composed of many tokens (TODO confirm).
+   *
+   * @param i
+   *          The index of the token.
+   * @return the list entry at {@code i}, or 1 when {@code useList} is false
+   *         or {@code i} lies past the end of the list.
+   **/
+  private int phraseLen(final int i) {
+    final List<Integer> elementNo = rule.getElementNo();
+    if (!useList || i > (elementNo.size() - 1)) { // no list in use, or index beyond the list: fall back to 1
+      return 1;
+    }
+    return elementNo.get(i);
+  }
+
+  /**
+   * Creates a Cartesian product of the arrays stored in the input array.
+   * <p>
+   * The method is recursive: at depth {@code r} each entry of
+   * {@code input[r]} is written to {@code output[r]} in turn and the method
+   * calls itself with {@code r + 1}. Once {@code r} equals
+   * {@code input.length}, the entries of {@code output} are joined into a
+   * single string, inserting the result of
+   * {@code StringTools.addSpace(nextEntry, lang)} between neighbouring
+   * entries. The {@code output} array passed in is overwritten while the
+   * combinations are built. If any {@code input[r]} is empty, no combination
+   * is produced and the returned array is empty.
+   *
+   * @param input
+   *          Array of string arrays to combine.
+   * @param output
+   *          Work array of strings.
+   * @param r
+   *          Starting parameter (use 0 to get all combinations).
+   * @param lang
+   *          Text language for adding spaces in some languages.
+   * @return Combined array of strings, one entry per combination.
+   */
+  private static String[] combineLists(final String[][] input,
+      final String[] output, final int r, final Language lang) {
+    final List<String> outputList = new ArrayList<String>();
+    if (r == input.length) { // every position of 'output' is filled: emit one combination
+      final StringBuilder sb = new StringBuilder();
+      for (int k = 0; k < output.length; k++) {
+        sb.append(output[k]);
+        if (k < output.length - 1) { // separator depends on the entry that follows
+          sb.append(StringTools.addSpace(output[k + 1], lang));
+        }
+      }
+      outputList.add(sb.toString());
+    } else {
+      for (int c = 0; c < input[r].length; c++) { // try every alternative at position r
+        output[r] = input[r][c];
+        final String[] sList = combineLists(input, output, r + 1, lang);
+        outputList.addAll(Arrays.asList(sList));
+      }
+    }
+    return outputList.toArray(new String[outputList.size()]);
+  }
+
+}

Modified: 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
===================================================================
--- 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
   2012-04-01 10:46:03 UTC (rev 6675)
+++ 
trunk/JLanguageTool/src/test/org/languagetool/rules/patterns/PatternRuleTest.java
   2012-04-01 13:33:36 UTC (rev 6676)
@@ -626,7 +626,7 @@
       final String left, final String right) throws Exception {
     final Class[] argClasses = { String[].class, String.class, String.class };
     final Object[] argObjects = { suggestions, left, right };
-    return TestTools.callStringStaticMethod(PatternRule.class,
+    return TestTools.callStringStaticMethod(PatternRuleMatcher.class,
         "formatMultipleSynthesis", argClasses, argObjects);
   }
 

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
This SF email is sponsored by:
Try Windows Azure free for 90 days Click Here 
http://p.sf.net/sfu/sfd2d-msazure
_______________________________________________
Languagetool-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/languagetool-cvs

[LanguageTool] SF.net SVN: languagetool:[6676] trunk/JLanguageTool/src

Reply via email to