This change does not affect the functionality, but hopefully
improves the readability of source codes.

ChangeLog
2006-01-31  Ito Kazumitsu  <[EMAIL PROTECTED]>

        * gnu/regexp/REMatch.java(REMatchList): New inner utility class
        for making a list of REMatch instances.
        * gnu/regexp/RETokenOneOf.java(match): Rewritten using REMatchList.
        * gnu/regexp/RETokenRepeated.java(findDoables): New method.
        (match): Rewritten using REMatchList.
        (matchRest): Rewritten using REMatchList.

Index: classpath/gnu/regexp/REMatch.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/REMatch.java,v
retrieving revision 1.5
diff -u -r1.5 REMatch.java
--- classpath/gnu/regexp/REMatch.java   31 Jan 2006 14:57:43 -0000      1.5
+++ classpath/gnu/regexp/REMatch.java   31 Jan 2006 16:01:40 -0000
@@ -264,4 +264,42 @@
        if (pos < input.length()) output.append(input.charAt(pos));
        return output.toString();
     }
+
+    static class REMatchList {
+        REMatch head;
+       REMatch tail;
+        REMatchList() {
+           head = tail = null;
+       }
+       /* Not used now. But we may need this some day?
+       void addHead(REMatch newone) {
+            if (head == null) {
+                head = newone;
+                tail = newone;
+                while (tail.next != null) {
+                    tail = tail.next;
+                }
+            }
+           else {
+                REMatch tmp = newone;
+                while (tmp.next != null) tmp = tmp.next;
+                tmp.next = head;
+               head = newone;
+           }
+       }
+       */
+       void addTail(REMatch newone) {
+            if (head == null) {
+                head = newone;
+                tail = newone;
+            }
+            else {
+                tail.next = newone;
+            }
+            while (tail.next != null) {
+                tail = tail.next;
+            }
+       }
+    }
+
 }
Index: classpath/gnu/regexp/RETokenOneOf.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenOneOf.java,v
retrieving revision 1.4
diff -u -r1.4 RETokenOneOf.java
--- classpath/gnu/regexp/RETokenOneOf.java      30 Jan 2006 12:35:54 -0000      
1.4
+++ classpath/gnu/regexp/RETokenOneOf.java      31 Jan 2006 16:01:40 -0000
@@ -94,8 +94,7 @@
   }
 
     private boolean matchP(CharIndexed input, REMatch mymatch) {
-    REMatch newMatch = null;
-    REMatch last = null;
+    REMatch.REMatchList newMatch = new REMatch.REMatchList();
     REToken tk;
     for (int i=0; i < options.size(); i++) {
        // In order that the backtracking can work,
@@ -108,21 +107,15 @@
        tk.subIndex = this.subIndex;
        REMatch tryMatch = (REMatch) mymatch.clone();
        if (tk.match(input, tryMatch)) { // match was successful
-           if (last == null) {
-               newMatch = tryMatch;
-               last = tryMatch;
-           } else {
-               last.next = tryMatch;
-               last = tryMatch;
-           }
+           newMatch.addTail(tryMatch);
        } // is a match
     } // try next option
 
-    if (newMatch != null) {
+    if (newMatch.head != null) {
        // set contents of mymatch equal to newMatch
 
        // try each one that matched
-       mymatch.assignFrom(newMatch);
+       mymatch.assignFrom(newMatch.head);
        return true;
     } else {
        return false;
Index: classpath/gnu/regexp/RETokenRepeated.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenRepeated.java,v
retrieving revision 1.7
diff -u -r1.7 RETokenRepeated.java
--- classpath/gnu/regexp/RETokenRepeated.java   30 Jan 2006 12:35:54 -0000      
1.7
+++ classpath/gnu/regexp/RETokenRepeated.java   31 Jan 2006 16:01:40 -0000
@@ -86,6 +86,27 @@
 
     boolean stopMatchingIfSatisfied = true;
 
+    private static REMatch findDoables(REToken tk,
+                       CharIndexed input, REMatch mymatch) {
+
+           REMatch.REMatchList doables = new REMatch.REMatchList();
+
+           // try next repeat at all possible positions
+           for (REMatch current = mymatch;
+                current != null; current = current.next) {
+               REMatch recurrent = (REMatch) current.clone();
+               int origin = recurrent.index;
+               tk = (REToken) tk.clone();
+               tk.next = tk.uncle = null;
+               if (tk.match(input, recurrent)) {
+                   if (recurrent.index == origin) recurrent.empty = true;
+                   // add all items in current to doables array
+                   doables.addTail(recurrent);
+               }
+           }
+           return doables.head;
+    }
+
     // We do need to save every possible point, but the number of clone()
     // invocations here is really a killer for performance on non-stingy
     // repeat operators.  I'm open to suggestions...
@@ -95,24 +116,8 @@
     // the subexpression back-reference operator allow that?
 
     boolean match(CharIndexed input, REMatch mymatch) {
-       // number of times we've matched so far
-       int numRepeats = 0; 
-       
        // Possible positions for the next repeat to match at
        REMatch newMatch = mymatch;
-       REMatch last = null;
-       REMatch current;
-
-       // Add the '0-repeats' index
-       // positions.elementAt(z) == position [] in input after <<z>> matches
-       Vector positions = new Vector();
-       positions.addElement(newMatch);
-       
-       // Declare variables used in loop
-       REMatch doables;
-       REMatch doablesLast;
-       REMatch recurrent;
-       int lastIndex = mymatch.index;
 
        // {0} needs some special treatment.
        if (alwaysEmpty) {
@@ -126,9 +131,39 @@
            }
        }
 
-       do {
+       // number of times we've matched so far
+       int numRepeats = 0; 
+       
+       REMatch doables;
+       int lastIndex = mymatch.index;
+       boolean emptyMatchFound = false;
+
+       while (numRepeats < min) {
+           doables = findDoables(token, input, newMatch);
+
+           // if none of the possibilities worked out, 
+           // it means that minimum number of repeats could not be found.
+           if (doables == null) return false;
+           
+           // reassign where the next repeat can match
+           newMatch = doables;
+           
+           // increment how many repeats we've successfully found
+           ++numRepeats;
+           
+           if (newMatch.empty) {
+               numRepeats = min;
+               emptyMatchFound = true;
+               break;
+           }
+           lastIndex = newMatch.index;
+       }
+
+       Vector positions = new Vector();
+
+       while (numRepeats <= max) {
            // We want to check something like  
-           //    if (stingy && (numRepeats >= min))
+           //    if (stingy)
            // and neglect the further matching.  But experience tells
            // such neglection may cause incomplete matching.
            // For example, if we neglect the seemingly unnecessay
@@ -137,138 +172,71 @@
            // matching, /(([a-c])b*?\2)*/ matches "ababbbcbc"
            // entirely when we wan to find only "ababb".
            // In order to make regression tests pass, we do as we did.
-           if (stopMatchingIfSatisfied && stingy && (numRepeats >= min)) {
-               REMatch result = matchRest(input, newMatch);
-               if (result != null) {
-                   mymatch.assignFrom(result);
-                   return true;
-               }
-           }
-
-           doables = null;
-           doablesLast = null;
-
-           // try next repeat at all possible positions
-           for (current = newMatch; current != null; current = current.next) {
-               recurrent = (REMatch) current.clone();
-               int origin = recurrent.index;
-               if (token.match(input, recurrent)) {
-                   if (recurrent.index == origin) recurrent.empty = true;
-                   // add all items in current to doables array
-                   if (doables == null) {
-                       doables = recurrent;
-                       doablesLast = recurrent;
-                   } else {
-                       // Order these from longest to shortest
-                       // Start by assuming longest (more repeats)
-                       doablesLast.next = recurrent;
-                   }
-                   // Find new doablesLast
-                   while (doablesLast.next != null) {
-                       doablesLast = doablesLast.next;
-                   }
+           if (stopMatchingIfSatisfied && stingy) {
+               REMatch results = matchRest(input, newMatch);
+               if (results != null) {
+                   mymatch.assignFrom(results);
+                   return true;
                }
            }
-           // if none of the possibilities worked out, break out of do/while
+           positions.add(newMatch);
+           if (emptyMatchFound) break;
+
+           doables = findDoables(token, input, newMatch);
            if (doables == null) break;
-           
-           // reassign where the next repeat can match
-           newMatch = doables;
-           
-           // increment how many repeats we've successfully found
-           ++numRepeats;
-           
-           positions.addElement(newMatch);
 
            // doables.index == lastIndex occurs either
            //   (1) when an empty string was the longest
            //       that matched this token.
-           //       And this case occurs either
-           //         (1-1) when this token is always empty,
-           //               for example "()" or "(())".
-           //         (1-2) when this token is not always empty
-           //               but can match an empty string, for example,
-           //               "a*", "(a|)".
            // or
            //   (2) when the same string matches this token many times.
            //       For example, "acbab" itself matches "a.*b" and
            //       its substrings "acb" and "ab" also match.
+           //       In this case, we do not have to go further until
+           //       numRepeats == max because the more numRepeats grows,
+           //       the shorter the substring matching this token becomes.
+           //       So the previous succesful match must have bee the best
+           //       match.  But this is not necessarily the case if stingy.
            if (doables.index == lastIndex) {
                if (doables.empty) {
-                   // Case (1): We break here, otherwise we will fall
-                   //          into an endless loop.
-                   if (numRepeats < min) numRepeats = min;
-                   break;
-               }
+                   emptyMatchFound = true;
+                }
                else {
-                   // Case (2): We cannot break here because, for example,
-                   // "acbacb" matches "a.*b" up to 2 times but
-                   // not 3 times.  So we have to check numRepeats >= min.
-                   // But we do not have to go further until numRepeats == max
-                   // because the more numRepeats grows, the shorter the
-                   // substring matching this token becomes. 
-                   if (numRepeats > min) {
-                       // This means the previous match was successful,
-                       // and that must be the best match.  This match
-                       // resulted in shortening the matched substring.
-                       numRepeats--;
-                       positions.remove(positions.size() - 1);
-                       break;
-                   }
-                   if (numRepeats == min) break;
+                   if (!stingy) break;
                }
-           }           
-           lastIndex = doables.index;
-       } while (numRepeats < max);
-       
-       // If there aren't enough repeats, then fail
-       if (numRepeats < min) return false;
-       
-       // We're greedy, but ease off until a true match is found 
-       int posIndex = positions.size();
-       
+           }
+           numRepeats++;
+           newMatch = doables;
+           lastIndex = newMatch.index;
+       }
+
+       // We're greedy, but ease off until a true match is found.
        // At this point we've either got too many or just the right amount.
        // See if this numRepeats works with the rest of the regexp.
-       REMatch allResults = null;
-       REMatch allResultsLast = null;
 
-       REMatch results = null;
-       int indexCount = posIndex - min;
-       if (indexCount <= 0) {
-           // This case occurs when we exited the previous do loop before
-           // numRepeats >= min because an empty string matched the token.
-           // In this case, an empty string can match as many times as
-           // desired.
-           indexCount = 1;
-       }
-       if (stingy) {
-           posIndex = (posIndex - indexCount - 1);
-       }
-       while (indexCount-- > 0) {
-           if (stingy) ++posIndex; else --posIndex;
-           newMatch = (REMatch) positions.elementAt(posIndex);
-           results = matchRest(input, newMatch);
-           if (results != null) {
-               if (allResults == null) {
-                   allResults = results;
-                   allResultsLast = results;
-               } else {
-                   // Order these from longest to shortest
-                   // Start by assuming longest (more repeats)
-                   // If stingy the order is shortest to longest.
-                   allResultsLast.next = results;
-               }
-               // Find new doablesLast
-               while (allResultsLast.next != null) {
-                   allResultsLast = allResultsLast.next;
-               }
+       REMatch.REMatchList allResults = new REMatch.REMatchList();
+
+       int posCount = positions.size();
+       int posIndex = (stingy ? 0 : posCount - 1);
+
+       while (posCount-- > 0) {
+           REMatch m = (REMatch) positions.elementAt(posIndex);
+            if (stingy) posIndex++; else posIndex--;
+
+           REMatch results = matchRest(input, m);
+            if (results != null) {
+               // Order these from longest to shortest
+               // Start by assuming longest (more repeats)
+               // If stingy the order is shortest to longest.
+               allResults.addTail(results);
+           }
+           else {
+               if (possessive) break;
            }
-           // else did not match rest of the tokens, try again on smaller 
sample
-           // or break out when performing possessive matching
-           if (possessive) break;
        }
-       if (allResults != null) {
-           mymatch.assignFrom(allResults); // does this get all?
+
+       if (allResults.head != null) {
+           mymatch.assignFrom(allResults.head); // does this get all?
            return true;
        }
        // If we fall out, no matches.
@@ -277,27 +245,17 @@
 
     private REMatch matchRest(CharIndexed input, final REMatch newMatch) {
        REMatch current, single;
-       REMatch doneIndex = null;
-       REMatch doneIndexLast = null;
+       REMatch.REMatchList doneIndex = new REMatch.REMatchList();
        // Test all possible matches for this number of repeats
        for (current = newMatch; current != null; current = current.next) {
            // clone() separates a single match from the chain
            single = (REMatch) current.clone();
            if (next(input, single)) {
                // chain results to doneIndex
-               if (doneIndex == null) {
-                   doneIndex = single;
-                   doneIndexLast = single;
-               } else {
-                   doneIndexLast.next = single;
-               }
-               // Find new doneIndexLast
-               while (doneIndexLast.next != null) {
-                   doneIndexLast = doneIndexLast.next;
-               }
+               doneIndex.addTail(single);
            }
        }
-       return doneIndex;
+       return doneIndex.head;
     }
 
     void dump(StringBuffer os) {

Reply via email to