Hi,

> I have made another fix so that the followings pass.

I have added some touches, and the good news is that
all the test cases remaining in Mauve's
gnu/testlet/java/util/regex/Pattern/testdata2
pass now.

ChangeLog:
2006-03-24  Ito Kazumitsu  <[EMAIL PROTECTED]>

        * gnu/regexp/CharIndexed.java(setLastMatch, getLastMatch, getAnchor):
        New methods.
        * gnu/regexp/CharIndexedCharArray.java(setLastMatch, getLastMatch,
        getAnchor): New methods.
        * gnu/regexp/CharIndexedInputStream.java(setLastMatch, getLastMatch,
        getAnchor): New methods.
        * gnu/regexp/CharIndexedString.java(setLastMatch, getLastMatch,
        getAnchor): New methods.
        * gnu/regexp/CharIndexedStringBuffer.java(setLastMatch, getLastMatch,
        getAnchor): New methods.
        * gnu/regexp/REMatch.java(start1): New field.
        * gnu/regexp/RE.java(initialize): Added support for \z and \G,
        (match): Set the starting position to start1[] instead of start[],
        (getMatchImpl): Set the found REMatch to the input,
        (makeCharIndexed): Made public.
        * gnu/regexp/RETokenEndOfPreviousMatch.java: New file.
        * gnu/regexp/RETokenEndSub.java(matchThis, findMatch):
        Set the value of start[] by copying from start1[].
        * gnu/regexp/RETokenLookBehind.java(matchThis): Added the settings of
        offset.
        * java/util/regex/Matcher.java(inputCharIndexed): New field
        to be used as a parameter of the RE#getMatch.

Index: classpath/gnu/regexp/CharIndexed.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexed.java,v
retrieving revision 1.3
diff -u -r1.3 CharIndexed.java
--- classpath/gnu/regexp/CharIndexed.java       6 Feb 2006 14:03:59 -0000       1.3
+++ classpath/gnu/regexp/CharIndexed.java       24 Mar 2006 15:40:59 -0000
@@ -93,4 +93,19 @@
      * Returns the effective length of this CharIndexed
      */
     int length();
+
+    /**
+     * Sets the REMatch last found on this input.
+     */
+    void setLastMatch(REMatch match);
+
+    /**
+     * Returns the REMatch last found on this input.
+     */
+    REMatch getLastMatch();
+
+    /**
+     * Returns the anchor.
+     */
+    int getAnchor();
 }
Index: classpath/gnu/regexp/CharIndexedCharArray.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexedCharArray.java,v
retrieving revision 1.3
diff -u -r1.3 CharIndexedCharArray.java
--- classpath/gnu/regexp/CharIndexedCharArray.java      6 Feb 2006 14:03:59 -0000       1.3
+++ classpath/gnu/regexp/CharIndexedCharArray.java      24 Mar 2006 15:40:59 -0000
@@ -68,4 +68,14 @@
     public int length() {
        return s.length - anchor;
     }
+
+    private REMatch lastMatch;
+    public void setLastMatch(REMatch match) {
+       lastMatch = (REMatch)match.clone();
+       lastMatch.anchor = anchor;
+    }
+    public REMatch getLastMatch() { return lastMatch; }
+
+    public int getAnchor() { return anchor; }
+
 }
Index: classpath/gnu/regexp/CharIndexedInputStream.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexedInputStream.java,v
retrieving revision 1.4
diff -u -r1.4 CharIndexedInputStream.java
--- classpath/gnu/regexp/CharIndexedInputStream.java    6 Feb 2006 14:03:59 -0000       1.4
+++ classpath/gnu/regexp/CharIndexedInputStream.java    24 Mar 2006 15:40:59 -0000
@@ -155,5 +155,21 @@
        throw new UnsupportedOperationException(
            "difficult to tell the length for an input stream");
     }
+
+    public void setLastMatch(REMatch match) {
+       throw new UnsupportedOperationException(
+           "difficult to support setLastMatch for an input stream");
+    }
+
+    public REMatch getLastMatch() {
+       throw new UnsupportedOperationException(
+           "difficult to support getLastMatch for an input stream");
+    }
+
+    public int getAnchor() {
+       throw new UnsupportedOperationException(
+           "difficult to support getAnchor for an input stream");
+    }
+
 }
 
Index: classpath/gnu/regexp/CharIndexedString.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexedString.java,v
retrieving revision 1.3
diff -u -r1.3 CharIndexedString.java
--- classpath/gnu/regexp/CharIndexedString.java 6 Feb 2006 14:03:59 -0000       1.3
+++ classpath/gnu/regexp/CharIndexedString.java 24 Mar 2006 15:40:59 -0000
@@ -70,4 +70,12 @@
     public int length() {
        return len - anchor;
     }
+
+    private REMatch lastMatch;
+    public void setLastMatch(REMatch match) {
+       lastMatch = (REMatch)match.clone();
+       lastMatch.anchor = anchor;
+    }
+    public REMatch getLastMatch() { return lastMatch; }
+    public int getAnchor() { return anchor; }
 }
Index: classpath/gnu/regexp/CharIndexedStringBuffer.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/CharIndexedStringBuffer.java,v
retrieving revision 1.3
diff -u -r1.3 CharIndexedStringBuffer.java
--- classpath/gnu/regexp/CharIndexedStringBuffer.java   6 Feb 2006 14:03:59 -0000       1.3
+++ classpath/gnu/regexp/CharIndexedStringBuffer.java   24 Mar 2006 15:40:59 -0000
@@ -68,4 +68,13 @@
   public int length() {
     return s.length() - anchor;
   }
+
+  private REMatch lastMatch;
+  public void setLastMatch(REMatch match) {
+    lastMatch = (REMatch)match.clone();
+    lastMatch.anchor = anchor;
+  }
+  public REMatch getLastMatch() { return lastMatch; }
+
+  public int getAnchor() { return anchor; }
 }
Index: classpath/gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.20
diff -u -r1.20 RE.java
--- classpath/gnu/regexp/RE.java        22 Mar 2006 22:25:00 -0000      1.20
+++ classpath/gnu/regexp/RE.java        24 Mar 2006 15:41:00 -0000
@@ -966,9 +966,15 @@
        }
 
        // END OF STRING OPERATOR
-        //  \Z
+        //  \Z, \z
 
-       else if (unit.bk && (unit.ch == 'Z') && syntax.get(RESyntax.RE_STRING_ANCHORS)) {
+       // FIXME: \Z and \z are different in that if the input string
+       // ends with a line terminator, \Z matches the position before
+       // the final terminator.  This special behavior of \Z is yet
+       // to be implemented.
+
+       else if (unit.bk && (unit.ch == 'Z' || unit.ch == 'z') &&
+                syntax.get(RESyntax.RE_STRING_ANCHORS)) {
          addToken(currentToken);
          currentToken = new RETokenEnd(subIndex,null);
        }
@@ -999,6 +1005,15 @@
          currentToken = getRETokenNamedProperty(subIndex,np,insens,index);
        }
 
+       // END OF PREVIOUS MATCH
+        //  \G
+
+       else if (unit.bk && (unit.ch == 'G') &&
+                syntax.get(RESyntax.RE_STRING_ANCHORS)) {
+         addToken(currentToken);
+         currentToken = new RETokenEndOfPreviousMatch(subIndex);
+       }
+
        // NON-SPECIAL CHARACTER (or escape to make literal)
         //  c | \* for example
 
@@ -1552,7 +1567,7 @@
        }
 
        // Note the start of this subexpression
-       mymatch.start[subIndex] = mymatch.index;
+       mymatch.start1[subIndex] = mymatch.index;
 
        return firstToken.match(input, mymatch);
     }
@@ -1562,8 +1577,6 @@
          mymatch.backtrackStack = new BacktrackStack();
        boolean b = match(input, mymatch);
        if (b) {
-           // mymatch.backtrackStack.push(new REMatch.Backtrack(
-           //     this, input, mymatch, null));
            return mymatch;
        }
        return null;
@@ -1652,6 +1665,7 @@
                  */
                  best.end[0] = best.index;
                  best.finish(input);
+                 input.setLastMatch(best);
                  return best;
              }
          }
@@ -2003,19 +2017,23 @@
   }
 
   // Cast input appropriately or throw exception
-  private static CharIndexed makeCharIndexed(Object input, int index) {
+  // This method was originally a private method, but has been made
+  // public because java.util.regex.Matcher uses this.
+  public static CharIndexed makeCharIndexed(Object input, int index) {
       // We could let a String fall through to final input, but since
       // it's the most likely input type, we check it first.
+      // The case where input is already an instance of CharIndexed is
+      // also supposed to be very likely.
     if (input instanceof String)
       return new CharIndexedString((String) input,index);
+    else if (input instanceof CharIndexed)
+       return (CharIndexed) input; // do we lose index info?
     else if (input instanceof char[])
       return new CharIndexedCharArray((char[]) input,index);
     else if (input instanceof StringBuffer)
       return new CharIndexedStringBuffer((StringBuffer) input,index);
     else if (input instanceof InputStream)
       return new CharIndexedInputStream((InputStream) input,index);
-    else if (input instanceof CharIndexed)
-       return (CharIndexed) input; // do we lose index info?
     else 
        return new CharIndexedString(input.toString(), index);
   }
Index: classpath/gnu/regexp/REMatch.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/REMatch.java,v
retrieving revision 1.10
diff -u -r1.10 REMatch.java
--- classpath/gnu/regexp/REMatch.java   18 Mar 2006 00:43:11 -0000      1.10
+++ classpath/gnu/regexp/REMatch.java   24 Mar 2006 15:41:00 -0000
@@ -65,7 +65,12 @@
 
     // Package scope; used by RE.
     int index; // used while matching to mark current match position in input
+    // start1[i] is set when the i-th subexp starts. And start1[i] is copied
+    // to start[i] when the i-th subexp ends.  So start[i] keeps the previously
+    // assigned value while the i-th subexp is being processed. This makes
+    // backreference to the i-th subexp within the i-th subexp possible.
     int[] start; // start positions (relative to offset) for each (sub)exp.
+    int[] start1; // start positions (relative to offset) for each (sub)exp.
     int[] end;   // end positions for the same
     // start[i] == -1 or end[i] == -1 means that the start/end position is void.
     // start[i] == p or end[i] == p where p < 0 and p != -1 means that
@@ -81,6 +86,7 @@
            REMatch copy = (REMatch) super.clone();
 
            copy.start = (int[]) start.clone();
+           copy.start1 = (int[]) start1.clone();
            copy.end = (int[]) end.clone();
 
            return copy;
@@ -91,6 +97,7 @@
 
     void assignFrom(REMatch other) {
        start = other.start;
+       start1 = other.start1;
        end = other.end;
        index = other.index;
        backtrackStack = other.backtrackStack;
@@ -98,6 +105,7 @@
 
     REMatch(int subs, int anchor, int eflags) {
        start = new int[subs+1];
+       start1 = new int[subs+1];
        end = new int[subs+1];
        this.anchor = anchor;
        this.eflags = eflags;
@@ -128,7 +136,7 @@
        offset = index;
        this.index = 0;
        for (int i = 0; i < start.length; i++) {
-           start[i] = end[i] = -1;
+           start[i] = start1[i] = end[i] = -1;
        }
        backtrackStack = null;
     }
Index: classpath/gnu/regexp/RETokenEndOfPreviousMatch.java
===================================================================
RCS file: classpath/gnu/regexp/RETokenEndOfPreviousMatch.java
diff -N classpath/gnu/regexp/RETokenEndOfPreviousMatch.java
--- /dev/null   1 Jan 1970 00:00:00 -0000
+++ classpath/gnu/regexp/RETokenEndOfPreviousMatch.java 24 Mar 2006 15:41:00 -0000
@@ -0,0 +1,72 @@
+/*  gnu/regexp/RETokenEndOfPreviousMatch.java
+    Copyright (C) 2006 Free Software Foundation, Inc.
+
+This file is part of GNU Classpath.
+
+GNU Classpath is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
+
+GNU Classpath is distributed in the hope that it will be useful, but
+WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Classpath; see the file COPYING.  If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+02110-1301 USA.
+
+Linking this library statically or dynamically with other modules is
+making a combined work based on this library.  Thus, the terms and
+conditions of the GNU General Public License cover the whole
+combination.
+
+As a special exception, the copyright holders of this library give you
+permission to link this library with independent modules to produce an
+executable, regardless of the license terms of these independent
+modules, and to copy and distribute the resulting executable under
+terms of your choice, provided that you also meet, for each linked
+independent module, the terms and conditions of the license of that
+module.  An independent module is a module which is not derived from
+or based on this library.  If you modify this library, you may extend
+this exception to your version of the library, but you are not
+obligated to do so.  If you do not wish to do so, delete this
+exception statement from your version. */
+
+package gnu.regexp;
+
+class RETokenEndOfPreviousMatch extends RETokenStart {
+
+    RETokenEndOfPreviousMatch(int subIndex) {
+       super(subIndex, null);
+    }
+
+    int getMaximumLength() {
+        return 0;
+    }
+    
+    REMatch matchThis(CharIndexed input, REMatch mymatch) {
+       REMatch lastMatch = input.getLastMatch();
+       if (lastMatch == null) return super.matchThis(input, mymatch);
+       if (input.getAnchor()+mymatch.index ==
+               lastMatch.anchor+lastMatch.index) {
+           return mymatch;
+       }
+       else {
+           return null;
+       }
+    }
+
+    boolean returnsFixedLengthmatches() { return true; }
+
+    int findFixedLengthMatches(CharIndexed input, REMatch mymatch, int max) {
+        if (matchThis(input, mymatch) != null) return max;
+       else return 0;
+    }
+    
+    void dump(StringBuffer os) {
+       os.append("\\G");
+    }
+}
Index: classpath/gnu/regexp/RETokenEndSub.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenEndSub.java,v
retrieving revision 1.4
diff -u -r1.4 RETokenEndSub.java
--- classpath/gnu/regexp/RETokenEndSub.java     11 Mar 2006 01:39:49 -0000      1.4
+++ classpath/gnu/regexp/RETokenEndSub.java     24 Mar 2006 15:41:00 -0000
@@ -47,11 +47,13 @@
     }
     
     REMatch matchThis(CharIndexed input, REMatch mymatch) {
+       mymatch.start[subIndex] = mymatch.start1[subIndex];
        mymatch.end[subIndex] = mymatch.index;
        return mymatch;
     }
 
     REMatch findMatch(CharIndexed input, REMatch mymatch) {
+       mymatch.start[subIndex] = mymatch.start1[subIndex];
        mymatch.end[subIndex] = mymatch.index;
        return super.findMatch(input, mymatch);
     }
Index: classpath/gnu/regexp/RETokenLookBehind.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenLookBehind.java,v
retrieving revision 1.3
diff -u -r1.3 RETokenLookBehind.java
--- classpath/gnu/regexp/RETokenLookBehind.java 18 Mar 2006 00:43:11 -0000      1.3
+++ classpath/gnu/regexp/RETokenLookBehind.java 24 Mar 2006 15:41:00 -0000
@@ -65,6 +65,7 @@
     int diff = behind.length() - input.length();
     int curIndex = trymatch.index + diff;
     trymatch.index = 0;
+    trymatch.offset = 0;
     RETokenMatchHereOnly stopper = new RETokenMatchHereOnly(curIndex);
     REToken re1 = (REToken) re.clone();
     re1.chain(stopper);
@@ -79,6 +80,7 @@
          }
       }
       trymatch.index = mymatch.index;
+      trymatch.offset = mymatch.offset;
       return trymatch;
     }
     else {
Index: classpath/java/util/regex/Matcher.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/util/regex/Matcher.java,v
retrieving revision 1.13
diff -u -r1.13 Matcher.java
--- classpath/java/util/regex/Matcher.java      9 Feb 2006 13:44:59 -0000       1.13
+++ classpath/java/util/regex/Matcher.java      24 Mar 2006 15:41:00 -0000
@@ -40,6 +40,7 @@
 
 import gnu.regexp.RE;
 import gnu.regexp.REMatch;
+import gnu.regexp.CharIndexed;
 
 /**
  * Instance of a regular expression applied to a char sequence.
@@ -50,6 +51,10 @@
 {
   private Pattern pattern;
   private CharSequence input;
+  // We use CharIndexed as an input object to the getMatch method in order
+  // that /\G/ (the end of the previous match) may work.  The information
+  // of the previous match is stored in the CharIndexed object.
+  private CharIndexed inputCharIndexed;
   private int position;
   private int appendPosition;
   private REMatch match;
@@ -58,6 +63,7 @@
   {
     this.pattern = pattern;
     this.input = input;
+    this.inputCharIndexed = RE.makeCharIndexed(input, 0);
   }
   
   /**
@@ -119,7 +125,7 @@
   public boolean find ()
   {
     boolean first = (match == null);
-    match = pattern.getRE().getMatch(input, position);
+    match = pattern.getRE().getMatch(inputCharIndexed, position);
     if (match != null)
       {
        int endIndex = match.getEndIndex();
@@ -150,7 +156,7 @@
    */
   public boolean find (int start)
   {
-    match = pattern.getRE().getMatch(input, start);
+    match = pattern.getRE().getMatch(inputCharIndexed, start);
     if (match != null)
       {
        position = match.getEndIndex();
@@ -212,7 +218,7 @@
  
   public boolean lookingAt ()
   {
-    match = pattern.getRE().getMatch(input, 0);
+    match = pattern.getRE().getMatch(inputCharIndexed, 0);
     if (match != null)
       {
        if (match.getStartIndex() == 0)
@@ -237,7 +243,7 @@
    */
   public boolean matches ()
   {
-    match = pattern.getRE().getMatch(input, 0, RE.REG_TRY_ENTIRE_MATCH);
+    match = pattern.getRE().getMatch(inputCharIndexed, 0, RE.REG_TRY_ENTIRE_MATCH);
     if (match != null)
       {
        if (match.getStartIndex() == 0)

Reply via email to