This patch adds possessive quantifier support to gnu.regexp.  Possessive
quantifiers[1] (?+, *+, ++, {n,m}+) were introduced in Java 1.4.  The
patch adds the capability to the JAVA_1_4 syntax and also makes JAVA_1_4
replace PERL5 as the default java.util.regex.Pattern syntax (note that
possessive quantifiers are currently the only difference between the two
syntaxes).

The lack of possessive quantifiers was reported[2] as the cause of
blojsom's failure to start up.  New Mauve test cases[3] were also
written for these regular expressions.

[1] http://java.sun.com/docs/books/tutorial/extra/regex/quant.html
[2] http://gcc.gnu.org/bugzilla/show_bug.cgi?id=20435
[3] http://sources.redhat.com/ml/mauve-patches/2005/msg00049.html

-- 
Ziga


2005-05-28  Ziga Mahkovec  <[EMAIL PROTECTED]>

        PR libgcj/20435:
        * gnu/regexp/RESyntax.java (RE_POSSESSIVE_OPS): New field.
        (static): Add possessive matching to JAVA_1_4 syntax.
        * gnu/regexp/RETokenRepeated.java (possessive): New field.
        (makePossessive, isPossessive): New methods.
        (match): Don't back off during possessive matching.
        * gnu/regexp/RE.java (initialize): Accept possessive quantifier.
        * java/util/regex/Pattern.java (constructor): Switch syntax from PERL5
        to JAVA_1_4.

Index: gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.5
diff -u -p -r1.5 RE.java
--- gnu/regexp/RE.java	24 May 2005 08:24:35 -0000	1.5
+++ gnu/regexp/RE.java	28 May 2005 12:28:20 -0000
@@ -629,20 +629,29 @@ public class RE extends REToken {
 	currentToken = setRepeated(currentToken,0,Integer.MAX_VALUE,index);
       }
 
-      // ONE-OR-MORE REPEAT OPERATOR
+      // ONE-OR-MORE REPEAT OPERATOR / POSSESSIVE MATCHING OPERATOR
       //  + | \+ depending on RE_BK_PLUS_QM
       //  not available if RE_LIMITED_OPS is set
 
       else if ((unit.ch == '+') && !syntax.get(RESyntax.RE_LIMITED_OPS) && (!syntax.get(RESyntax.RE_BK_PLUS_QM) ^ (unit.bk || quot))) {
 	if (currentToken == null)
           throw new REException(getLocalizedMessage("repeat.no.token"),REException.REG_BADRPT,index);
-	if (currentToken instanceof RETokenRepeated)
-          throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
-	if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
+	
+	// Check for possessive matching on RETokenRepeated
+	if (currentToken instanceof RETokenRepeated) {
+	  RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
+	  if (syntax.get(RESyntax.RE_POSSESSIVE_OPS) && !tokenRep.isPossessive() && !tokenRep.isStingy())
+	    tokenRep.makePossessive();
+	  else
+	    throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
+
+	}
+	else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
 	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
-	if (currentToken.getMinimumLength() == 0)
+	else if (currentToken.getMinimumLength() == 0)
 	  throw new REException(getLocalizedMessage("repeat.empty.token"),REException.REG_BADRPT,index);
-	currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
+	else
+	  currentToken = setRepeated(currentToken,1,Integer.MAX_VALUE,index);
       }
 
       // ZERO-OR-ONE REPEAT OPERATOR / STINGY MATCHING OPERATOR
@@ -655,13 +664,14 @@ public class RE extends REToken {
 
 	// Check for stingy matching on RETokenRepeated
 	if (currentToken instanceof RETokenRepeated) {
-          if (syntax.get(RESyntax.RE_STINGY_OPS) && !((RETokenRepeated)currentToken).isStingy())
-            ((RETokenRepeated)currentToken).makeStingy();
-          else
-            throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
-        }
-        else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
-          throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
+	  RETokenRepeated tokenRep = (RETokenRepeated)currentToken;
+	  if (syntax.get(RESyntax.RE_STINGY_OPS) && !tokenRep.isStingy() && !tokenRep.isPossessive())
+	    tokenRep.makeStingy();
+	  else
+	    throw new REException(getLocalizedMessage("repeat.chained"),REException.REG_BADRPT,index);
+	}
+	else if (currentToken instanceof RETokenWordBoundary || currentToken instanceof RETokenWordBoundary)
+	  throw new REException(getLocalizedMessage("repeat.assertion"),REException.REG_BADRPT,index);
 	else
 	  currentToken = setRepeated(currentToken,0,1,index);
       }
Index: gnu/regexp/RESyntax.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RESyntax.java,v
retrieving revision 1.1
diff -u -p -r1.1 RESyntax.java
--- gnu/regexp/RESyntax.java	7 Mar 2004 23:58:54 -0000	1.1
+++ gnu/regexp/RESyntax.java	28 May 2005 12:28:20 -0000
@@ -197,7 +197,12 @@ public final class RESyntax implements S
    */
   public static final int RE_CHAR_CLASS_ESC_IN_LISTS   = 24;
 
-  private static final int BIT_TOTAL                   = 25;
+  /**
+   * Syntax bit.  Possessive matching is allowed (++, *+, ?+, {x,y}+).
+   */
+  public static final int RE_POSSESSIVE_OPS            = 25;
+
+  private static final int BIT_TOTAL                   = 26;
 
   /**
    * Predefined syntax.
@@ -425,6 +430,7 @@ public final class RESyntax implements S
 
       RE_SYNTAX_JAVA_1_4 = new RESyntax(RE_SYNTAX_PERL5)
 	  // XXX
+	  .set(RE_POSSESSIVE_OPS)         // *+,?+,++,{}+
 	  .makeFinal();
   }
 
Index: gnu/regexp/RETokenRepeated.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RETokenRepeated.java,v
retrieving revision 1.1
diff -u -p -r1.1 RETokenRepeated.java
--- gnu/regexp/RETokenRepeated.java	7 Mar 2004 23:58:54 -0000	1.1
+++ gnu/regexp/RETokenRepeated.java	28 May 2005 12:28:20 -0000
@@ -44,6 +44,7 @@ final class RETokenRepeated extends RETo
     private REToken token;
     private int min,max;
     private boolean stingy;
+    private boolean possessive;
     
     RETokenRepeated(int subIndex, REToken token, int min, int max) {
 	super(subIndex);
@@ -61,6 +62,16 @@ final class RETokenRepeated extends RETo
     boolean isStingy() {
 	return stingy;
     }
+
+    /** Sets possessive matching mode to true. */
+    void makePossessive() {
+        possessive = true;
+    }
+
+    /** Queries if this token has possessive matching enabled. */
+    boolean isPossessive() {
+        return possessive;
+    }
     
     /**
      * The minimum length of a repeated token is the minimum length
@@ -172,6 +183,8 @@ final class RETokenRepeated extends RETo
 		}
 	    }
 	    // else did not match rest of the tokens, try again on smaller sample
+	    // or break out when performing possessive matching
+	    if (possessive) break;
 	}
 	if (allResults != null) {
 	    mymatch.assignFrom(allResults); // does this get all?
Index: java/util/regex/Pattern.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/util/regex/Pattern.java,v
retrieving revision 1.11
diff -u -p -r1.11 Pattern.java
--- java/util/regex/Pattern.java	24 May 2005 08:24:35 -0000	1.11
+++ java/util/regex/Pattern.java	28 May 2005 12:28:21 -0000
@@ -84,8 +84,7 @@ public final class Pattern implements Se
     // if ((flags & UNICODE_CASE) != 0) gnuFlags =
     // if ((flags & CANON_EQ) != 0) gnuFlags =
 
-    // Eventually there will be such a thing as JDK 1_4 syntax
-    RESyntax syntax = RESyntax.RE_SYNTAX_PERL5;
+    RESyntax syntax = RESyntax.RE_SYNTAX_JAVA_1_4;
     if ((flags & UNIX_LINES) != 0)
       {
 	// Use a syntax set with \n for linefeeds?
_______________________________________________
Classpath-patches mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/classpath-patches

Reply via email to