I added support for backslash escapes in replacement strings as an
optional feature (enabled by a new execution flag), because some
applications may depend on the current behavior of gnu.regexp.

ChangeLog

2006-02-07  Ito Kazumitsu  <[EMAIL PROTECTED]>

        Fixes bug #26112
        * gnu/regexp/RE.java(REG_REPLACE_USE_BACKSLASHESCAPE): New execution
        flag which enables backslash escape in a replacement.
        (getReplacement): New public static method. 
        (substituteImpl),(substituteAllImpl): Use getReplacement.
        * java/util/regex/Matcher.java(appendReplacement):
        Use RE#getReplacement.
        (replaceFirst),(replaceAll): Use RE.REG_REPLACE_USE_BACKSLASHESCAPE.

Index: classpath/gnu/regexp/RE.java
===================================================================
RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v
retrieving revision 1.16
diff -u -r1.16 RE.java
--- classpath/gnu/regexp/RE.java        6 Feb 2006 14:03:59 -0000       1.16
+++ classpath/gnu/regexp/RE.java        7 Feb 2006 14:38:32 -0000
@@ -142,7 +142,7 @@
    * Compilation flag. Do  not  differentiate  case.   Subsequent
    * searches  using  this  RE will be case insensitive.
    */
-  public static final int REG_ICASE = 2;
+  public static final int REG_ICASE = 0x02;
 
   /**
    * Compilation flag. The match-any-character operator (dot)
@@ -150,14 +150,14 @@
    * bit RE_DOT_NEWLINE (see RESyntax for details).  This is equivalent to
    * the "/s" operator in Perl.
    */
-  public static final int REG_DOT_NEWLINE = 4;
+  public static final int REG_DOT_NEWLINE = 0x04;
 
   /**
    * Compilation flag. Use multiline mode.  In this mode, the ^ and $
    * anchors will match based on newlines within the input. This is
    * equivalent to the "/m" operator in Perl.
    */
-  public static final int REG_MULTILINE = 8;
+  public static final int REG_MULTILINE = 0x08;
 
   /**
    * Execution flag.
@@ -186,14 +186,14 @@
    * //  m4.toString(): "fool"<BR>
    * </CODE>
    */
-  public static final int REG_NOTBOL = 16;
+  public static final int REG_NOTBOL = 0x10;
 
   /**
    * Execution flag.
    * The match-end operator ($) does not match at the end
    * of the input string. Useful for matching on substrings.
    */
-  public static final int REG_NOTEOL = 32;
+  public static final int REG_NOTEOL = 0x20;
 
   /**
    * Execution flag.
@@ -207,7 +207,7 @@
    * the example under REG_NOTBOL.  It also affects the use of the \&lt;
    * and \b operators.
    */
-  public static final int REG_ANCHORINDEX = 64;
+  public static final int REG_ANCHORINDEX = 0x40;
 
   /**
    * Execution flag.
@@ -216,14 +216,24 @@
    * the corresponding subexpressions.  For example, you may want to
    * replace all matches of "one dollar" with "$1".
    */
-  public static final int REG_NO_INTERPOLATE = 128;
+  public static final int REG_NO_INTERPOLATE = 0x80;
 
   /**
    * Execution flag.
    * Try to match the whole input string. An implicit match-end operator
    * is added to this regexp.
    */
-  public static final int REG_TRY_ENTIRE_MATCH = 256;
+  public static final int REG_TRY_ENTIRE_MATCH = 0x0100;
+
+  /**
+   * Execution flag.
+   * The substitute and substituteAll methods will treat the
+   * character '\' in the replacement as an escape to a literal
+   * character. In this case "\n", "\$", "\\", "\x40" and "\012"
+   * will become "n", "$", "\", "x40" and "012" respectively.
+   * This flag has no effect if REG_NO_INTERPOLATE is also set.
+   */
+  public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200;
 
   /** Returns a string representing the version of the gnu.regexp package. */
   public static final String version() {
@@ -1614,8 +1624,7 @@
     StringBuffer buffer = new StringBuffer();
     REMatch m = getMatchImpl(input,index,eflags,buffer);
     if (m==null) return buffer.toString();
-    buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
-                  replace : m.substituteInto(replace) );
+    buffer.append(getReplacement(replace, m, eflags));
     if (input.move(m.end[0])) {
       do {
        buffer.append(input.charAt(0));
@@ -1676,8 +1685,7 @@
     StringBuffer buffer = new StringBuffer();
     REMatch m;
     while ((m = getMatchImpl(input,index,eflags,buffer)) != null) {
-       buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ?
-                      replace : m.substituteInto(replace) );
+      buffer.append(getReplacement(replace, m, eflags));
       index = m.getEndIndex();
       if (m.end[0] == 0) {
        char ch = input.charAt(0);
@@ -1692,6 +1700,37 @@
     }
     return buffer.toString();
   }
+
+  public static String getReplacement(String replace, REMatch m, int eflags) {
+    if ((eflags & REG_NO_INTERPOLATE) > 0) // interpolation disabled: use the replacement verbatim
+      return replace;
+    else {
+      if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) {
+        StringBuffer sb = new StringBuffer();
+        int l = replace.length();
+        for (int i = 0; i < l; i++) {
+           char c = replace.charAt(i);
+            switch(c) {
+            case '\\':
+              i++; // step over the backslash; the next character is copied literally
+              // A trailing backslash has nothing to escape: charAt is deliberately left to throw StringIndexOutOfBoundsException.
+              sb.append(replace.charAt(i));
+              break;
+            case '$':
+              // Hand '$' plus the one following character to substituteInto; a trailing '$' deliberately makes substring throw StringIndexOutOfBoundsException.
+              sb.append(m.substituteInto(replace.substring(i, i+2)));
+              i++; // skip the character consumed together with '$'
+              break;
+            default:
+              sb.append(c);
+            }
+        }
+        return sb.toString();
+      }
+      else
+        return m.substituteInto(replace); // no backslash handling: the whole string goes to substituteInto
+    }
+  }    
   
   /* Helper function for constructor */
   private void addToken(REToken next) {
Index: classpath/java/util/regex/Matcher.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/util/regex/Matcher.java,v
retrieving revision 1.12
diff -u -r1.12 Matcher.java
--- classpath/java/util/regex/Matcher.java      6 Feb 2006 14:24:17 -0000       1.12
+++ classpath/java/util/regex/Matcher.java      7 Feb 2006 14:38:32 -0000
@@ -75,7 +75,8 @@
     assertMatchOp();
     sb.append(input.subSequence(appendPosition,
                                match.getStartIndex()).toString());
-    sb.append(match.substituteInto(replacement));
+    sb.append(RE.getReplacement(replacement, match,
+       RE.REG_REPLACE_USE_BACKSLASHESCAPE));
     appendPosition = match.getEndIndex();
     return this;
   }
@@ -190,7 +191,8 @@
   {
     reset();
     // Semantics might not quite match
-    return pattern.getRE().substitute(input, replacement, position);
+    return pattern.getRE().substitute(input, replacement, position,
+       RE.REG_REPLACE_USE_BACKSLASHESCAPE);
   }
 
   /**
@@ -199,7 +201,8 @@
   public String replaceAll (String replacement)
   {
     reset();
-    return pattern.getRE().substituteAll(input, replacement, position);
+    return pattern.getRE().substituteAll(input, replacement, position,
+       RE.REG_REPLACE_USE_BACKSLASHESCAPE);
   }
   
   public int groupCount ()

Reply via email to