From: Ito Kazumitsu <[EMAIL PROTECTED]> Date: Wed, 08 Feb 2006 00:01:46 +0900 (JST)
> I added support for backslash escape in a replacement as an > optional feature for fear that some application may depend > on the current behavior of gnu.regexp. Reading Sun's API documentation, I found that the replacement may contain not only $1-$9 but also $10, $11, $12, ... 2006-02-07 Ito Kazumitsu <[EMAIL PROTECTED]> Fixes bug #26112 * gnu/regexp/RE.java(REG_REPLACE_USE_BACKSLASHESCAPE): New execution flag which enables backslash escape in a replacement. (getReplacement): New public static method. (substituteImpl),(substituteAllImpl): Use getReplacement. * gnu/regexp/REMatch.java(substituteInto): Replace $n even if n>=10. * java/util/regex/Matcher.java(appendReplacement): Use RE#getReplacement. (replaceFirst),(replaceAll): Use RE.REG_REPLACE_USE_BACKSLASHESCAPE.
Index: classpath/gnu/regexp/RE.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/regexp/RE.java,v retrieving revision 1.16 diff -u -r1.16 RE.java --- classpath/gnu/regexp/RE.java 6 Feb 2006 14:03:59 -0000 1.16 +++ classpath/gnu/regexp/RE.java 8 Feb 2006 13:03:00 -0000 @@ -142,7 +142,7 @@ * Compilation flag. Do not differentiate case. Subsequent * searches using this RE will be case insensitive. */ - public static final int REG_ICASE = 2; + public static final int REG_ICASE = 0x02; /** * Compilation flag. The match-any-character operator (dot) @@ -150,14 +150,14 @@ * bit RE_DOT_NEWLINE (see RESyntax for details). This is equivalent to * the "/s" operator in Perl. */ - public static final int REG_DOT_NEWLINE = 4; + public static final int REG_DOT_NEWLINE = 0x04; /** * Compilation flag. Use multiline mode. In this mode, the ^ and $ * anchors will match based on newlines within the input. This is * equivalent to the "/m" operator in Perl. */ - public static final int REG_MULTILINE = 8; + public static final int REG_MULTILINE = 0x08; /** * Execution flag. @@ -186,14 +186,14 @@ * // m4.toString(): "fool"<BR> * </CODE> */ - public static final int REG_NOTBOL = 16; + public static final int REG_NOTBOL = 0x10; /** * Execution flag. * The match-end operator ($) does not match at the end * of the input string. Useful for matching on substrings. */ - public static final int REG_NOTEOL = 32; + public static final int REG_NOTEOL = 0x20; /** * Execution flag. @@ -207,7 +207,7 @@ * the example under REG_NOTBOL. It also affects the use of the \< * and \b operators. */ - public static final int REG_ANCHORINDEX = 64; + public static final int REG_ANCHORINDEX = 0x40; /** * Execution flag. @@ -216,14 +216,24 @@ * the corresponding subexpressions. For example, you may want to * replace all matches of "one dollar" with "$1". 
*/ - public static final int REG_NO_INTERPOLATE = 128; + public static final int REG_NO_INTERPOLATE = 0x80; /** * Execution flag. * Try to match the whole input string. An implicit match-end operator * is added to this regexp. */ - public static final int REG_TRY_ENTIRE_MATCH = 256; + public static final int REG_TRY_ENTIRE_MATCH = 0x0100; + + /** + * Execution flag. + * The substitute and substituteAll methods will treat the + * character '\' in the replacement as an escape to a literal + * character. In this case "\n", "\$", "\\", "\x40" and "\012" + * will become "n", "$", "\", "x40" and "012" respectively. + * This flag has no effect if REG_NO_INTERPOLATE is set on. + */ + public static final int REG_REPLACE_USE_BACKSLASHESCAPE = 0x0200; /** Returns a string representing the version of the gnu.regexp package. */ public static final String version() { @@ -1614,8 +1624,7 @@ StringBuffer buffer = new StringBuffer(); REMatch m = getMatchImpl(input,index,eflags,buffer); if (m==null) return buffer.toString(); - buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ? - replace : m.substituteInto(replace) ); + buffer.append(getReplacement(replace, m, eflags)); if (input.move(m.end[0])) { do { buffer.append(input.charAt(0)); @@ -1676,8 +1685,7 @@ StringBuffer buffer = new StringBuffer(); REMatch m; while ((m = getMatchImpl(input,index,eflags,buffer)) != null) { - buffer.append( ((eflags & REG_NO_INTERPOLATE) > 0) ? 
- replace : m.substituteInto(replace) ); + buffer.append(getReplacement(replace, m, eflags)); index = m.getEndIndex(); if (m.end[0] == 0) { char ch = input.charAt(0); @@ -1692,6 +1700,39 @@ } return buffer.toString(); } + + public static String getReplacement(String replace, REMatch m, int eflags) { + if ((eflags & REG_NO_INTERPOLATE) > 0) + return replace; + else { + if ((eflags & REG_REPLACE_USE_BACKSLASHESCAPE) > 0) { + StringBuffer sb = new StringBuffer(); + int l = replace.length(); + for (int i = 0; i < l; i++) { + char c = replace.charAt(i); + switch(c) { + case '\\': + i++; + // Let StringIndexOutOfBoundsException be thrown. + sb.append(replace.charAt(i)); + break; + case '$': + int i1 = i + 1; + while (i1 < replace.length() && + Character.isDigit(replace.charAt(i1))) i1++; + sb.append(m.substituteInto(replace.substring(i, i1))); + i = i1 - 1; + break; + default: + sb.append(c); + } + } + return sb.toString(); + } + else + return m.substituteInto(replace); + } + } /* Helper function for constructor */ private void addToken(REToken next) { Index: classpath/gnu/regexp/REMatch.java =================================================================== RCS file: /cvsroot/classpath/classpath/gnu/regexp/REMatch.java,v retrieving revision 1.6 diff -u -r1.6 REMatch.java --- classpath/gnu/regexp/REMatch.java 2 Feb 2006 15:16:59 -0000 1.6 +++ classpath/gnu/regexp/REMatch.java 8 Feb 2006 13:03:00 -0000 @@ -246,6 +246,8 @@ * <code>$0</code> through <code>$9</code>. <code>$0</code> matches * the full substring matched; <code>$<i>n</i></code> matches * subexpression number <i>n</i>. + * <code>$10, $11, ...</code> may match the 10th, 11th, ... subexpressions + * if such subexpressions exist. * * @param input A string consisting of literals and <code>$<i>n</i></code> tokens. 
*/ @@ -256,6 +258,16 @@ for (pos = 0; pos < input.length()-1; pos++) { if ((input.charAt(pos) == '$') && (Character.isDigit(input.charAt(pos+1)))) { int val = Character.digit(input.charAt(++pos),10); + int pos1 = pos + 1; + while (pos1 < input.length() && + Character.isDigit(input.charAt(pos1))) { + int val1 = val*10 + Character.digit(input.charAt(pos1),10); + if (val1 >= start.length) break; + pos1++; + val = val1; + } + pos = pos1 - 1; + if (val < start.length) { output.append(toString(val)); } Index: classpath/java/util/regex/Matcher.java =================================================================== RCS file: /cvsroot/classpath/classpath/java/util/regex/Matcher.java,v retrieving revision 1.12 diff -u -r1.12 Matcher.java --- classpath/java/util/regex/Matcher.java 6 Feb 2006 14:24:17 -0000 1.12 +++ classpath/java/util/regex/Matcher.java 8 Feb 2006 13:03:00 -0000 @@ -75,7 +75,8 @@ assertMatchOp(); sb.append(input.subSequence(appendPosition, match.getStartIndex()).toString()); - sb.append(match.substituteInto(replacement)); + sb.append(RE.getReplacement(replacement, match, + RE.REG_REPLACE_USE_BACKSLASHESCAPE)); appendPosition = match.getEndIndex(); return this; } @@ -190,7 +191,8 @@ { reset(); // Semantics might not quite match - return pattern.getRE().substitute(input, replacement, position); + return pattern.getRE().substitute(input, replacement, position, + RE.REG_REPLACE_USE_BACKSLASHESCAPE); } /** @@ -199,7 +201,8 @@ public String replaceAll (String replacement) { reset(); - return pattern.getRE().substituteAll(input, replacement, position); + return pattern.getRE().substituteAll(input, replacement, position, + RE.REG_REPLACE_USE_BACKSLASHESCAPE); } public int groupCount ()