vgritsenko 2004/01/30 06:36:21 Modified: src/java/org/apache/regexp RE.java RETest.java Log: Enhanced match multiline (patch from bug #4137), thanks to Oleg Sukhodolsky Revision Changes Path 1.15 +23 -17 jakarta-regexp/src/java/org/apache/regexp/RE.java Index: RE.java =================================================================== RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RE.java,v retrieving revision 1.14 retrieving revision 1.15 diff -u -r1.14 -r1.15 --- RE.java 6 Sep 2003 01:45:51 -0000 1.14 +++ RE.java 30 Jan 2004 14:36:21 -0000 1.15 @@ -295,6 +295,21 @@ * * <p> * + * <b><font face="times roman">Line terminators</font></b> + * <br> + * A line terminator is a one- or two-character sequence that marks + * the end of a line of the input character sequence. The following + * are recognized as line terminators: + * <ul> + * <li>A newline (line feed) character ('\n'),</li> + * <li>A carriage-return character followed immediately by a newline character ("\r\n"),</li> + * <li>A standalone carriage-return character ('\r'),</li> + * <li>A next-line character ('\u0085'),</li> + * <li>A line-separator character ('\u2028'), or</li> + * <li>A paragraph-separator character ('\u2029').</li> + * </ul> + * + * <p> * RE runs programs compiled by the RECompiler class. But the RE * matcher class does not include the actual regular expression compiler * for reasons of efficiency. 
In fact, if you want to pre-compile one @@ -462,9 +477,6 @@ static final int offsetNext = 2; // Next index offset (third char) static final int nodeSize = 3; // Node size (in chars) - /** Line Separator */ - static final String NEWLINE = System.getProperty("line.separator"); - // State of current program REProgram program; // Compiled regular expression 'program' transient CharacterIterator search; // The string being matched against @@ -1138,9 +1150,9 @@ case OP_ANY: - if((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) { + if ((matchFlags & MATCH_SINGLELINE) == MATCH_SINGLELINE) { // Match anything - if(search.isEnd(idx)) + if (search.isEnd(idx)) { return -1; } @@ -1840,20 +1852,14 @@ /** @return true if at the i-th position in the 'search' a newline ends */ private boolean isNewline(int i) { + char nextChar = search.charAt(i); - if (i < NEWLINE.length() - 1) { - return false; - } - - if (search.charAt(i) == '\n') { + if (nextChar == '\n' || nextChar == '\r' || nextChar == '\u0085' + || nextChar == '\u2028' || nextChar == '\u2029') + { return true; } - for (int j = NEWLINE.length() - 1; j >= 0; j--, i--) { - if (NEWLINE.charAt(j) != search.charAt(i)) { - return false; - } - } - return true; + return false; } } 1.9 +23 -2 jakarta-regexp/src/java/org/apache/regexp/RETest.java Index: RETest.java =================================================================== RCS file: /home/cvs/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v retrieving revision 1.8 retrieving revision 1.9 diff -u -r1.8 -r1.9 --- RETest.java 20 Dec 2003 17:21:44 -0000 1.8 +++ RETest.java 30 Jan 2004 14:36:21 -0000 1.9 @@ -411,6 +411,27 @@ s = r.subst("variable=value", "$1_test_$212", RE.REPLACE_BACKREFERENCES); assertEquals("Wrong subst() result", "variable_test_value12", s); + + // Test MATCH_MULTILINE. Test for eol/bol symbols. 
+ r = new RE("^abc$", RE.MATCH_MULTILINE); + if (!r.match("\nabc")) { + fail("\"\\nabc\" doesn't match \"^abc$\""); + } + if (!r.match("\rabc")) { + fail("\"\\rabc\" doesn't match \"^abc$\""); + } + if (!r.match("\r\nabc")) { + fail("\"\\r\\nabc\" doesn't match \"^abc$\""); + } + if (!r.match("\u0085abc")) { + fail("\"\\u0085abc\" doesn't match \"^abc$\""); + } + if (!r.match("\u2028abc")) { + fail("\"\\u2028abc\" doesn't match \"^abc$\""); + } + if (!r.match("\u2029abc")) { + fail("\"\\u2029abc\" doesn't match \"^abc$\""); + } } private void testPrecompiledRE() @@ -763,7 +784,7 @@ } log.append(" Paren count: " + regexp.getParenCount() + "\n"); - if(!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount())) + if (!assertEquals(log, "Wrong number of parens", parens.length, regexp.getParenCount())) { return false; }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]