Change 30169 by [EMAIL PROTECTED] on 2007/02/08 08:19:24
Subject: Re: [PATCH - provisional] H. Merijn Brands idea of buffer
numbering.
From: demerphq <[EMAIL PROTECTED]>
Date: Wed, 7 Feb 2007 22:53:25 +0100
Message-ID: <[EMAIL PROTECTED]>
Affected files ...
... //depot/perl/pod/perlre.pod#123 edit
... //depot/perl/regcomp.c#550 edit
... //depot/perl/t/op/re_tests#117 edit
Differences ...
==== //depot/perl/pod/perlre.pod#123 (text) ====
Index: perl/pod/perlre.pod
--- perl/pod/perlre.pod#122~30126~ 2007-02-05 03:29:08.000000000 -0800
+++ perl/pod/perlre.pod 2007-02-08 00:19:24.000000000 -0800
@@ -711,6 +711,29 @@
/(?:(?s-i)more.*than).*million/i
+=item C<(?|pattern)>
+X<(?|)> X<Branch reset>
+
+This is the "branch reset" pattern, which has the special property
+that the capture buffers are numbered from the same starting point
+in each branch.
+
+Normally capture buffers in a pattern are numbered sequentially, left
+to right in the pattern. Inside of this construct this behaviour is
+overridden so that the capture buffers in each branch share the same
+numbers. The numbering in each branch will be as normal, and any
+buffers following the use of this pattern will be numbered as though
+the construct contained only one branch, that being the one with the
+most capture buffers in it.
+
+Consider the following pattern. The numbers underneath show which
+buffer the captured content will be stored in.
+
+
+    # before  ---------------branch-reset----------- after
+    / ( a )  (?| x ( y ) z | (p (q) r) | (t) u (v) ) ( z ) /x
+    # 1            2         2  3        2     3     4
+
=item Look-Around Assertions
X<look-around assertion> X<lookaround assertion> X<look-around> X<lookaround>
==== //depot/perl/regcomp.c#550 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#549~30084~ 2007-01-31 02:29:59.000000000 -0800
+++ perl/regcomp.c 2007-02-08 00:19:24.000000000 -0800
@@ -4962,6 +4962,8 @@
const I32 oregflags = RExC_flags;
bool have_branch = 0;
bool is_open = 0;
+ I32 freeze_paren = 0;
+ I32 after_freeze = 0;
/* for (?g), (?gc), and (?o) warnings; warning
about (?c) will warn about (?g) -- japhy */
@@ -5212,6 +5214,13 @@
nextchar(pRExC_state);
return ret;
}
+ break;
+ case '|': /* (?|...) */
+ /* branch reset, behave like a (?:...) except that
+ buffers in alternations share the same numbers */
+ paren = ':';
+ after_freeze = freeze_paren = RExC_npar;
+ break;
case ':': /* (?:...) */
case '>': /* (?>...) */
break;
@@ -5668,6 +5677,11 @@
if (SIZE_ONLY)
RExC_extralen += 2; /* Account for LONGJMP. */
nextchar(pRExC_state);
+ if (freeze_paren) {
+ if (RExC_npar > after_freeze)
+ after_freeze = RExC_npar;
+ RExC_npar = freeze_paren;
+ }
br = regbranch(pRExC_state, &flags, 0, depth+1);
if (br == NULL)
@@ -5769,7 +5783,8 @@
FAIL("Junk on end of regexp"); /* "Can't happen". */
/* NOTREACHED */
}
-
+ if (after_freeze)
+ RExC_npar = after_freeze;
return(ret);
}
==== //depot/perl/t/op/re_tests#117 (text) ====
Index: perl/t/op/re_tests
--- perl/t/op/re_tests#116~30104~ 2007-02-02 22:25:11.000000000 -0800
+++ perl/t/op/re_tests 2007-02-08 00:19:24.000000000 -0800
@@ -1270,3 +1270,13 @@
(?=xy(?<=(aaxy))) ..aaxy.. y $1 aaxy
X(\w+)(?=\s)|X(\w+) Xab y [$1-$2] [-ab]
+
+#check that branch reset works ok.
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) d!o!da y $1-$2-$3 !o!-o-a
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) aabc y $1-$2-$3 a--c
+(?|a(.)b|d(.(o).)d|i(.)(.)j)(.) ixyjp y $1-$2-$3 x-y-p
+(?|(?|(a)|(b))|(?|(c)|(d))) a y $1 a
+(?|(?|(a)|(b))|(?|(c)|(d))) b y $1 b
+(?|(?|(a)|(b))|(?|(c)|(d))) c y $1 c
+(?|(?|(a)|(b))|(?|(c)|(d))) d y $1 d
+(.)(?|(.)(.)x|(.)d)(.) abcde y $1-$2-$3-$4-$5- b-c--e--
End of Patch.