Change 30678 by [EMAIL PROTECTED] on 2007/03/22 09:01:37 Subject: [PATCH] Resolve PL_curpm issues with (??{}) and fix corruption of match results when pattern is a qr. From: demerphq <[EMAIL PROTECTED]> Date: Wed, 21 Mar 2007 10:39:24 +0100 Message-ID: <[EMAIL PROTECTED]> plus two follow-up patches (minor tweaks)
Affected files ... ... //depot/perl/dump.c#257 edit ... //depot/perl/embed.fnc#471 edit ... //depot/perl/embed.h#672 edit ... //depot/perl/ext/Devel/Peek/t/Peek.t#28 edit ... //depot/perl/ext/Encode/t/Aliases.t#18 edit ... //depot/perl/ext/re/re.pm#48 edit ... //depot/perl/ext/re/t/re_funcs.t#8 edit ... //depot/perl/global.sym#328 edit ... //depot/perl/lib/Tie/Hash/NamedCapture.pm#3 edit ... //depot/perl/pp_ctl.c#605 edit ... //depot/perl/proto.h#808 edit ... //depot/perl/regcomp.c#565 edit ... //depot/perl/regcomp.h#119 edit ... //depot/perl/regexec.c#523 edit ... //depot/perl/regexp.h#92 edit ... //depot/perl/t/op/pat.t#282 edit ... //depot/perl/universal.c#158 edit Differences ... ==== //depot/perl/dump.c#257 (text) ==== Index: perl/dump.c --- perl/dump.c#256~30557~ 2007-03-12 15:14:27.000000000 -0700 +++ perl/dump.c 2007-03-22 02:01:37.000000000 -0700 @@ -1263,8 +1263,20 @@ Perl_dump_indent(aTHX_ level, file, " MINMATCH\n"); } if (mg->mg_obj) { - Perl_dump_indent(aTHX_ level, file, " MG_OBJ = 0x%"UVxf"\n", PTR2UV(mg->mg_obj)); - if (mg->mg_flags & MGf_REFCOUNTED) + Perl_dump_indent(aTHX_ level, file, " MG_OBJ = 0x%"UVxf"\n", + PTR2UV(mg->mg_obj)); + if (mg->mg_type == PERL_MAGIC_qr) { + regexp *re=(regexp *)mg->mg_obj; + SV *dsv= sv_newmortal(); + const char * const s = pv_pretty(dsv, re->wrapped, re->wraplen, + 60, NULL, NULL, + ( PERL_PV_PRETTY_QUOTE | PERL_PV_ESCAPE_RE | PERL_PV_PRETTY_ELIPSES | + ((re->extflags & RXf_UTF8) ? PERL_PV_ESCAPE_UNI : 0)) + ); + Perl_dump_indent(aTHX_ level+1, file, " PAT = %s\n", s); + Perl_dump_indent(aTHX_ level+1, file, " REFCNT = %"IVdf"\n", (IV*)re->refcnt); + } + if (mg->mg_flags & MGf_REFCOUNTED) do_sv_dump(level+2, file, mg->mg_obj, nest+1, maxnest, dumpops, pvlim); /* MG is already +1 */ } if (mg->mg_len) ==== //depot/perl/embed.fnc#471 (text) ==== Index: perl/embed.fnc --- perl/embed.fnc#470~30629~ 2007-03-19 01:58:08.000000000 -0700 +++ perl/embed.fnc 2007-03-22 02:01:37.000000000 -0700 @@ -677,6 +677,7 @@ |NN char* strend|NN char* strbeg|I32 minend \ |NN SV* screamer|U32 nosave Ap |void |pregfree |NULLOK struct regexp* r +EXp |struct regexp* |reg_temp_copy |NN struct regexp* r Ap |void |regfree_internal|NULLOK struct regexp* r Ap |char * |reg_stringify |NN MAGIC *mg|NULLOK STRLEN *lp|NULLOK U32 *flags|NULLOK I32 *haseval #if defined(USE_ITHREADS) ==== //depot/perl/embed.h#672 (text+w) ==== Index: perl/embed.h --- perl/embed.h#671~30629~ 2007-03-19 01:58:08.000000000 -0700 +++ perl/embed.h 2007-03-22 02:01:37.000000000 -0700 @@ -685,6 +685,9 @@ #define regclass_swash Perl_regclass_swash #define pregexec Perl_pregexec #define pregfree Perl_pregfree +#if defined(PERL_CORE) || defined(PERL_EXT) +#define reg_temp_copy Perl_reg_temp_copy +#endif #define regfree_internal Perl_regfree_internal #define reg_stringify Perl_reg_stringify #if defined(USE_ITHREADS) @@ -2906,6 +2909,9 @@ #define regclass_swash(a,b,c,d,e) Perl_regclass_swash(aTHX_ a,b,c,d,e) #define pregexec(a,b,c,d,e,f,g) Perl_pregexec(aTHX_ a,b,c,d,e,f,g) #define pregfree(a) Perl_pregfree(aTHX_ a) +#if defined(PERL_CORE) || defined(PERL_EXT) +#define reg_temp_copy(a) Perl_reg_temp_copy(aTHX_ a) +#endif #define regfree_internal(a) Perl_regfree_internal(aTHX_ a) #define reg_stringify(a,b,c,d) Perl_reg_stringify(aTHX_ a,b,c,d) #if defined(USE_ITHREADS) ==== //depot/perl/ext/Devel/Peek/t/Peek.t#28 (text) ==== Index: perl/ext/Devel/Peek/t/Peek.t --- perl/ext/Devel/Peek/t/Peek.t#27~29693~ 2007-01-05 01:55:22.000000000 -0800 +++ perl/ext/Devel/Peek/t/Peek.t 2007-03-22 02:01:37.000000000 -0700 @@ -282,6 +282,8 @@ MG_VIRTUAL = $ADDR MG_TYPE = PERL_MAGIC_qr\(r\) MG_OBJ = $ADDR + PAT = "\(\?-xism:tic\)" + REFCNT = 2 STASH = $ADDR\\t"Regexp"'); do_test(16, ==== //depot/perl/ext/Encode/t/Aliases.t#18 (text) ==== Index: perl/ext/Encode/t/Aliases.t --- perl/ext/Encode/t/Aliases.t#17~28098~ 2006-05-04 05:06:33.000000000 -0700 +++ perl/ext/Encode/t/Aliases.t 2007-03-22 02:01:37.000000000 -0700 @@ -122,6 +122,7 @@ print "# alias test; \$ON_EBCDIC == $ON_EBCDIC\n"; foreach my $a (keys %a2c){ + print "# $a => $a2c{$a}\n"; my $e = Encode::find_encoding($a); is((defined($e) and $e->name), $a2c{$a},$a) or warn "alias was $a";; ==== //depot/perl/ext/re/re.pm#48 (text) ==== Index: perl/ext/re/re.pm --- perl/ext/re/re.pm#47~30436~ 2007-03-01 02:54:09.000000000 -0800 +++ perl/ext/re/re.pm 2007-03-22 02:01:37.000000000 -0700 @@ -473,45 +473,39 @@ are using thinks is the longest. If you believe that the result is wrong please report it via the L<perlbug> utility. -=item regname($name,$qr,$all) +=item regname($name,$all) -Returns the contents of a named buffer. If $qr is missing, or is not the -result of a qr// then returns the result of the last successful match. If -$all is true then returns an array ref containing one entry per buffer, +Returns the contents of a named buffer of the last successful match. If +$all is true, then returns an array ref containing one entry per buffer, otherwise returns the first defined buffer. -=item regnames($qr,$all) +=item regnames($all) -Returns a list of all of the named buffers defined in a pattern. If -$all is true then it returns all names defined, if not returns only -names which were involved in the last successful match. If $qr is omitted -or is not the result of a qr// then returns the details for the last -successful match. +Returns a list of all of the named buffers defined in the last successful +match. If $all is true, then it returns all names defined, if not it returns +only names which were involved in the match. -=item regnames_iterinit($qr) +=item regnames_iterinit() -Initializes the internal hash iterator associated to a regexps named capture -buffers. If $qr is omitted resets the iterator associated with the regexp used -in the last successful match. +Initializes the internal hash iterator associated to the last successful +matches named capture buffers. -=item regnames_iternext($qr,$all) +=item regnames_iternext($all) -Gets the next key from the hash associated with a regexp. If $qr -is omitted resets the iterator associated with the regexp used in the -last successful match. If $all is true returns the keys of all of the +Gets the next key from the named capture buffer hash associated with the +last successful match. If $all is true returns the keys of all of the distinct named buffers in the pattern, if not returns only those names used in the last successful match. -=item regnames_count($qr) +=item regnames_count() -Returns the number of distinct names defined in the regexp $qr. If -$qr is omitted or not a regexp returns the count of names in the -last successful match. - -B<Note:> that this result is always the actual number of distinct -named buffers defined, it may not actually match that which is -returned by C<regnames()> and related routines when those routines -have not been called with the $all parameter set.. +Returns the number of distinct names defined in the pattern used +for the last successful match. + +B<Note:> this result is always the actual number of distinct +named buffers defined, it may not actually match that which is +returned by C<regnames()> and related routines when those routines +have not been called with the $all parameter set. =back ==== //depot/perl/ext/re/t/re_funcs.t#8 (text) ==== Index: perl/ext/re/t/re_funcs.t --- perl/ext/re/t/re_funcs.t#7~30517~ 2007-03-08 07:06:49.000000000 -0800 +++ perl/ext/re/t/re_funcs.t 2007-03-22 02:01:37.000000000 -0700 @@ -42,19 +42,14 @@ if ('1234'=~/(?:(?<A>\d)|(?<C>!))(?<B>\d)(?<A>\d)(?<B>\d)/){ - my $qr = qr/(?<foo>foo)(?<bar>bar)/; - my @names = sort +regnames($qr); - is("@names","","regnames"); - @names = sort +regnames($qr,1); - is("@names","bar foo","regnames - all"); - @names = sort +regnames(); + my @names = sort +regnames(); is("@names","A B","regnames"); - @names = sort +regnames(undef,1); + @names = sort +regnames(1); is("@names","A B C","regnames"); - is(join("", @{regname("A",undef,1)}),"13"); - is(join("", @{regname("B",undef,1)}),"24"); + is(join("", @{regname("A",1)}),"13"); + is(join("", @{regname("B",1)}),"24"); { - if ('foobar'=~/$qr/) { + if ('foobar'=~/(?<foo>foo)(?<bar>bar)/) { regnames_iterinit(); my @res; while (defined(my $key=regnames_iternext)) { @@ -68,20 +63,7 @@ } } is(regnames_count(),3); - is(regnames_count($qr),2); -} -{ - use warnings; - require Tie::Hash::NamedCapture; - my $qr = qr/(?<foo>foo)/; - if ( 'foo' =~ /$qr/ ) { - tie my %hash,"Tie::Hash::NamedCapture",re => $qr; - if ('bar'=~/bar/) { - # last successful match is now different - is($hash{foo},'foo'); # prints foo - } - } } # New tests above this line, don't forget to update the test count below! -use Test::More tests => 23; +use Test::More tests => 19; # No tests here! ==== //depot/perl/global.sym#328 (text+w) ==== Index: perl/global.sym --- perl/global.sym#327~30552~ 2007-03-12 08:30:39.000000000 -0700 +++ perl/global.sym 2007-03-22 02:01:37.000000000 -0700 @@ -390,6 +390,7 @@ Perl_regclass_swash Perl_pregexec Perl_pregfree +Perl_reg_temp_copy Perl_regfree_internal Perl_reg_stringify Perl_regdupe_internal ==== //depot/perl/lib/Tie/Hash/NamedCapture.pm#3 (text) ==== Index: perl/lib/Tie/Hash/NamedCapture.pm --- perl/lib/Tie/Hash/NamedCapture.pm#2~30518~ 2007-03-08 07:48:53.000000000 -0800 +++ perl/lib/Tie/Hash/NamedCapture.pm 2007-03-22 02:01:37.000000000 -0700 @@ -3,27 +3,18 @@ use strict; use warnings; -our $VERSION = "0.04"; +our $VERSION = "0.05"; sub TIEHASH { my $classname = shift; my %opts = @_; - if ($opts{re} && !re::is_regexp($opts{re})) { - require Carp; - Carp::croak("'re' parameter to " . __PACKAGE__ - . "->TIEHASH must be a qr//."); - } - - my $self = bless { - all => $opts{all}, - re => $opts{re}, - }, $classname; + my $self = bless { all => $opts{all} }, $classname; return $self; } sub FETCH { - return re::regname($_[1],$_[0]->{re},$_[0]->{all}); + return re::regname($_[1],$_[0]->{all}); } sub STORE { @@ -32,16 +23,16 @@ } sub FIRSTKEY { - re::regnames_iterinit($_[0]->{re}); + re::regnames_iterinit(); return $_[0]->NEXTKEY; } sub NEXTKEY { - return re::regnames_iternext($_[0]->{re},$_[0]->{all}); + return re::regnames_iternext($_[0]->{all}); } sub EXISTS { - return defined re::regname( $_[1], $_[0]->{re},$_[0]->{all}); + return defined re::regname( $_[1], $_[0]->{all}); } sub DELETE { @@ -55,7 +46,7 @@ } sub SCALAR { - return scalar re::regnames($_[0]->{re},$_[0]->{all}); + return scalar re::regnames($_[0]->{all}); } tie %+, __PACKAGE__; @@ -74,19 +65,13 @@ tie my %hash, "Tie::Hash::NamedCapture"; # %hash now behaves like %+ - tie my %hash, "Tie::Hash::NamedCapture", re => $qr, all => 1; + tie my %hash, "Tie::Hash::NamedCapture", all => 1; # %hash now access buffers from regexp in $qr like %- =head1 DESCRIPTION This module is used to implement the special hashes C<%+> and C<%->, but it -can be used independently. - -When the C<re> parameter is set to a C<qr//> expression, then the tied -hash is bound to that particular regexp and will return the results of its -last successful match. If the parameter is omitted, then the hash behaves -just as C<$1> does by referencing the last successful match in the -currently active dynamic scope. +can be used to tie other variables as you choose. When the C<all> parameter is provided, then the tied hash elements will be array refs listing the contents of each capture buffer whose name is the @@ -104,20 +89,6 @@ regular expression; the keys of C<%+>-like hashes list only the names of buffers that have captured (and that are thus associated to defined values). -For instance: - - my $qr = qr/(?<foo>bar)/; - if ( 'bar' =~ $qr ) { - tie my %hash, "Tie::Hash::NamedCapture", re => $qr; - print $+{foo}; # prints "bar" - print $hash{foo}; # prints "bar" too - if ( 'bar' =~ /bar/ ) { - # last successful match is now different - print $+{foo}; # prints nothing (undef) - print $hash{foo}; # still prints "bar" - } - } - =head1 SEE ALSO L<re>, L<perlmodlib/Pragmatic Modules>, L<perlvar/"%+">, L<perlvar/"%-">. ==== //depot/perl/pp_ctl.c#605 (text) ==== Index: perl/pp_ctl.c --- perl/pp_ctl.c#604~30629~ 2007-03-19 01:58:08.000000000 -0700 +++ perl/pp_ctl.c 2007-03-22 02:01:37.000000000 -0700 @@ -118,9 +118,9 @@ mg = mg_find(sv, PERL_MAGIC_qr); } if (mg) { - regexp * const re = (regexp *)mg->mg_obj; + regexp * const re = reg_temp_copy((regexp *)mg->mg_obj); ReREFCNT_dec(PM_GETRE(pm)); - PM_SETRE(pm, ReREFCNT_inc(re)); + PM_SETRE(pm, re); } else { STRLEN len; ==== //depot/perl/proto.h#808 (text+w) ==== Index: perl/proto.h --- perl/proto.h#807~30629~ 2007-03-19 01:58:08.000000000 -0700 +++ perl/proto.h 2007-03-22 02:01:37.000000000 -0700 @@ -1853,6 +1853,9 @@ __attribute__nonnull__(pTHX_6); PERL_CALLCONV void Perl_pregfree(pTHX_ struct regexp* r); +PERL_CALLCONV struct regexp* Perl_reg_temp_copy(pTHX_ struct regexp* r) + __attribute__nonnull__(pTHX_1); + PERL_CALLCONV void Perl_regfree_internal(pTHX_ struct regexp* r); PERL_CALLCONV char * Perl_reg_stringify(pTHX_ MAGIC *mg, STRLEN *lp, U32 *flags, I32 *haseval) __attribute__nonnull__(pTHX_1); ==== //depot/perl/regcomp.c#565 (text) ==== Index: perl/regcomp.c --- perl/regcomp.c#564~30647~ 2007-03-20 02:01:05.000000000 -0700 +++ perl/regcomp.c 2007-03-22 02:01:37.000000000 -0700 @@ -4183,7 +4183,7 @@ + (sizeof(STD_PAT_MODS) - 1) + (sizeof("(?:)") - 1); - Newx(r->wrapped, r->wraplen, char ); + Newx(r->wrapped, r->wraplen + 1, char ); p = r->wrapped; *p++='('; *p++='?'; if (has_k) @@ -4206,13 +4206,14 @@ } } - *p++=':'; + *p++ = ':'; Copy(RExC_precomp, p, r->prelen, char); r->precomp = p; p += r->prelen; if (has_runon) - *p++='\n'; - *p=')'; + *p++ = '\n'; + *p++ = ')'; + *p = 0; } r->intflags = 0; @@ -8665,31 +8666,93 @@ if (!r || (--r->refcnt > 0)) return; - - CALLREGFREE_PVT(r); /* free the private data */ + if (r->mother_re) { + ReREFCNT_dec(r->mother_re); + } else { + CALLREGFREE_PVT(r); /* free the private data */ + if (r->paren_names) + SvREFCNT_dec(r->paren_names); + Safefree(r->wrapped); + } + if (r->substrs) { + if (r->anchored_substr) + SvREFCNT_dec(r->anchored_substr); + if (r->anchored_utf8) + SvREFCNT_dec(r->anchored_utf8); + if (r->float_substr) + SvREFCNT_dec(r->float_substr); + if (r->float_utf8) + SvREFCNT_dec(r->float_utf8); + Safefree(r->substrs); + } RX_MATCH_COPY_FREE(r); #ifdef PERL_OLD_COPY_ON_WRITE if (r->saved_copy) - SvREFCNT_dec(r->saved_copy); + SvREFCNT_dec(r->saved_copy); #endif - if (r->substrs) { - if (r->anchored_substr) - SvREFCNT_dec(r->anchored_substr); - if (r->anchored_utf8) - SvREFCNT_dec(r->anchored_utf8); - if (r->float_substr) - SvREFCNT_dec(r->float_substr); - if (r->float_utf8) - SvREFCNT_dec(r->float_utf8); - Safefree(r->substrs); + if (r->swap) { + Safefree(r->swap->startp); + Safefree(r->swap->endp); + Safefree(r->swap); } - if (r->paren_names) - SvREFCNT_dec(r->paren_names); - Safefree(r->wrapped); Safefree(r->startp); Safefree(r->endp); Safefree(r); } + +/* reg_temp_copy() + + This is a hacky workaround to the structural issue of match results + being stored in the regexp structure which is in turn stored in + PL_curpm/PL_reg_curpm. The problem is that due to qr// the pattern + could be PL_curpm in multiple contexts, and could require multiple + result sets being associated with the pattern simultaneously, such + as when doing a recursive match with (??{$qr}) + + The solution is to make a lightweight copy of the regexp structure + when a qr// is returned from the code executed by (??{$qr}) this + lightweight copy doesnt actually own any of its data except for + the starp/end and the actual regexp structure itself. + +*/ + + +regexp * +Perl_reg_temp_copy (pTHX_ struct regexp *r) { + regexp *ret; + register const I32 npar = r->nparens+1; + (void)ReREFCNT_inc(r); + Newx(ret, 1, regexp); + StructCopy(r, ret, regexp); + Newx(ret->startp, npar, I32); + Copy(r->startp, ret->startp, npar, I32); + Newx(ret->endp, npar, I32); + Copy(r->endp, ret->endp, npar, I32); + ret->refcnt = 1; + if (r->substrs) { + struct reg_substr_datum *s; + I32 i; + Newx(ret->substrs, 1, struct reg_substr_data); + for (s = ret->substrs->data, i = 0; i < 3; i++, s++) { + s->min_offset = r->substrs->data[i].min_offset; + s->max_offset = r->substrs->data[i].max_offset; + s->end_shift = r->substrs->data[i].end_shift; + s->substr = SvREFCNT_inc(r->substrs->data[i].substr); + s->utf8_substr = SvREFCNT_inc(r->substrs->data[i].utf8_substr); + } + } + RX_MATCH_COPIED_off(ret); +#ifdef PERL_OLD_COPY_ON_WRITE + /* this is broken. */ + assert(0); + if (ret->saved_copy) + ret->saved_copy=NULL; +#endif + ret->mother_re = r; + ret->swap = NULL; + + return ret; +} #endif /* regfree_internal() @@ -8814,11 +8877,7 @@ Safefree(ri->data->what); Safefree(ri->data); } - if (ri->swap) { - Safefree(ri->swap->startp); - Safefree(ri->swap->endp); - Safefree(ri->swap); - } + Safefree(ri); } @@ -8848,7 +8907,7 @@ { dVAR; regexp *ret; - int i, npar; + I32 i, npar; struct reg_substr_datum *s; if (!r) @@ -8864,6 +8923,14 @@ Copy(r->startp, ret->startp, npar, I32); Newx(ret->endp, npar, I32); Copy(r->endp, ret->endp, npar, I32); + if(r->swap) { + Newx(ret->swap, 1, regexp_paren_ofs); + /* no need to copy these */ + Newx(ret->swap->startp, npar, I32); + Newx(ret->swap->endp, npar, I32); + } else { + ret->swap = NULL; + } if (r->substrs) { Newx(ret->substrs, 1, struct reg_substr_data); @@ -8877,11 +8944,12 @@ } else ret->substrs = NULL; - ret->wrapped = SAVEPVN(r->wrapped, r->wraplen); + ret->wrapped = SAVEPVN(r->wrapped, r->wraplen+1); ret->precomp = ret->wrapped + (r->precomp - r->wrapped); ret->prelen = r->prelen; ret->wraplen = r->wraplen; + ret->mother_re = NULL; ret->refcnt = r->refcnt; ret->minlen = r->minlen; ret->minlenret = r->minlenret; @@ -8942,14 +9010,6 @@ Newxc(reti, sizeof(regexp_internal) + (len+1)*sizeof(regnode), char, regexp_internal); Copy(ri->program, reti->program, len+1, regnode); - if(ri->swap) { - Newx(reti->swap, 1, regexp_paren_ofs); - /* no need to copy these */ - Newx(reti->swap->startp, npar, I32); - Newx(reti->swap->endp, npar, I32); - } else { - reti->swap = NULL; - } reti->regstclass = NULL; ==== //depot/perl/regcomp.h#119 (text) ==== Index: perl/regcomp.h --- perl/regcomp.h#118~30436~ 2007-03-01 02:54:09.000000000 -0800 +++ perl/regcomp.h 2007-03-22 02:01:37.000000000 -0700 @@ -101,11 +101,7 @@ /* This is the stuff that used to live in regexp.h that was truly private to the engine itself. It now lives here. */ -/* swap buffer for paren structs */ -typedef struct regexp_paren_ofs { - I32 *startp; - I32 *endp; -} regexp_paren_ofs; + typedef struct regexp_internal { int name_list_idx; /* Optional data index of an array of paren names */ @@ -118,7 +114,6 @@ U32 proglen; } u; - regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ regnode *regstclass; /* Optional startclass as identified or constructed by the optimiser */ struct reg_data *data; /* Additional miscellaneous data used by the program. ==== //depot/perl/regexec.c#523 (text) ==== Index: perl/regexec.c --- perl/regexec.c#522~30647~ 2007-03-20 02:01:05.000000000 -0700 +++ perl/regexec.c 2007-03-22 02:01:37.000000000 -0700 @@ -1652,9 +1652,8 @@ static void S_swap_match_buff (pTHX_ regexp *prog) { I32 *t; - RXi_GET_DECL(prog,progi); - if (!progi->swap) { + if (!prog->swap) { /* We have to be careful. If the previous successful match was from this regex we don't want a subsequent paritally successful match to clobber the old results. @@ -1662,16 +1661,16 @@ to the re, and switch the buffer each match. If we fail we switch it back, otherwise we leave it swapped. */ - Newxz(progi->swap, 1, regexp_paren_ofs); + Newxz(prog->swap, 1, regexp_paren_ofs); /* no need to copy these */ - Newxz(progi->swap->startp, prog->nparens + 1, I32); - Newxz(progi->swap->endp, prog->nparens + 1, I32); + Newxz(prog->swap->startp, prog->nparens + 1, I32); + Newxz(prog->swap->endp, prog->nparens + 1, I32); } - t = progi->swap->startp; - progi->swap->startp = prog->startp; + t = prog->swap->startp; + prog->swap->startp = prog->startp; prog->startp = t; - t = progi->swap->endp; - progi->swap->endp = prog->endp; + t = prog->swap->endp; + prog->swap->endp = prog->endp; prog->endp = t; } @@ -2611,6 +2610,10 @@ return 0; } +#define SETREX(Re1,Re2) \ + if (PL_reg_eval_set) PM_SETRE((PL_reg_curpm), (Re2)); \ + Re1 = (Re2) + STATIC I32 /* 0 failure, 1 success */ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) { @@ -3654,8 +3657,7 @@ } if (mg) { - re = (regexp *)mg->mg_obj; - (void)ReREFCNT_inc(re); + re = reg_temp_copy((regexp *)mg->mg_obj); /*XXX:dmq*/ } else { STRLEN len; @@ -3674,6 +3676,9 @@ PL_regsize = osize; } } + RX_MATCH_COPIED_off(re); + re->subbeg = rex->subbeg; + re->sublen = rex->sublen; rei = RXi_GET(re); DEBUG_EXECUTE_r( debug_start_match(re, do_utf8, locinput, PL_regeol, @@ -3715,7 +3720,7 @@ ST.prev_rex = rex; ST.prev_curlyx = cur_curlyx; - rex = re; + SETREX(rex,re); rexi = rei; cur_curlyx = NULL; ST.B = next; @@ -3735,7 +3740,7 @@ /* note: this is called twice; first after popping B, then A */ PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); - rex = ST.prev_rex; + SETREX(rex,ST.prev_rex); rexi = RXi_GET(rex); regcpblow(ST.cp); cur_eval = ST.prev_eval; @@ -3751,7 +3756,7 @@ /* note: this is called twice; first after popping B, then A */ PL_reg_flags ^= ST.toggle_reg_flags; ReREFCNT_dec(rex); - rex = ST.prev_rex; + SETREX(rex,ST.prev_rex); rexi = RXi_GET(rex); PL_reginput = locinput; REGCP_UNWIND(ST.lastcp); @@ -4760,7 +4765,7 @@ PL_reg_flags ^= st->u.eval.toggle_reg_flags; st->u.eval.prev_rex = rex; /* inner */ - rex = cur_eval->u.eval.prev_rex; /* outer */ + SETREX(rex,cur_eval->u.eval.prev_rex); rexi = RXi_GET(rex); cur_curlyx = cur_eval->u.eval.prev_curlyx; ReREFCNT_inc(rex); ==== //depot/perl/regexp.h#92 (text) ==== Index: perl/regexp.h --- perl/regexp.h#91~30412~ 2007-02-26 08:49:45.000000000 -0800 +++ perl/regexp.h 2007-03-22 02:01:37.000000000 -0700 @@ -31,6 +31,7 @@ struct reg_data; struct regexp_engine; +struct regexp; struct reg_substr_datum { I32 min_offset; @@ -48,11 +49,19 @@ #else #define SV_SAVED_COPY #endif + +/* swap buffer for paren structs */ +typedef struct regexp_paren_ofs { + I32 *startp; + I32 *endp; +} regexp_paren_ofs; + /* this is ordered such that the most commonly used fields are at the start of the struct */ typedef struct regexp { /* what engine created this regexp? */ const struct regexp_engine* engine; + struct regexp* mother_re; /* what re is this a lightweight copy of? */ /* Information about the match that the perl core uses to manage things */ U32 extflags; /* Flags used both externally and internally */ @@ -71,8 +80,10 @@ /* Data about the last/current match. These are modified during matching*/ U32 lastparen; /* last open paren matched */ U32 lastcloseparen; /* last close paren matched */ + regexp_paren_ofs *swap; /* Swap copy of *startp / *endp */ I32 *startp; /* Array of offsets from start of string (@-) */ I32 *endp; /* Array of offsets from start of string (@+) */ + char *subbeg; /* saved or original string so \digit works forever. */ I32 sublen; /* Length of string pointed by subbeg */ @@ -216,7 +227,6 @@ #define RXf_TAINTED_SEEN 0x20000000 /* two bits here */ - #define RX_HAS_CUTGROUP(prog) ((prog)->intflags & PREGf_CUTGROUP_SEEN) #define RX_MATCH_TAINTED(prog) ((prog)->extflags & RXf_TAINTED_SEEN) #define RX_MATCH_TAINTED_on(prog) ((prog)->extflags |= RXf_TAINTED_SEEN) ==== //depot/perl/t/op/pat.t#282 (xtext) ==== Index: perl/t/op/pat.t --- perl/t/op/pat.t#281~30647~ 2007-03-20 02:01:05.000000000 -0700 +++ perl/t/op/pat.t 2007-03-22 02:01:37.000000000 -0700 @@ -4267,11 +4267,11 @@ $re = qr/^ ( (??{ $grabit }) ) $ /x; my @res = '0902862349' =~ $re; iseq(join("-",@res),"0902862349", - 'PL_curpm is set properly on nested eval # TODO'); + 'PL_curpm is set properly on nested eval'); our $qr = qr/ (o) (??{ $1 }) /x; ok( 'boob'=~/( b (??{ $qr }) b )/x && 1, - "PL_curpm, nested eval # TODO"); + "PL_curpm, nested eval"); } { @@ -4325,7 +4325,17 @@ ok($c=~/${c}|\x{100}/); ok(@w==0); } - +{ + local $Message = "corruption of match results of qr// across scopes"; + my $qr=qr/(fo+)(ba+r)/; + 'foobar'=~/$qr/; + iseq("$1$2","foobar"); + { + 'foooooobaaaaar'=~/$qr/; + iseq("$1$2",'foooooobaaaaar'); + } + iseq("$1$2","foobar"); +} # Test counter is at bottom of file. Put new tests above here. #------------------------------------------------------------------- # Keep the following tests last -- they may crash perl @@ -4395,7 +4405,7 @@ iseq(0+$::test,$::TestCount,"Got the right number of tests!"); # Don't forget to update this! BEGIN { - $::TestCount = 1652; + $::TestCount = 1655; print "1..$::TestCount\n"; } ==== //depot/perl/universal.c#158 (text) ==== Index: perl/universal.c --- perl/universal.c#157~30629~ 2007-03-19 01:58:08.000000000 -0700 +++ perl/universal.c 2007-03-22 02:01:37.000000000 -0700 @@ -333,11 +333,11 @@ newXSproto("Internals::inc_sub_generation",XS_Internals_inc_sub_generation, file, ""); newXSproto("re::is_regexp", XS_re_is_regexp, file, "$"); - newXSproto("re::regname", XS_re_regname, file, ";$$$"); - newXSproto("re::regnames", XS_re_regnames, file, ";$$"); - newXSproto("re::regnames_iterinit", XS_re_regnames_iterinit, file, ";$"); - newXSproto("re::regnames_iternext", XS_re_regnames_iternext, file, ";$$"); - newXSproto("re::regnames_count", XS_re_regnames_count, file, ";$"); + newXSproto("re::regname", XS_re_regname, file, ";$$"); + newXSproto("re::regnames", XS_re_regnames, file, ";$"); + newXSproto("re::regnames_iterinit", XS_re_regnames_iterinit, file, ""); + newXSproto("re::regnames_iternext", XS_re_regnames_iternext, file, ";$"); + newXSproto("re::regnames_count", XS_re_regnames_count, file, ""); } @@ -1143,31 +1143,23 @@ dVAR; dXSARGS; - if (items < 1 || items > 3) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regname", "sv, qr = NULL, all = NULL"); + if (items < 1 || items > 2) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regname", "name[, all ]"); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; { SV * sv = ST(0); - SV * qr; SV * all; - regexp *re = NULL; + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; SV *bufs = NULL; if (items < 2) - qr = NULL; - else { - qr = ST(1); - } - - if (items < 3) all = NULL; else { - all = ST(2); + all = ST(1); } { - re = Perl_get_re_arg( aTHX_ qr, 1, NULL); if (SvPOK(sv) && re && re->paren_names) { bufs = CALLREG_NAMEDBUF(re,sv,all && SvTRUE(all)); if (bufs) { @@ -1189,30 +1181,22 @@ { dVAR; dXSARGS; - if (items < 0 || items > 2) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames", "sv = NULL, all = NULL"); + if (items < 0 || items > 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames", "[all]"); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; { - SV * sv; SV * all; - regexp *re = NULL; + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; IV count = 0; if (items < 1) - sv = NULL; - else { - sv = ST(0); - } - - if (items < 2) all = NULL; else { - all = ST(1); + all = ST(0); } { - re = Perl_get_re_arg( aTHX_ sv, 1, NULL ); if (re && re->paren_names) { HV *hv= re->paren_names; (void)hv_iterinit(hv); @@ -1259,29 +1243,19 @@ { dVAR; dXSARGS; - if (items < 0 || items > 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iterinit", "sv = NULL"); + if (items != 0 ) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iterinit"); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; { - SV * sv; - regexp *re = NULL; - - if (items < 1) - sv = NULL; - else { - sv = ST(0); - } - { - re = Perl_get_re_arg( aTHX_ sv, 1, NULL ); - if (re && re->paren_names) { - (void)hv_iterinit(re->paren_names); - XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); - } else { - XSRETURN_UNDEF; - } - } + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; + if (re && re->paren_names) { + (void)hv_iterinit(re->paren_names); + XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); + } else { + XSRETURN_UNDEF; + } PUTBACK; return; } @@ -1292,60 +1266,50 @@ { dVAR; dXSARGS; - if (items < 0 || items > 2) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iternext", "sv = NULL, all = NULL"); + if (items < 0 || items > 1) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_iternext", "[all]"); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; { - SV * sv; SV * all; - regexp *re; + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; if (items < 1) - sv = NULL; - else { - sv = ST(0); - } - - if (items < 2) all = NULL; else { - all = ST(1); + all = ST(0); } - { - re = Perl_get_re_arg( aTHX_ sv, 1, NULL ); - if (re && re->paren_names) { - HV *hv= re->paren_names; - while (1) { - HE *temphe = hv_iternext_flags(hv,0); - if (temphe) { - IV i; - IV parno = 0; - SV* sv_dat = HeVAL(temphe); - I32 *nums = (I32*)SvPVX(sv_dat); - for ( i = 0; i < SvIVX(sv_dat); i++ ) { - if ((I32)(re->lastcloseparen) >= nums[i] && - re->startp[nums[i]] != -1 && - re->endp[nums[i]] != -1) - { - parno = nums[i]; - break; - } - } - if (parno || (all && SvTRUE(all))) { - STRLEN len; - char *pv = HePV(temphe, len); - XPUSHs(newSVpvn(pv,len)); - XSRETURN(1); + if (re && re->paren_names) { + HV *hv= re->paren_names; + while (1) { + HE *temphe = hv_iternext_flags(hv,0); + if (temphe) { + IV i; + IV parno = 0; + SV* sv_dat = HeVAL(temphe); + I32 *nums = (I32*)SvPVX(sv_dat); + for ( i = 0; i < SvIVX(sv_dat); i++ ) { + if ((I32)(re->lastcloseparen) >= nums[i] && + re->startp[nums[i]] != -1 && + re->endp[nums[i]] != -1) + { + parno = nums[i]; + break; } - } else { - break; } + if (parno || (all && SvTRUE(all))) { + STRLEN len; + char *pv = HePV(temphe, len); + XPUSHs(newSVpvn(pv,len)); + XSRETURN(1); + } + } else { + break; } } - XSRETURN_UNDEF; - } + } + XSRETURN_UNDEF; PUTBACK; return; } @@ -1354,22 +1318,16 @@ XS(XS_re_regnames_count) { - SV * sv; - regexp *re = NULL; + regexp *re = PL_curpm ? PM_GETRE(PL_curpm) : NULL; dVAR; dXSARGS; - if (items < 0 || items > 1) - Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_count", "sv = NULL"); + if (items != 0) + Perl_croak(aTHX_ "Usage: %s(%s)", "re::regnames_count", ""); PERL_UNUSED_VAR(cv); /* -W */ PERL_UNUSED_VAR(ax); /* -Wall */ SP -= items; - if (items < 1) - sv = NULL; - else { - sv = ST(0); - } - re = Perl_get_re_arg( aTHX_ sv, 1, NULL ); + if (re && re->paren_names) { XPUSHs(newSViv(HvTOTALKEYS(re->paren_names))); } else { End of Patch.