In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/4e0341d2ce817c9956f7f78e36bcaf8b764e18fc?hp=81baec7793948a407103a7d8ae91755bede993bd>
- Log ----------------------------------------------------------------- commit 4e0341d2ce817c9956f7f78e36bcaf8b764e18fc Author: David Mitchell <[email protected]> Date: Wed Mar 25 17:11:40 2015 +0000 Perl_save_re_context(): re-indent after last commit whitespace-only change. M regcomp.c commit 3553f4fa11fd9e8bb0797ace43605cc33ebf32fa Author: David Mitchell <[email protected]> Date: Wed Mar 25 16:59:04 2015 +0000 save_re_context(): do "local $n" with no PL_curpm RT #124109. 2c1f00b9036 localised PL_curpm to NULL when calling swash init code (i.e. perl-level code that is loaded and executed when something like "lc $large_codepoint" is executed). b4fa55d3f1 followed this up by gutting Perl_save_re_context(), since that function did, basically, if (PL_curpm) { for (i = 1; i <= RX_NPARENS(PM_GETRE(PL_curpm))) { do the C equivalent of the perl code "local ${i}"; } } and now that PL_curpm was null, the code wasn't called any more. However, it turns out that the localisation *was* still needed, it's just that nothing in the test suite actually tested for it. In something like the following: $x = "\x{41c}"; $x =~ /(.*)/; $s = lc $1; pp_lc() calls get magic on $1, which sets $1's PV value to a copy of the substring captured by the current pattern match. Then pp_lc() calls a function to convert the string to upper case, which triggers a swash load, which calls perl code that does a pattern match and, most importantly, uses the value of $1. This triggers get magic on $1, which overwrites $1's PV value with a new value. When control returns to pp_lc(), $1 now holds the wrong string value. Hence $1, $2 etc need localising as well as PL_curpm. The old way that Perl_save_re_context() used to work (localising $1..${RX_NPARENS}) won't work directly when PL_curpm is NULL (as in the swash case), since we don't know how many vars to localise. In this case, hard-code it as localising $1,$2,$3 and add a porting test file that checks that the utf8.pm code and dependences don't use anything outside those 3 vars. M MANIFEST M regcomp.c A t/porting/re_context.t M t/re/pat_advanced.t commit e8d8f801f452fb6a459fa7375ce32ec55300a01d Author: David Mitchell <[email protected]> Date: Wed Mar 25 16:21:31 2015 +0000 Revert "Gut Perl_save_re_context" This reverts commit b4fa55d3f12c6d98b13a8b3db4f8d921c8e56edc. Turns out we need Perl_save_re_context() after all M regcomp.c commit 2782061f5102a81e1eae39cce864ce172fbea63d Author: David Mitchell <[email protected]> Date: Thu Mar 19 20:35:57 2015 +0000 Revert "Donât call save_re_context" This reverts commit d28a9254e445aee7212523d9a7ff62ae0a743fec. Turns out we need save_re_context() after all M mg.c M regcomp.c M sv.c M utf8.c M util.c commit 7c6e85ad045c7a9841bf1c62d1dd22bf1705a168 Author: David Mitchell <[email protected]> Date: Thu Mar 19 20:30:04 2015 +0000 Revert "Mathomise save_re_context" This reverts commit 0ddd4a5b1910c8bfa9b7e55eb0db60a115fe368c. Turns out we need the save_re_context() function after all. M mathoms.c M regcomp.c ----------------------------------------------------------------------- Summary of changes: MANIFEST | 1 + mathoms.c | 6 ------ mg.c | 1 + regcomp.c | 42 ++++++++++++++++++++++++++++++++++++++++++ sv.c | 2 ++ t/porting/re_context.t | 43 +++++++++++++++++++++++++++++++++++++++++++ t/re/pat_advanced.t | 13 +++++++++++++ utf8.c | 5 +++++ util.c | 1 + 9 files changed, 108 insertions(+), 6 deletions(-) create mode 100644 t/porting/re_context.t diff --git a/MANIFEST b/MANIFEST index a5be49d..eaa205a 100644 --- a/MANIFEST +++ b/MANIFEST @@ -5384,6 +5384,7 @@ t/porting/perlfunc.t Test that Functions_pm.PL can parse perlfunc.pod t/porting/podcheck.t Test the POD of shipped modules is well formed t/porting/pod_rules.t Check that various pod lists are consistent t/porting/readme.t Check that all files in Porting/ are mentioned in Porting/README.pod +t/porting/re_context.t Check assumptions made by save_re_context() t/porting/regen.t Check that regen.pl doesn't need running t/porting/ss_dup.t Check that sv.c:ss_dup handle everything t/porting/test_bootstrap.t Test that the instructions for test bootstrapping aren't accidentally overlooked. diff --git a/mathoms.c b/mathoms.c index bcce2ca..d659883 100644 --- a/mathoms.c +++ b/mathoms.c @@ -1792,12 +1792,6 @@ Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen) return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen)); } -void -Perl_save_re_context(pTHX) -{ - PERL_UNUSED_CONTEXT; -} - /* =for apidoc Am|HV *|pad_compname_type|PADOFFSET po diff --git a/mg.c b/mg.c index b03510b..064a1ae 100644 --- a/mg.c +++ b/mg.c @@ -1802,6 +1802,7 @@ Perl_magic_methcall(pTHX_ SV *sv, const MAGIC *mg, SV *meth, U32 flags, if (flags & G_WRITING_TO_STDERR) { SAVETMPS; + save_re_context(); SAVESPTR(PL_stderrgv); PL_stderrgv = NULL; } diff --git a/regcomp.c b/regcomp.c index 50a9e6c..5d5332d 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6180,6 +6180,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const pRExC_state, ENTER; SAVETMPS; + save_re_context(); PUSHSTACKi(PERLSI_REQUIRE); /* G_RE_REPARSING causes the toker to collapse \\ into \ when * parsing qr''; normally only q'' does this. It also alters @@ -17710,6 +17711,47 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const char* pat2,...) Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf)); } +/* XXX Here's a total kludge. But we need to re-enter for swash routines. */ + +#ifndef PERL_IN_XSUB_RE +void +Perl_save_re_context(pTHX) +{ + I32 nparens = -1; + I32 i; + + /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */ + + if (PL_curpm) { + const REGEXP * const rx = PM_GETRE(PL_curpm); + if (rx) + nparens = RX_NPARENS(rx); + } + + /* RT #124109. This is a complete hack; in the SWASHNEW case we know + * that PL_curpm will be null, but that utf8.pm and the modules it + * loads will only use $1..$3. + * The t/porting/re_context.t test file checks this assumption. + */ + if (nparens == -1) + nparens = 3; + + for (i = 1; i <= nparens; i++) { + char digits[TYPE_CHARS(long)]; + const STRLEN len = my_snprintf(digits, sizeof(digits), + "%lu", (long)i); + GV *const *const gvp + = (GV**)hv_fetch(PL_defstash, digits, len, 0); + + if (gvp) { + GV * const gv = *gvp; + if (SvTYPE(gv) == SVt_PVGV && GvSV(gv)) + save_scalar(gv); + } + } +} +#endif + #ifdef DEBUGGING STATIC void diff --git a/sv.c b/sv.c index 4a818f2..467dc24 100644 --- a/sv.c +++ b/sv.c @@ -15286,6 +15286,7 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding) nsv = sv_newmortal(); SvSetSV_nosteal(nsv, sv); } + save_re_context(); PUSHMARK(sp); EXTEND(SP, 3); PUSHs(encoding); @@ -15356,6 +15357,7 @@ Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding, dSP; ENTER; SAVETMPS; + save_re_context(); PUSHMARK(sp); EXTEND(SP, 6); PUSHs(encoding); diff --git a/t/porting/re_context.t b/t/porting/re_context.t new file mode 100644 index 0000000..5467b93 --- /dev/null +++ b/t/porting/re_context.t @@ -0,0 +1,43 @@ +#!./perl -w +# +# Check that utf8.pm and its dependencies only use the subset of the +# $1..$n capture vars that Perl_save_re_context() is hard-coded to +# localise, because that function has no efficient way of determining at +# runtime what vars to localise. +# +# Note that this script tests for the existence of symbol table entries in +# %::, so @4 etc would trigger a failure as well as $4. +# +# If tests start to fail, either (in order of descending preference): +# +# * fix utf8.pm or its dependencies so that any recent change no longer +# uses more special vars (ideally it would use no vars); +# +# * fix Perl_save_re_context() so that it localises more vars, then +# update this test script with the new relaxed var list. + + +use warnings; +use strict; + +# trigger the dependency loading + +my $x = lc "\x{411}"; + +# determine which relevant vars those dependencies accessed + +my @vars = + grep !/^[0123]$/, # $0, and $1, ..$3 allowed + grep /^(?:\d+|[`'&])$/, # numeric and $`, $&, $' vars + sort keys %::; + +# load any other modules *after* calculating @vars + +require './test.pl'; + +plan(1); + +is(scalar @vars, 0, "extraneous vars") + or diag("extra vars seen: " . join(", ", map "*$_", @vars)); + +exit 0; diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t index 3eaad63..891bb66 100644 --- a/t/re/pat_advanced.t +++ b/t/re/pat_advanced.t @@ -2294,6 +2294,19 @@ EOP } { + fresh_perl_is(<<'EOF', + my $s = "\x{41c}"; + $s =~ /(.*)/ or die; + $ls = lc $1; + print $ls eq lc $s ? "good\n" : "bad: [$ls]\n"; +EOF + "good\n", + {}, + "swash triggered by lc() doesn't corrupt \$1" + ); + } + + { #' RT #119075 no warnings 'regexp'; # Silence "has useless greediness modifier" local $@; diff --git a/utf8.c b/utf8.c index b9455bc..2a7a3d0 100644 --- a/utf8.c +++ b/utf8.c @@ -2407,6 +2407,7 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m PUSHSTACKi(PERLSI_MAGIC); ENTER; SAVEHINTS(); + save_re_context(); /* We might get here via a subroutine signature which uses a utf8 * parameter name, at which point PL_subname will have been set * but not yet used. */ @@ -2421,6 +2422,10 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* name, SV *listsv, I32 m #ifndef NO_TAINT_SUPPORT /* It is assumed that callers of this routine are not passing in * any user derived data. */ + /* Need to do this after save_re_context() as it will set + * PL_tainted to 1 while saving $1 etc (see the code after getrx: + * in Perl_magic_get). Even line to create errsv_save can turn on + * PL_tainted. */ SAVEBOOL(TAINT_get); TAINT_NOT; #endif diff --git a/util.c b/util.c index e4e684c..8cf62f5 100644 --- a/util.c +++ b/util.c @@ -1529,6 +1529,7 @@ S_invoke_exception_hook(pTHX_ SV *ex, bool warn) SV *exarg; ENTER; + save_re_context(); if (warn) { SAVESPTR(*hook); *hook = NULL; -- Perl5 Master Repository
