In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/4e0341d2ce817c9956f7f78e36bcaf8b764e18fc?hp=81baec7793948a407103a7d8ae91755bede993bd>

- Log -----------------------------------------------------------------
commit 4e0341d2ce817c9956f7f78e36bcaf8b764e18fc
Author: David Mitchell <[email protected]>
Date:   Wed Mar 25 17:11:40 2015 +0000

    Perl_save_re_context(): re-indent after last commit
    
    whitespace-only change.

M       regcomp.c

commit 3553f4fa11fd9e8bb0797ace43605cc33ebf32fa
Author: David Mitchell <[email protected]>
Date:   Wed Mar 25 16:59:04 2015 +0000

    save_re_context(): do "local $n" with no PL_curpm
    
    RT #124109.
    
    2c1f00b9036 localised PL_curpm to NULL when calling swash init code
    (i.e. perl-level code that is loaded and executed when something
    like "lc $large_codepoint" is executed).
    
    b4fa55d3f1 followed this up by gutting Perl_save_re_context(), since
    that function did, basically,
    
        if (PL_curpm) {
            for (i = 1; i <= RX_NPARENS(PM_GETRE(PL_curpm))) {
                do the C equivalent of the perl code "local ${i}";
            }
        }
    
    and now that PL_curpm was null, the code wasn't called any more.  However,
    it turns out that the localisation *was* still needed, it's just that
    nothing in the test suite actually tested for it.
    
    In something like the following:
    
        $x = "\x{41c}";
        $x =~ /(.*)/;
        $s = lc $1;
    
    pp_lc() calls get magic on $1, which sets $1's PV value to a copy of the
    substring captured by the current pattern match.
    Then pp_lc() calls a function to convert the string to upper case, which
    triggers a swash load, which calls perl code that does a pattern match
    and, most importantly, uses the value of $1. This triggers get magic on
    $1, which overwrites $1's PV value with a new value. When control returns
    to pp_lc(), $1 now holds the wrong string value.
    
    Hence $1, $2 etc need localising as well as PL_curpm.
    
    The old way that Perl_save_re_context() used to work (localising
    $1..${RX_NPARENS}) won't work directly when PL_curpm is NULL (as in the
    swash case), since we don't know how many vars to localise.
    
    In this case, hard-code it as localising $1,$2,$3 and add a porting
    test file that checks that the utf8.pm code and dependences don't
    use anything outside those 3 vars.

M       MANIFEST
M       regcomp.c
A       t/porting/re_context.t
M       t/re/pat_advanced.t

commit e8d8f801f452fb6a459fa7375ce32ec55300a01d
Author: David Mitchell <[email protected]>
Date:   Wed Mar 25 16:21:31 2015 +0000

    Revert "Gut Perl_save_re_context"
    
    This reverts commit b4fa55d3f12c6d98b13a8b3db4f8d921c8e56edc.
    
    Turns out we need Perl_save_re_context() after all

M       regcomp.c

commit 2782061f5102a81e1eae39cce864ce172fbea63d
Author: David Mitchell <[email protected]>
Date:   Thu Mar 19 20:35:57 2015 +0000

    Revert "Don’t call save_re_context"
    
    This reverts commit d28a9254e445aee7212523d9a7ff62ae0a743fec.
    
    Turns out we need save_re_context() after all

M       mg.c
M       regcomp.c
M       sv.c
M       utf8.c
M       util.c

commit 7c6e85ad045c7a9841bf1c62d1dd22bf1705a168
Author: David Mitchell <[email protected]>
Date:   Thu Mar 19 20:30:04 2015 +0000

    Revert "Mathomise save_re_context"
    
    This reverts commit 0ddd4a5b1910c8bfa9b7e55eb0db60a115fe368c.
    
    Turns out we need the save_re_context() function after all.

M       mathoms.c
M       regcomp.c
-----------------------------------------------------------------------

Summary of changes:
 MANIFEST               |  1 +
 mathoms.c              |  6 ------
 mg.c                   |  1 +
 regcomp.c              | 42 ++++++++++++++++++++++++++++++++++++++++++
 sv.c                   |  2 ++
 t/porting/re_context.t | 43 +++++++++++++++++++++++++++++++++++++++++++
 t/re/pat_advanced.t    | 13 +++++++++++++
 utf8.c                 |  5 +++++
 util.c                 |  1 +
 9 files changed, 108 insertions(+), 6 deletions(-)
 create mode 100644 t/porting/re_context.t

diff --git a/MANIFEST b/MANIFEST
index a5be49d..eaa205a 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -5384,6 +5384,7 @@ t/porting/perlfunc.t              Test that 
Functions_pm.PL can parse perlfunc.pod
 t/porting/podcheck.t           Test the POD of shipped modules is well formed
 t/porting/pod_rules.t          Check that various pod lists are consistent
 t/porting/readme.t             Check that all files in Porting/ are mentioned 
in Porting/README.pod
+t/porting/re_context.t         Check assumptions made by save_re_context()
 t/porting/regen.t              Check that regen.pl doesn't need running
 t/porting/ss_dup.t             Check that sv.c:ss_dup handle everything
 t/porting/test_bootstrap.t     Test that the instructions for test 
bootstrapping aren't accidentally overlooked.
diff --git a/mathoms.c b/mathoms.c
index bcce2ca..d659883 100644
--- a/mathoms.c
+++ b/mathoms.c
@@ -1792,12 +1792,6 @@ Perl_utf8_to_uvuni(pTHX_ const U8 *s, STRLEN *retlen)
     return NATIVE_TO_UNI(valid_utf8_to_uvchr(s, retlen));
 }
 
-void
-Perl_save_re_context(pTHX)
-{
-    PERL_UNUSED_CONTEXT;
-}
-
 /*
 =for apidoc Am|HV *|pad_compname_type|PADOFFSET po
 
diff --git a/mg.c b/mg.c
index b03510b..064a1ae 100644
--- a/mg.c
+++ b/mg.c
@@ -1802,6 +1802,7 @@ Perl_magic_methcall(pTHX_ SV *sv, const MAGIC *mg, SV 
*meth, U32 flags,
     if (flags & G_WRITING_TO_STDERR) {
        SAVETMPS;
 
+       save_re_context();
        SAVESPTR(PL_stderrgv);
        PL_stderrgv = NULL;
     }
diff --git a/regcomp.c b/regcomp.c
index 50a9e6c..5d5332d 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6180,6 +6180,7 @@ S_compile_runtime_code(pTHX_ RExC_state_t * const 
pRExC_state,
 
        ENTER;
        SAVETMPS;
+       save_re_context();
        PUSHSTACKi(PERLSI_REQUIRE);
         /* G_RE_REPARSING causes the toker to collapse \\ into \ when
          * parsing qr''; normally only q'' does this. It also alters
@@ -17710,6 +17711,47 @@ S_re_croak2(pTHX_ bool utf8, const char* pat1,const 
char* pat2,...)
     Perl_croak(aTHX_ "%"UTF8f, UTF8fARG(utf8, l1-1, buf));
 }
 
+/* XXX Here's a total kludge.  But we need to re-enter for swash routines. */
+
+#ifndef PERL_IN_XSUB_RE
+void
+Perl_save_re_context(pTHX)
+{
+    I32 nparens = -1;
+    I32 i;
+
+    /* Save $1..$n (#18107: UTF-8 s/(\w+)/uc($1)/e); AMS 20021106. */
+
+    if (PL_curpm) {
+       const REGEXP * const rx = PM_GETRE(PL_curpm);
+       if (rx)
+            nparens = RX_NPARENS(rx);
+    }
+
+    /* RT #124109. This is a complete hack; in the SWASHNEW case we know
+     * that PL_curpm will be null, but that utf8.pm and the modules it
+     * loads will only use $1..$3.
+     * The t/porting/re_context.t test file checks this assumption.
+     */
+    if (nparens == -1)
+        nparens = 3;
+
+    for (i = 1; i <= nparens; i++) {
+        char digits[TYPE_CHARS(long)];
+        const STRLEN len = my_snprintf(digits, sizeof(digits),
+                                       "%lu", (long)i);
+        GV *const *const gvp
+            = (GV**)hv_fetch(PL_defstash, digits, len, 0);
+
+        if (gvp) {
+            GV * const gv = *gvp;
+            if (SvTYPE(gv) == SVt_PVGV && GvSV(gv))
+                save_scalar(gv);
+        }
+    }
+}
+#endif
+
 #ifdef DEBUGGING
 
 STATIC void
diff --git a/sv.c b/sv.c
index 4a818f2..467dc24 100644
--- a/sv.c
+++ b/sv.c
@@ -15286,6 +15286,7 @@ Perl_sv_recode_to_utf8(pTHX_ SV *sv, SV *encoding)
            nsv = sv_newmortal();
            SvSetSV_nosteal(nsv, sv);
        }
+       save_re_context();
        PUSHMARK(sp);
        EXTEND(SP, 3);
        PUSHs(encoding);
@@ -15356,6 +15357,7 @@ Perl_sv_cat_decode(pTHX_ SV *dsv, SV *encoding,
        dSP;
        ENTER;
        SAVETMPS;
+       save_re_context();
        PUSHMARK(sp);
        EXTEND(SP, 6);
        PUSHs(encoding);
diff --git a/t/porting/re_context.t b/t/porting/re_context.t
new file mode 100644
index 0000000..5467b93
--- /dev/null
+++ b/t/porting/re_context.t
@@ -0,0 +1,43 @@
+#!./perl -w
+#
+# Check that utf8.pm and its dependencies only use the subset of the
+# $1..$n capture vars that Perl_save_re_context() is hard-coded to
+# localise, because that function has no efficient way of determining at
+# runtime what vars to localise.
+#
+# Note that this script tests for the existence of symbol table entries in
+# %::, so @4 etc would trigger a failure as well as $4.
+#
+# If tests start to fail, either (in order of descending preference):
+#
+# * fix utf8.pm or its dependencies so that any recent change no longer
+#   uses more special vars (ideally it would use no vars);
+#
+# * fix Perl_save_re_context() so that it localises more vars, then
+#   update this test script with the new relaxed var list.
+
+
+use warnings;
+use strict;
+
+# trigger the dependency loading
+
+my $x = lc "\x{411}";
+
+# determine which relevant vars those dependencies accessed
+
+my @vars =
+        grep !/^[0123]$/, # $0, and $1, ..$3 allowed
+        grep /^(?:\d+|[`'&])$/,  # numeric and $`, $&, $' vars
+        sort keys %::;
+
+# load any other modules *after* calculating @vars
+
+require './test.pl';
+
+plan(1);
+
+is(scalar @vars, 0, "extraneous vars")
+    or diag("extra vars seen: " . join(", ", map "*$_", @vars));
+
+exit 0;
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index 3eaad63..891bb66 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -2294,6 +2294,19 @@ EOP
     }
 
     {
+        fresh_perl_is(<<'EOF',
+                my $s = "\x{41c}";
+                $s =~ /(.*)/ or die;
+                $ls = lc $1;
+                print $ls eq lc $s ? "good\n" : "bad: [$ls]\n";
+EOF
+            "good\n",
+            {},
+            "swash triggered by lc() doesn't corrupt \$1"
+        );
+    }
+
+    {
         #' RT #119075
         no warnings 'regexp';   # Silence "has useless greediness modifier"
         local $@;
diff --git a/utf8.c b/utf8.c
index b9455bc..2a7a3d0 100644
--- a/utf8.c
+++ b/utf8.c
@@ -2407,6 +2407,7 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* 
name, SV *listsv, I32 m
        PUSHSTACKi(PERLSI_MAGIC);
        ENTER;
        SAVEHINTS();
+       save_re_context();
        /* We might get here via a subroutine signature which uses a utf8
         * parameter name, at which point PL_subname will have been set
         * but not yet used. */
@@ -2421,6 +2422,10 @@ Perl__core_swash_init(pTHX_ const char* pkg, const char* 
name, SV *listsv, I32 m
 #ifndef NO_TAINT_SUPPORT
            /* It is assumed that callers of this routine are not passing in
             * any user derived data.  */
+           /* Need to do this after save_re_context() as it will set
+            * PL_tainted to 1 while saving $1 etc (see the code after getrx:
+            * in Perl_magic_get).  Even line to create errsv_save can turn on
+            * PL_tainted.  */
            SAVEBOOL(TAINT_get);
            TAINT_NOT;
 #endif
diff --git a/util.c b/util.c
index e4e684c..8cf62f5 100644
--- a/util.c
+++ b/util.c
@@ -1529,6 +1529,7 @@ S_invoke_exception_hook(pTHX_ SV *ex, bool warn)
        SV *exarg;
 
        ENTER;
+       save_re_context();
        if (warn) {
            SAVESPTR(*hook);
            *hook = NULL;

--
Perl5 Master Repository

Reply via email to