In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/b949b68f22c917863062bdb655e0e956abeca90d?hp=5219f5ec5c453357ab78722da5a91806251ffb67>

- Log -----------------------------------------------------------------
commit b949b68f22c917863062bdb655e0e956abeca90d
Author: David Mitchell <[email protected]>
Date:   Sat Jan 21 15:47:43 2017 +0000

    avoid disabling utf8 pos cache on tainted strings
    
    RT #130584
    
    When pos() or similar is used on a utf8 string, perl attaches magic
    to it that caches a couple of byte<->char offset conversions. This can
    avoid quadratic behaviour when continually scanning a big chunk of a long
    string to convert a byte offset to a char offset when pos() is called.
    
    v5.17.3-203-g7d1328b added code to invalidate this cache when get magic is
    called on an SV, since the get magic may change the value of the SV.
    
    However, under -T, taint magic gets added to a tainted string, which
    includes a get method which doesn't actually change the SV's value.
    So make a special exception to get-magic-cache-invalidation if the only
    get magic on the string is taint.
    
    This stops code like the following going quadratic under -T:
    
        $_ = "... long tainted utf8 string ...";
        while ( /..../g) {
            my $p = pos(); # calculating pos() goes quadratic
        }
-----------------------------------------------------------------------

Summary of changes:
 mg.c           | 31 +++++++++++++++++++++++++------
 t/perf/taint.t | 22 ++++++++++++++++++++--
 2 files changed, 45 insertions(+), 8 deletions(-)

diff --git a/mg.c b/mg.c
index 69fdc93ae8..75196fa5d7 100644
--- a/mg.c
+++ b/mg.c
@@ -171,6 +171,7 @@ Perl_mg_get(pTHX_ SV *sv)
     const I32 mgs_ix = SSNEW(sizeof(MGS));
     bool saved = FALSE;
     bool have_new = 0;
+    bool taint_only = TRUE; /* the only get method seen is taint */
     MAGIC *newmg, *head, *cur, *mg;
 
     PERL_ARGS_ASSERT_MG_GET;
@@ -189,10 +190,13 @@ Perl_mg_get(pTHX_ SV *sv)
        if (!(mg->mg_flags & MGf_GSKIP) && vtbl && vtbl->svt_get) {
 
            /* taint's mg get is so dumb it doesn't need flag saving */
-           if (!saved && mg->mg_type != PERL_MAGIC_taint) {
-               save_magic(mgs_ix, sv);
-               saved = TRUE;
-           }
+           if (mg->mg_type != PERL_MAGIC_taint) {
+                taint_only = FALSE;
+                if (!saved) {
+                    save_magic(mgs_ix, sv);
+                    saved = TRUE;
+                }
+            }
 
            vtbl->svt_get(aTHX_ sv, mg);
 
@@ -210,8 +214,23 @@ Perl_mg_get(pTHX_ SV *sv)
                     ~(SVs_GMG|SVs_SMG|SVs_RMG);
        }
        else if (vtbl == &PL_vtbl_utf8) {
-           /* get-magic can reallocate the PV */
-           magic_setutf8(sv, mg);
+           /* get-magic can reallocate the PV, unless there's only taint
+             * magic */
+            if (taint_only) {
+                MAGIC *mg2;
+                for (mg2 = nextmg; mg2; mg2 = mg2->mg_moremagic) {
+                    if (   mg2->mg_type != PERL_MAGIC_taint
+                        && !(mg2->mg_flags & MGf_GSKIP)
+                        && mg2->mg_virtual
+                        && mg2->mg_virtual->svt_get
+                    ) {
+                        taint_only = FALSE;
+                        break;
+                    }
+                }
+            }
+            if (!taint_only)
+                magic_setutf8(sv, mg);
        }
 
        mg = nextmg;
diff --git a/t/perf/taint.t b/t/perf/taint.t
index 0c3ac82413..797f0ad905 100644
--- a/t/perf/taint.t
+++ b/t/perf/taint.t
@@ -28,16 +28,34 @@ use Scalar::Util qw(tainted);
 
 $| = 1;
 
-plan tests => 2;
+plan tests => 4;
 
 watchdog(60);
 
+my $taint = substr($ENV{PATH}, 0, 0); # and empty tainted string
+
 {
-    my $in = substr($ENV{PATH}, 0, 0) . ( "ab" x 200_000 );
+    my $in = $taint . ( "ab" x 200_000 );
     utf8::upgrade($in);
     ok(tainted($in), "performance issue only when tainted");
     while ($in =~ /\Ga+b/g) { }
     pass("\\G on tainted string");
 }
 
+# RT #130584
+# tainted string caused the utf8 pos cache to be cleared each time
+
+{
+    my $repeat = 30_000;
+    my $in = $taint . ("abcdefghijklmnopqrstuvwxyz" x $repeat);
+    utf8::upgrade($in);
+    ok(tainted($in), "performance issue only when tainted");
+    local ${^UTF8CACHE} = 1;  # defeat debugging
+    for my $i (1..$repeat) {
+        $in =~ /abcdefghijklmnopqrstuvwxyz/g or die;
+        my $p = pos($in); # this was slow
+    }
+    pass("RT #130584 pos on tainted utf8 string");
+}
+
 1;

--
Perl5 Master Repository

Reply via email to