In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/b949b68f22c917863062bdb655e0e956abeca90d?hp=5219f5ec5c453357ab78722da5a91806251ffb67>
- Log ----------------------------------------------------------------- commit b949b68f22c917863062bdb655e0e956abeca90d Author: David Mitchell <[email protected]> Date: Sat Jan 21 15:47:43 2017 +0000 avoid disabling utf8 pos cache on tainted strings RT #130584 When pos() or similar is used on a utf8 string, perl attaches magic to it that caches a couple of byte<->char offset conversions. This can avoid quadratic behaviour when continually scanning a big chunk of a long string to convert a byte offset to a char offset when pos() is called. v5.17.3-203-g7d1328b added code to invalidate this cache when get magic is called on an SV, since the get magic may change the value of the SV. However, under -T, taint magic gets added to a tainted string, which includes a get method which doesn't actually change the SV's value. So make a special exception to get-magic-cache-invalidation if the only get magic on the string is taint. This stops code like the following going quadratic under -T: $_ = "... long tainted utf8 string ..."; while ( /..../g) { my $p = pos(); # calculating pos() goes quadratic } ----------------------------------------------------------------------- Summary of changes: mg.c | 31 +++++++++++++++++++++++++------ t/perf/taint.t | 22 ++++++++++++++++++++-- 2 files changed, 45 insertions(+), 8 deletions(-) diff --git a/mg.c b/mg.c index 69fdc93ae8..75196fa5d7 100644 --- a/mg.c +++ b/mg.c @@ -171,6 +171,7 @@ Perl_mg_get(pTHX_ SV *sv) const I32 mgs_ix = SSNEW(sizeof(MGS)); bool saved = FALSE; bool have_new = 0; + bool taint_only = TRUE; /* the only get method seen is taint */ MAGIC *newmg, *head, *cur, *mg; PERL_ARGS_ASSERT_MG_GET; @@ -189,10 +190,13 @@ Perl_mg_get(pTHX_ SV *sv) if (!(mg->mg_flags & MGf_GSKIP) && vtbl && vtbl->svt_get) { /* taint's mg get is so dumb it doesn't need flag saving */ - if (!saved && mg->mg_type != PERL_MAGIC_taint) { - save_magic(mgs_ix, sv); - saved = TRUE; - } + if (mg->mg_type != PERL_MAGIC_taint) { + taint_only = FALSE; + if (!saved) { + save_magic(mgs_ix, sv); + saved = TRUE; + } + } vtbl->svt_get(aTHX_ sv, mg); @@ -210,8 +214,23 @@ Perl_mg_get(pTHX_ SV *sv) ~(SVs_GMG|SVs_SMG|SVs_RMG); } else if (vtbl == &PL_vtbl_utf8) { - /* get-magic can reallocate the PV */ - magic_setutf8(sv, mg); + /* get-magic can reallocate the PV, unless there's only taint + * magic */ + if (taint_only) { + MAGIC *mg2; + for (mg2 = nextmg; mg2; mg2 = mg2->mg_moremagic) { + if ( mg2->mg_type != PERL_MAGIC_taint + && !(mg2->mg_flags & MGf_GSKIP) + && mg2->mg_virtual + && mg2->mg_virtual->svt_get + ) { + taint_only = FALSE; + break; + } + } + } + if (!taint_only) + magic_setutf8(sv, mg); } mg = nextmg; diff --git a/t/perf/taint.t b/t/perf/taint.t index 0c3ac82413..797f0ad905 100644 --- a/t/perf/taint.t +++ b/t/perf/taint.t @@ -28,16 +28,34 @@ use Scalar::Util qw(tainted); $| = 1; -plan tests => 2; +plan tests => 4; watchdog(60); +my $taint = substr($ENV{PATH}, 0, 0); # and empty tainted string + { - my $in = substr($ENV{PATH}, 0, 0) . ( "ab" x 200_000 ); + my $in = $taint . ( "ab" x 200_000 ); utf8::upgrade($in); ok(tainted($in), "performance issue only when tainted"); while ($in =~ /\Ga+b/g) { } pass("\\G on tainted string"); } +# RT #130584 +# tainted string caused the utf8 pos cache to be cleared each time + +{ + my $repeat = 30_000; + my $in = $taint . ("abcdefghijklmnopqrstuvwxyz" x $repeat); + utf8::upgrade($in); + ok(tainted($in), "performance issue only when tainted"); + local ${^UTF8CACHE} = 1; # defeat debugging + for my $i (1..$repeat) { + $in =~ /abcdefghijklmnopqrstuvwxyz/g or die; + my $p = pos($in); # this was slow + } + pass("RT #130584 pos on tainted utf8 string"); +} + 1; -- Perl5 Master Repository
