In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/90c3aa01208e3c5b9ab464a058bbd2f6ebda4ff4?hp=7594f18f6aa16e16707d34a484c75a3e6c702b54>
- Log ----------------------------------------------------------------- commit 90c3aa01208e3c5b9ab464a058bbd2f6ebda4ff4 Author: David Mitchell <[email protected]> Date: Sat Feb 4 15:54:09 2017 +0000 pp_formline: simplify growing of PL_formtarget There's some reasonably complex logic to try and second guess how much space to allocate or reallocate for the output buffer (some of which is my doing from 2011, 26e935cfa6e7). This commit removes most of this and now just does: initially, grow the buffer by the size of the format. If any further growing is needed later on (e.g. after a utf8 upgrade or due to @*) then just grow as needed. This may give less optimal growing in edge cases ( i.e. repeated smaller grows rather than one big grow), but the old code was often guessing wrong anyway. This commit also makes it *always* check whether PL_formtarget needs growing when about to append data to it, which is safer. M pp_ctl.c commit e452bf1c9e9f30813b1f289188a6e8b0894575ba Author: David Mitchell <[email protected]> Date: Sat Feb 4 15:10:49 2017 +0000 buffer overrun with format and 'use bytes' RT #130703 In the scope of 'use bytes', appending a string to a format where the format is utf8 and the string is non-utf8 but contains lots of chars with ords >= 128, the buffer could be overrun. This is due to all the \x80-type chars going from being stored as 1 bytes to 2 bytes, without growing PL_formtarget accordingly. This commit contains a minimal fix; the next commit will more generally tidy up the grow code in pp_formline. M pp_ctl.c M t/op/write.t ----------------------------------------------------------------------- Summary of changes: pp_ctl.c | 32 +++++++++++--------------------- t/op/write.t | 18 +++++++++++++++++- 2 files changed, 28 insertions(+), 22 deletions(-) diff --git a/pp_ctl.c b/pp_ctl.c index f48f3013ce..0b759035ad 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -482,7 +482,6 @@ PP(pp_formline) NV value; bool gotsome = FALSE; /* seen at least one non-blank item on this line */ STRLEN len; /* length of current sv */ - STRLEN linemax; /* estimate of output size in bytes */ bool item_is_utf8 = FALSE; bool targ_is_utf8 = FALSE; const char *fmt; @@ -505,8 +504,11 @@ PP(pp_formline) SvTAINTED_on(PL_formtarget); if (DO_UTF8(PL_formtarget)) targ_is_utf8 = TRUE; - linemax = (SvCUR(formsv) * (IN_BYTES ? 1 : 3) + 1); - t = SvGROW(PL_formtarget, len + linemax + 1); + /* Usually the output data will be the same size as the format, + * so this is a good first guess. Later on, @* or utf8 upgrades + * may trigger further growing. + */ + t = SvGROW(PL_formtarget, len + SvCUR(formsv) + 1); /* XXX from now onwards, SvCUR(PL_formtarget) is invalid */ t += len; f = SvPV_const(formsv, len); @@ -759,10 +761,9 @@ PP(pp_formline) * if trans, translate certain characters during the copy */ { U8 *tmp = NULL; - STRLEN grow = 0; + STRLEN cur = t - SvPVX_const(PL_formtarget); - SvCUR_set(PL_formtarget, - t - SvPVX_const(PL_formtarget)); + SvCUR_set(PL_formtarget, cur); if (targ_is_utf8 && !item_is_utf8) { source = tmp = bytes_to_utf8(source, &to_copy); @@ -773,14 +774,10 @@ PP(pp_formline) a problem we have a simple solution for. Don't need get magic. */ sv_utf8_upgrade_nomg(PL_formtarget); + cur = SvCUR(PL_formtarget); /* may have changed */ targ_is_utf8 = TRUE; /* re-calculate linemark */ s = (U8*)SvPVX(PL_formtarget); - /* the bytes we initially allocated to append the - * whole line may have been gobbled up during the - * upgrade, so allocate a whole new line's worth - * for safety */ - grow = linemax; while (linemark--) s += UTF8SKIP(s); linemark = s - (U8*)SvPVX(PL_formtarget); @@ -788,17 +785,10 @@ PP(pp_formline) /* Easy. They agree. */ assert (item_is_utf8 == targ_is_utf8); } - if (!trans) - /* @* and ^* are the only things that can exceed - * the linemax, so grow by the output size, plus - * a whole new form's worth in case of any further - * output */ - grow = linemax + to_copy; - if (grow) - SvGROW(PL_formtarget, SvCUR(PL_formtarget) + grow + 1); - t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget); + t = SvGROW(PL_formtarget, cur + to_copy + 1) + cur; Copy(source, t, to_copy, char); + if (trans) { /* blank out ~ or control chars, depending on trans. * works on bytes not chars, so relies on not @@ -814,7 +804,7 @@ PP(pp_formline) } t += to_copy; - SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) + to_copy); + SvCUR_set(PL_formtarget, cur + to_copy); if (tmp) Safefree(tmp); break; diff --git a/t/op/write.t b/t/op/write.t index d41e854c8a..a36da8ae0c 100644 --- a/t/op/write.t +++ b/t/op/write.t @@ -98,7 +98,7 @@ for my $tref ( @NumTests ){ my $bas_tests = 21; # number of tests in section 3 -my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 6 + 2 + 3 + 96 + 11 + 12; +my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 6 + 2 + 3 + 96 + 11 + 13; # number of tests in section 4 my $hmb_tests = 37; @@ -1562,6 +1562,22 @@ ok defined *{$::{CmT}}{FORMAT}, "glob assign"; formline $format, $orig, 12345; is $^A, ("x" x 100) . " 12345\n", "\@* doesn't overflow"; + # ...nor this (RT #130703). + # Under 'use bytes', the two bytes (c2, 80) making up each \x80 char + # each get expanded to two bytes (so four in total per \x80 char); the + # buffer growth wasn't accounting for this doubling in size + + { + local $^A = ''; + my $format = "X\n\x{100}" . ("\x80" x 200); + my $expected = $format; + utf8::encode($expected); + use bytes; + formline($format); + is $^A, $expected, "RT #130703"; + } + + # make sure it can cope with formats > 64k $format = 'x' x 65537; -- Perl5 Master Repository
