In perl.git, the branch blead has been updated <https://perl5.git.perl.org/perl.git/commitdiff/576915daebd987c8149486c51d8423a1dd471ded?hp=78a643379185d2a728ff615b2a20bee0c66f9d4b>
- Log ----------------------------------------------------------------- commit 576915daebd987c8149486c51d8423a1dd471ded Author: David Mitchell <[email protected]> Date: Wed Nov 1 18:45:36 2017 +0000 multiconcat: use append_utf8_from_native_byte() This small inline function does what my code was doing manually in a couple of places. Should be no functional difference, just makes the code tidier. Suggested by Karl. commit b3baa1fe7b5f973daeb2a46cb3afae8624d0f0ab Author: David Mitchell <[email protected]> Date: Wed Nov 1 17:14:58 2017 +0000 sprintf-as-multiconcat: fix \x80 issue My recent OP_MULTICONCAT merge which (amongst other things) converts sprintfs with a constant format that only containing %s's into a multiconcat op, miscounted variant chars (i.e. chars like \x80, which if upgraded to utf8, expand the number of bytes they require). This could cause buffer overruns. Spotted by Karl Williamson++ NPD ----------------------------------------------------------------------- Summary of changes: op.c | 18 ++++++------------ pp_hot.c | 11 +++-------- t/op/sprintf2.t | 8 ++++++++ 3 files changed, 17 insertions(+), 20 deletions(-) diff --git a/op.c b/op.c index 689f696857..333e5b105d 100644 --- a/op.c +++ b/op.c @@ -2552,7 +2552,7 @@ S_sprintf_is_multiconcatable(pTHX_ OP *o,struct sprintf_ismc_info *info) for (p = s; p < e; p++) { if (*p != '%') { total_len++; - if (UTF8_IS_INVARIANT(*p)) + if (!UTF8_IS_INVARIANT(*p)) variant++; continue; } @@ -3237,19 +3237,13 @@ S_maybe_multiconcat(pTHX_ OP *o) aux[PERL_MULTICONCAT_IX_UTF8_LEN].size = ulen; for (n = 0; n < (nargs + 1); n++) { - SSize_t l, ul, i; - l = ul = (lens++)->size; - for (i = 0; i < l; i++) { + SSize_t i; + char * orig_up = up; + for (i = (lens++)->size; i > 0; i--) { U8 c = *p++; - if (UTF8_IS_INVARIANT(c)) - *up++ = c; - else { - *up++ = UTF8_EIGHT_BIT_HI(c); - *up++ = UTF8_EIGHT_BIT_LO(c); - ul++; - } + append_utf8_from_native_byte(c, (U8**)&up); } - (ulens++)->size = ul; + (ulens++)->size = (i < 0) ? i : up - orig_up; } } diff --git a/pp_hot.c b/pp_hot.c index fff91396ff..2ce77b38ef 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -969,16 +969,11 @@ PP(pp_multiconcat) len = -len; if (UNLIKELY(p)) { /* copy plain-but-variant pv to a utf8 targ */ + char * end_pv = dsv_pv + len; assert(dst_utf8); - while (len--) { + while (dsv_pv < end_pv) { U8 c = (U8) *p++; - if (UTF8_IS_INVARIANT(c)) - *dsv_pv++ = c; - else { - *dsv_pv++ = UTF8_EIGHT_BIT_HI(c); - *dsv_pv++ = UTF8_EIGHT_BIT_LO(c); - len--; - } + append_utf8_from_native_byte(c, (U8**)&dsv_pv); } } else diff --git a/t/op/sprintf2.t b/t/op/sprintf2.t index eb90c763d8..bf092032bb 100644 --- a/t/op/sprintf2.t +++ b/t/op/sprintf2.t @@ -1114,6 +1114,14 @@ like sprintf("%p", 0+'NaN'), qr/^[0-9a-f]+$/, "%p and NaN"; } } +# variant chars in constant format (not utf8, but change if upgraded) + +{ + my $x = "\x{100}"; + my $y = sprintf "%sa\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80", $x; + is $y, "\x{100}a\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80", + "\\x80 in format"; +} done_testing(); -- Perl5 Master Repository
