[perl.git] branch blead, updated. v5.25.9-77-g90c3aa0120

Dave Mitchell Sat, 04 Feb 2017 08:13:49 -0800

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/90c3aa01208e3c5b9ab464a058bbd2f6ebda4ff4?hp=7594f18f6aa16e16707d34a484c75a3e6c702b54>


- Log -----------------------------------------------------------------
commit 90c3aa01208e3c5b9ab464a058bbd2f6ebda4ff4
Author: David Mitchell <[email protected]>
Date:   Sat Feb 4 15:54:09 2017 +0000

    pp_formline: simplify growing of PL_formtarget
    
    There's some reasonably complex logic to try and second guess how much
    space to allocate or reallocate for the output buffer (some of which is
    my doing from 2011, 26e935cfa6e7).
    
    This commit removes most of this and now just does:
    
    initially, grow the buffer by the size of the format. If any further
    growing is needed later on (e.g. after a utf8 upgrade or due to @*) then
    just grow as needed. This may give less optimal growing in edge cases
    ( i.e. repeated smaller grows rather than one big grow), but the old code
    was often guessing wrong anyway.
    
    This commit also makes it *always* check whether PL_formtarget needs growing
    when about to append data to it, which is safer.

M       pp_ctl.c

commit e452bf1c9e9f30813b1f289188a6e8b0894575ba
Author: David Mitchell <[email protected]>
Date:   Sat Feb 4 15:10:49 2017 +0000

    buffer overrun with format and 'use bytes'
    
    RT #130703
    
    In the scope of 'use bytes', appending a string to a format where the
    format is utf8 and the string is non-utf8 but contains lots of chars
    with ords >= 128, the buffer could be overrun. This is due to all the
    \x80-type chars going from being stored as 1 bytes to 2 bytes, without
    growing PL_formtarget accordingly.
    
    This commit contains a minimal fix; the next commit will more generally
    tidy up the grow code in pp_formline.

M       pp_ctl.c
M       t/op/write.t
-----------------------------------------------------------------------

Summary of changes:
 pp_ctl.c     | 32 +++++++++++---------------------
 t/op/write.t | 18 +++++++++++++++++-
 2 files changed, 28 insertions(+), 22 deletions(-)

diff --git a/pp_ctl.c b/pp_ctl.c
index f48f3013ce..0b759035ad 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -482,7 +482,6 @@ PP(pp_formline)
     NV value;
     bool gotsome = FALSE;   /* seen at least one non-blank item on this line */
     STRLEN len;             /* length of current sv */
-    STRLEN linemax;        /* estimate of output size in bytes */
     bool item_is_utf8 = FALSE;
     bool targ_is_utf8 = FALSE;
     const char *fmt;
@@ -505,8 +504,11 @@ PP(pp_formline)
        SvTAINTED_on(PL_formtarget);
     if (DO_UTF8(PL_formtarget))
        targ_is_utf8 = TRUE;
-    linemax = (SvCUR(formsv) * (IN_BYTES ? 1 : 3) + 1);
-    t = SvGROW(PL_formtarget, len + linemax + 1);
+    /* Usually the output data will be the same size as the format,
+     * so this is a good first guess. Later on, @* or utf8 upgrades
+     * may trigger further growing.
+     */
+    t = SvGROW(PL_formtarget, len + SvCUR(formsv) + 1);
     /* XXX from now onwards, SvCUR(PL_formtarget) is invalid */
     t += len;
     f = SvPV_const(formsv, len);
@@ -759,10 +761,9 @@ PP(pp_formline)
             * if trans, translate certain characters during the copy */
            {
                U8 *tmp = NULL;
-               STRLEN grow = 0;
+                STRLEN cur = t - SvPVX_const(PL_formtarget);
 
-               SvCUR_set(PL_formtarget,
-                         t - SvPVX_const(PL_formtarget));
+               SvCUR_set(PL_formtarget, cur);
 
                if (targ_is_utf8 && !item_is_utf8) {
                    source = tmp = bytes_to_utf8(source, &to_copy);
@@ -773,14 +774,10 @@ PP(pp_formline)
                           a problem we have a simple solution for.
                           Don't need get magic.  */
                        sv_utf8_upgrade_nomg(PL_formtarget);
+                        cur = SvCUR(PL_formtarget); /* may have changed */
                        targ_is_utf8 = TRUE;
                        /* re-calculate linemark */
                        s = (U8*)SvPVX(PL_formtarget);
-                       /* the bytes we initially allocated to append the
-                        * whole line may have been gobbled up during the
-                        * upgrade, so allocate a whole new line's worth
-                        * for safety */
-                       grow = linemax;
                        while (linemark--)
                            s += UTF8SKIP(s);
                        linemark = s - (U8*)SvPVX(PL_formtarget);
@@ -788,17 +785,10 @@ PP(pp_formline)
                    /* Easy. They agree.  */
                    assert (item_is_utf8 == targ_is_utf8);
                }
-               if (!trans)
-                   /* @* and ^* are the only things that can exceed
-                    * the linemax, so grow by the output size, plus
-                    * a whole new form's worth in case of any further
-                    * output */
-                   grow = linemax + to_copy;
-               if (grow)
-                   SvGROW(PL_formtarget, SvCUR(PL_formtarget) + grow + 1);
-               t = SvPVX(PL_formtarget) + SvCUR(PL_formtarget);
 
+                t = SvGROW(PL_formtarget, cur + to_copy + 1) + cur;
                Copy(source, t, to_copy, char);
+
                if (trans) {
                    /* blank out ~ or control chars, depending on trans.
                     * works on bytes not chars, so relies on not
@@ -814,7 +804,7 @@ PP(pp_formline)
                }
 
                t += to_copy;
-               SvCUR_set(PL_formtarget, SvCUR(PL_formtarget) + to_copy);
+               SvCUR_set(PL_formtarget, cur + to_copy);
                if (tmp)
                    Safefree(tmp);
                break;
diff --git a/t/op/write.t b/t/op/write.t
index d41e854c8a..a36da8ae0c 100644
--- a/t/op/write.t
+++ b/t/op/write.t
@@ -98,7 +98,7 @@ for my $tref ( @NumTests ){
 my $bas_tests = 21;
 
 # number of tests in section 3
-my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 6 + 2 + 3 + 96 + 11 + 12;
+my $bug_tests = 66 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 6 + 2 + 3 + 96 + 11 + 13;
 
 # number of tests in section 4
 my $hmb_tests = 37;
@@ -1562,6 +1562,22 @@ ok  defined *{$::{CmT}}{FORMAT}, "glob assign";
     formline $format, $orig, 12345;
     is $^A, ("x" x 100) . " 12345\n", "\@* doesn't overflow";
 
+    # ...nor this (RT #130703).
+    # Under 'use bytes', the two bytes (c2, 80) making up each \x80 char
+    # each get expanded to two bytes (so four in total per \x80 char); the
+    # buffer growth wasn't accounting for this doubling in size
+
+    {
+        local $^A = '';
+        my $format = "X\n\x{100}" . ("\x80" x 200);
+        my $expected = $format;
+        utf8::encode($expected);
+        use bytes;
+        formline($format);
+        is $^A, $expected, "RT #130703";
+    }
+
+
     # make sure it can cope with formats > 64k
 
     $format = 'x' x 65537;

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.25.9-77-g90c3aa0120

Reply via email to