In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/3cc6a05eedade6f51526feb18c12356b0589d77a?hp=14c482b0d8b7402f1b2b28d2918a55c83577d6ff>

- Log -----------------------------------------------------------------
commit 3cc6a05eedade6f51526feb18c12356b0589d77a
Author: Karl Williamson <[email protected]>
Date:   Wed Oct 19 21:20:48 2016 -0600

    utf8n_to_uvchr(): Reduce chances of reading beyond buffer
    
    utf8n_to_uvchr() can be called incorrectly, leading it to believe the
    buffer is longer than it actually is.  But often, it will be called with
    NUL-terminated strings, so it can reduce its chances of being fooled by
    refusing to read beyond a NUL.  The NUL will terminate any UTF-8 byte
    sequence, and the only reason to read beyond it would be to print all
    the expected bytes in the sequence.
    
    This commit is not the final word, but it is an easy fix for a common
    case.
-----------------------------------------------------------------------

Summary of changes:
 embed.fnc |  2 +-
 proto.h   |  2 +-
 utf8.c    | 15 ++++++++++++++-
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 94cb984..5cc73b7 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1685,7 +1685,7 @@ inRP      |bool   |does_utf8_overflow|NN const U8 * const 
s|NN const U8 * e
 inRP   |bool   |is_utf8_overlong_given_start_byte_ok|NN const U8 * const 
s|const STRLEN len
 sMR    |char * |unexpected_non_continuation_text                       \
                |NN const U8 * const s                                  \
-               |const STRLEN print_len                                 \
+               |STRLEN print_len                                       \
                |const STRLEN non_cont_byte_pos                         \
                |const STRLEN expect_len
 sM     |char * |_byte_dump_string|NN const U8 * s|const STRLEN len
diff --git a/proto.h b/proto.h
index ec870f7..1d79c46 100644
--- a/proto.h
+++ b/proto.h
@@ -5635,7 +5635,7 @@ STATIC SV*        S_swatch_get(pTHX_ SV* swash, UV start, 
UV span)
 STATIC U8      S_to_lower_latin1(const U8 c, U8 *p, STRLEN *lenp)
                        __attribute__warn_unused_result__;
 
-STATIC char *  S_unexpected_non_continuation_text(pTHX_ const U8 * const s, 
const STRLEN print_len, const STRLEN non_cont_byte_pos, const STRLEN expect_len)
+STATIC char *  S_unexpected_non_continuation_text(pTHX_ const U8 * const s, 
STRLEN print_len, const STRLEN non_cont_byte_pos, const STRLEN expect_len)
                        __attribute__warn_unused_result__;
 #define PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT      \
        assert(s)
diff --git a/utf8.c b/utf8.c
index d7450d7..f017f71 100644
--- a/utf8.c
+++ b/utf8.c
@@ -735,7 +735,7 @@ PERL_STATIC_INLINE char *
 S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
 
                                          /* How many bytes to print */
-                                         const STRLEN print_len,
+                                         STRLEN print_len,
 
                                          /* Which one is the non-continuation 
*/
                                          const STRLEN non_cont_byte_pos,
@@ -750,6 +750,7 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const s,
                                ? "immediately"
                                : Perl_form(aTHX_ "%d bytes",
                                                  (int) non_cont_byte_pos);
+    unsigned int i;
 
     PERL_ARGS_ASSERT_UNEXPECTED_NON_CONTINUATION_TEXT;
 
@@ -757,6 +758,18 @@ S_unexpected_non_continuation_text(pTHX_ const U8 * const 
s,
      * calculated, it's likely faster to pass it; verify under DEBUGGING */
     assert(expect_len == UTF8SKIP(s));
 
+    /* It is possible that utf8n_to_uvchr() was called incorrectly, with a
+     * length that is larger than is actually available in the buffer.  If we
+     * print all the bytes based on that length, we will read past the buffer
+     * end.  Often, the strings are NUL terminated, so to lower the chances of
+     * this happening, print the malformed bytes only up through any NUL. */
+    for (i = 1; i < print_len; i++) {
+        if (*(s + i) == '\0') {
+            print_len = i + 1;  /* +1 gets the NUL printed */
+            break;
+        }
+    }
+
     return Perl_form(aTHX_ "%s: %s (unexpected non-continuation byte 0x%02x,"
                            " %s after start byte 0x%02x; need %d bytes, got 
%d)",
                            malformed_text,

--
Perl5 Master Repository

Reply via email to