In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/3e2d3818e517e0037c1ab6a482f31d50271f9e27?hp=65cccc5e92d46159b5887e72094aab44ee260ea3>

- Log -----------------------------------------------------------------
commit 3e2d3818e517e0037c1ab6a482f31d50271f9e27
Author: Nicholas Clark <[email protected]>
Date:   Sun Jul 11 20:11:10 2010 +0100

    Avoid UTF-8 cache panics with offsets beyond the string. Fixes RT #75898.
    
    Change S_sv_pos_u2b_forwards() to take a pointer to the (requested) UTF-8
    offset, and return the actual UTF-8 offset for the byte position returned.
    This ensures that the cache is consistent with reality.

M       embed.fnc
M       proto.h
M       sv.c
M       t/op/index.t

commit 48f9cf718354a5326f9e9d40d02a063952160024
Author: Nicholas Clark <[email protected]>
Date:   Sun Jul 11 17:17:37 2010 +0100

    In Perl_sv_pos_u2b_flags and S_sv_pos_u2b_cached, return early for offset 0.
    
    0 Unicode characters are always 0 octets long. Returning early ensures that
    any offsets we calculate later will always be non-zero.

M       sv.c

commit 503752a16bf16b90ff6c073c4bd5d818e68a2e2a
Author: Nicholas Clark <[email protected]>
Date:   Sun Jul 11 16:49:29 2010 +0100

    In S_sv_pos_u2b_midway, inline the call to S_sv_pos_u2b_forwards.

M       embed.fnc
M       proto.h
M       sv.c
-----------------------------------------------------------------------

Summary of changes:
 embed.fnc    |    6 +++---
 proto.h      |   11 ++++++-----
 sv.c         |   40 ++++++++++++++++++++++++++++++----------
 t/op/index.t |   11 ++++++++++-
 4 files changed, 49 insertions(+), 19 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 1ba9041..d3f14b1 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1883,12 +1883,12 @@ s       |int    |sv_2iuv_non_preserve   |NN SV *const sv
 sR     |I32    |expect_number  |NN char **const pattern
 #
 sn     |STRLEN |sv_pos_u2b_forwards|NN const U8 *const start \
-               |NN const U8 *const send|STRLEN uoffset
+               |NN const U8 *const send|NN STRLEN *const uoffset
 sn     |STRLEN |sv_pos_u2b_midway|NN const U8 *const start \
-               |NN const U8 *send|const STRLEN uoffset|const STRLEN uend
+               |NN const U8 *send|STRLEN uoffset|const STRLEN uend
 s      |STRLEN |sv_pos_u2b_cached|NN SV *const sv|NN MAGIC **const mgp \
                |NN const U8 *const start|NN const U8 *const send \
-               |const STRLEN uoffset|STRLEN uoffset0|STRLEN boffset0
+               |STRLEN uoffset|STRLEN uoffset0|STRLEN boffset0
 s      |void   |utf8_mg_pos_cache_update|NN SV *const sv|NN MAGIC **const mgp \
                |const STRLEN byte|const STRLEN utf8|const STRLEN blen
 s      |STRLEN |sv_pos_b2u_midway|NN const U8 *const s|NN const U8 *const target \
diff --git a/proto.h b/proto.h
index f25b40c..c1c0f05 100644
--- a/proto.h
+++ b/proto.h
@@ -5809,19 +5809,20 @@ STATIC I32      S_expect_number(pTHX_ char **const pattern)
        assert(pattern)
 
 #
-STATIC STRLEN  S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, STRLEN uoffset)
+STATIC STRLEN  S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send, STRLEN *const uoffset)
                        __attribute__nonnull__(1)
-                       __attribute__nonnull__(2);
+                       __attribute__nonnull__(2)
+                       __attribute__nonnull__(3);
 #define PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS   \
-       assert(start); assert(send)
+       assert(start); assert(send); assert(uoffset)
 
-STATIC STRLEN  S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, const STRLEN uoffset, const STRLEN uend)
+STATIC STRLEN  S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, STRLEN uoffset, const STRLEN uend)
                        __attribute__nonnull__(1)
                        __attribute__nonnull__(2);
 #define PERL_ARGS_ASSERT_SV_POS_U2B_MIDWAY     \
        assert(start); assert(send)
 
-STATIC STRLEN  S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, const U8 *const send, const STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0)
+STATIC STRLEN  S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start, const U8 *const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2)
                        __attribute__nonnull__(pTHX_3)
diff --git a/sv.c b/sv.c
index c38a318..2f13091 100644
--- a/sv.c
+++ b/sv.c
@@ -6089,19 +6089,23 @@ Perl_sv_len_utf8(pTHX_ register SV *const sv)
    offset.  */
 static STRLEN
 S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send,
-                     STRLEN uoffset)
+                     STRLEN *const uoffset_p)
 {
     const U8 *s = start;
+    STRLEN uoffset = *uoffset_p;
 
     PERL_ARGS_ASSERT_SV_POS_U2B_FORWARDS;
 
-    while (s < send && uoffset--)
+    while (s < send && uoffset) {
+       --uoffset;
        s += UTF8SKIP(s);
+    }
     if (s > send) {
        /* This is the existing behaviour. Possibly it should be a croak, as
           it's actually a bounds error  */
        s = send;
     }
+    *uoffset_p -= uoffset;
     return s - start;
 }
 
@@ -6110,7 +6114,7 @@ S_sv_pos_u2b_forwards(const U8 *const start, const U8 *const send,
    the passed in UTF-8 offset.  */
 static STRLEN
 S_sv_pos_u2b_midway(const U8 *const start, const U8 *send,
-                     const STRLEN uoffset, const STRLEN uend)
+                   STRLEN uoffset, const STRLEN uend)
 {
     STRLEN backw = uend - uoffset;
 
@@ -6120,7 +6124,14 @@ S_sv_pos_u2b_midway(const U8 *const start, const U8 *send,
        /* The assumption is that going forwards is twice the speed of going
           forward (that's where the 2 * backw comes from).
           (The real figure of course depends on the UTF-8 data.)  */
-       return sv_pos_u2b_forwards(start, send, uoffset);
+       const U8 *s = start;
+
+       while (s < send && uoffset--)
+           s += UTF8SKIP(s);
+       assert (s <= send);
+       if (s > send)
+           s = send;
+       return s - start;
     }
 
     while (backw--) {
@@ -6141,7 +6152,7 @@ S_sv_pos_u2b_midway(const U8 *const start, const U8 *send,
    created if necessary, and the found value offered to it for update.  */
 static STRLEN
 S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start,
-                   const U8 *const send, const STRLEN uoffset,
+                   const U8 *const send, STRLEN uoffset,
                    STRLEN uoffset0, STRLEN boffset0)
 {
     STRLEN boffset = 0; /* Actually always set, but let's keep gcc happy.  */
@@ -6151,6 +6162,9 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
 
     assert (uoffset >= uoffset0);
 
+    if (!uoffset)
+       return 0;
+
     if (!SvREADONLY(sv)
        && PL_utf8cache
        && (*mgp || (SvTYPE(sv) >= SVt_PVMG &&
@@ -6180,9 +6194,11 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
                                              uoffset - uoffset0,
                                              (*mgp)->mg_len - uoffset0);
                } else {
+                   uoffset -= uoffset0;
                    boffset = boffset0
                        + sv_pos_u2b_forwards(start + boffset0,
-                                               send, uoffset - uoffset0);
+                                               send, &uoffset);
+                   uoffset += uoffset0;
                }
            }
            else if (cache[2] < uoffset) {
@@ -6220,9 +6236,11 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const mgp, const U8 *const start
     }
 
     if (!found || PL_utf8cache < 0) {
-       const STRLEN real_boffset
-           = boffset0 + sv_pos_u2b_forwards(start + boffset0,
-                                              send, uoffset - uoffset0);
+       STRLEN real_boffset;
+       uoffset -= uoffset0;
+       real_boffset = boffset0 + sv_pos_u2b_forwards(start + boffset0,
+                                                     send, &uoffset);
+       uoffset += uoffset0;
 
        if (found && PL_utf8cache < 0) {
            if (real_boffset != boffset) {
@@ -6280,7 +6298,9 @@ Perl_sv_pos_u2b_flags(pTHX_ SV *const sv, STRLEN uoffset, STRLEN *const lenp,
        MAGIC *mg = NULL;
        boffset = sv_pos_u2b_cached(sv, &mg, start, send, uoffset, 0, 0);
 
-       if (lenp) {
+       if (lenp
+           && *lenp /* don't bother doing work for 0, as its bytes equivalent
+                       is 0, and *lenp is already set to that.  */) {
            /* Convert the relative offset to absolute.  */
            const STRLEN uoffset2 = uoffset + *lenp;
            const STRLEN boffset2
diff --git a/t/op/index.t b/t/op/index.t
index 59b5542..5ef69fc 100644
--- a/t/op/index.t
+++ b/t/op/index.t
@@ -7,7 +7,7 @@ BEGIN {
 }
 
 use strict;
-plan( tests => 111 );
+plan( tests => 113 );
 
 run_tests() unless caller;
 
@@ -194,4 +194,13 @@ SKIP: {
     }
 }
 
+{
+    # RT#75898
+    is(eval { utf8::upgrade($_ = " "); index $_, " ", 72 }, -1,
+       'UTF-8 cache handles offset beyond the end of the string');
+    $_ = "\x{100}BC";
+    is(index($_, "C", 4), -1,
+       'UTF-8 cache handles offset beyond the end of the string');
+}
+
 }

--
Perl5 Master Repository

Reply via email to