In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/85375852c1b4f84701cfe21f1161546c33a20d42?hp=c9701768842137264d0245944b038a5c23e589fe>

- Log -----------------------------------------------------------------
commit 85375852c1b4f84701cfe21f1161546c33a20d42
Author: Father Chrysostomos <[email protected]>
Date:   Tue Jul 23 14:38:30 2013 -0700

    pos.t: test something I almost broke

M       t/op/pos.t

commit e5b5ece22456ae38c5fbae9b5630d074b5a23080
Author: Father Chrysostomos <[email protected]>
Date:   Tue Jul 23 14:37:29 2013 -0700

    fix typo in sv.c apidocs
    
    sv_pos_u2b_flags has no offsetp parameter.  This was copied from
    sv_pos_u2b.

M       sv.c

commit ab0894e548ee5755214913c09ee98d7335bcb698
Author: Father Chrysostomos <[email protected]>
Date:   Sun Jul 21 06:56:07 2013 -0700

    sv.c: Remove overflow check in utf8 length cache
    
    In order to make large values of pos() possible, the previous commit
    enlarged the mg_len field of the magic struct.  That field is also
    used for cached utf8 length values.  Since it is now large enough to
    store any length the OS/compiler supports, the overflow never happens.

M       sv.c

commit 6174b39a88cd48740c024cfb6035edb6ffed9f2d
Author: Father Chrysostomos <[email protected]>
Date:   Sun Jul 21 00:38:28 2013 -0700

    [perl #72766] Allow huge pos() settings
    
    This is part of #116907, too.  It also fixes #72924 as a side effect;
    the next commit will explain.
    
    The value of pos($foo) was being stored as an I32, not allowing values
    above I32_MAX.  Change it to SSize_t (the signed equivalent of size_t,
    representing the maximum string length the OS/compiler supports).
    
    This is accomplished by changing the size of the entry in the magic
    struct, which is the simplest fix.
    
    Other parts of the code base can benefit from this, too.
    
    We actually cast the pos value to STRLEN (size_t) when reading
    it, to allow *very* long strings.  Only the value -1 is special,
    meaning there is no pos.  So the maximum supported offset is
    2**sizeof(size_t)-2.
    
    The regexp engine itself still cannot handle large strings, so being
    able to set pos to large values is useless right now.  This is but one
    piece in a larger puzzle.
    
    Changing the size of mg->mg_len also requires that
    Perl_hv_placeholders_p change its type.  This function
    should in fact not be in the API, since it exists
    solely to implement the HvPLACEHOLDERS macro.  See
    <https://rt.perl.org/rt3/Ticket/Display.html?id=116907#txn-1237043>.

M       MANIFEST
M       embed.fnc
M       hv.c
M       mg.c
M       mg.h
M       pp.c
M       proto.h
A       t/bigmem/pos.t

commit ef54055c17bc9effcdf6a8135d2b375b7c35dd62
Author: Father Chrysostomos <[email protected]>
Date:   Sun Jul 21 00:30:20 2013 -0700

    Add sv_pos_b2u_flags
    
    This, similar to sv_pos_u2b_flags, is a more friendly variant of
    sv_pos_u2b that works with 2GB strings and actually returns a
    value instead of modifying a passed-in value in place through
    a pointer.
    
    The next commit will use this.

M       embed.fnc
M       embed.h
M       proto.h
M       sv.c
-----------------------------------------------------------------------

Summary of changes:
 MANIFEST              |  1 +
 embed.fnc             |  4 ++-
 embed.h               |  1 +
 hv.c                  |  2 +-
 mg.c                  |  8 ++---
 mg.h                  |  2 +-
 pod/perl5180delta.pod |  2 +-
 pp.c                  |  8 ++---
 proto.h               |  7 +++-
 sv.c                  | 92 ++++++++++++++++++++++++++++++++-------------------
 t/bigmem/pos.t        | 25 ++++++++++++++
 t/op/pos.t            |  4 ++-
 12 files changed, 108 insertions(+), 48 deletions(-)
 create mode 100644 t/bigmem/pos.t

diff --git a/MANIFEST b/MANIFEST
index 8fb53e0..4b68b19 100644
--- a/MANIFEST
+++ b/MANIFEST
@@ -4848,6 +4848,7 @@ t/base/rs.t                       See if record-read works
 t/base/term.t                  See if various terms work
 t/base/while.t                 See if while work
 t/benchmark/rt26188-speed-up-keys-on-empty-hash.t      Benchmark if keys on 
empty hashes is fast enough
+t/bigmem/pos.t                 Check that pos() handles large offsets
 t/bigmem/read.t                        Check read() handles large offsets
 t/bigmem/vec.t                 Check vec() handles large offsets
 t/cmd/elsif.t                  See if else-if works
diff --git a/embed.fnc b/embed.fnc
index 18919f1..93944ba 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1375,6 +1375,8 @@ Apd       |void   |sv_pos_u2b     |NULLOK SV *const sv|NN 
I32 *const offsetp|NULLOK I32 *con
 Apd    |STRLEN |sv_pos_u2b_flags|NN SV *const sv|STRLEN uoffset \
                                |NULLOK STRLEN *const lenp|U32 flags
 Apd    |void   |sv_pos_b2u     |NULLOK SV *const sv|NN I32 *const offsetp
+Apd    |STRLEN |sv_pos_b2u_flags|NN SV *const sv|STRLEN const offset \
+                                |U32 flags
 Amdb   |char*  |sv_pvn_force   |NN SV* sv|NULLOK STRLEN* lp
 Apd    |char*  |sv_pvutf8n_force|NN SV *const sv|NULLOK STRLEN *const lp
 Apd    |char*  |sv_pvbyten_force|NN SV *const sv|NULLOK STRLEN *const lp
@@ -2458,7 +2460,7 @@ poM       |AV**   |hv_backreferences_p    |NN HV *hv
 poM    |void   |hv_kill_backrefs       |NN HV *hv
 #endif
 Apd    |void   |hv_clear_placeholders  |NN HV *hv
-ApoR   |I32*   |hv_placeholders_p      |NN HV *hv
+XpoR   |SSize_t*|hv_placeholders_p     |NN HV *hv
 ApoR   |I32    |hv_placeholders_get    |NN const HV *hv
 Apo    |void   |hv_placeholders_set    |NN HV *hv|I32 ph
 
diff --git a/embed.h b/embed.h
index 795dd8c..4a0d28b 100644
--- a/embed.h
+++ b/embed.h
@@ -607,6 +607,7 @@
 #define sv_nv(a)               Perl_sv_nv(aTHX_ a)
 #define sv_peek(a)             Perl_sv_peek(aTHX_ a)
 #define sv_pos_b2u(a,b)                Perl_sv_pos_b2u(aTHX_ a,b)
+#define sv_pos_b2u_flags(a,b,c)        Perl_sv_pos_b2u_flags(aTHX_ a,b,c)
 #define sv_pos_u2b(a,b,c)      Perl_sv_pos_u2b(aTHX_ a,b,c)
 #define sv_pos_u2b_flags(a,b,c,d)      Perl_sv_pos_u2b_flags(aTHX_ a,b,c,d)
 #define sv_pvbyten(a,b)                Perl_sv_pvbyten(aTHX_ a,b)
diff --git a/hv.c b/hv.c
index a2db86a..22d5603 100644
--- a/hv.c
+++ b/hv.c
@@ -2895,7 +2895,7 @@ S_share_hek_flags(pTHX_ const char *str, I32 len, U32 
hash, int flags)
     return HeKEY_hek(entry);
 }
 
-I32 *
+SSize_t *
 Perl_hv_placeholders_p(pTHX_ HV *hv)
 {
     dVAR;
diff --git a/mg.c b/mg.c
index 0dd23f6..4ef6c25 100644
--- a/mg.c
+++ b/mg.c
@@ -2096,11 +2096,11 @@ Perl_magic_getpos(pTHX_ SV *sv, MAGIC *mg)
     PERL_ARGS_ASSERT_MAGIC_GETPOS;
     PERL_UNUSED_ARG(mg);
 
-    if (found && found->mg_len >= 0) {
-           I32 i = found->mg_len;
+    if (found && found->mg_len != -1) {
+           STRLEN i = found->mg_len;
            if (DO_UTF8(lsv))
-               sv_pos_b2u(lsv, &i);
-           sv_setiv(sv, i);
+               i = sv_pos_b2u_flags(lsv, i, SV_GMAGIC|SV_CONST_RETURN);
+           sv_setuv(sv, i);
            return 0;
     }
     SvOK_off(sv);
diff --git a/mg.h b/mg.h
index 4114941..de673d4 100644
--- a/mg.h
+++ b/mg.h
@@ -26,7 +26,7 @@ struct magic {
     U16                mg_private;
     char       mg_type;
     U8         mg_flags;
-    I32                mg_len;
+    SSize_t    mg_len;
     SV*                mg_obj;
     char*      mg_ptr;
 };
diff --git a/pod/perl5180delta.pod b/pod/perl5180delta.pod
index 86c4289..6ce41ef 100644
--- a/pod/perl5180delta.pod
+++ b/pod/perl5180delta.pod
@@ -2032,7 +2032,7 @@ accepts the default value.  This helps B<perldoc> when 
handling ANSI escapes
 The test suite now has a section for tests that require very large amounts
 of memory.  These tests won't run by default; they can be enabled by
 setting the C<PERL_TEST_MEMORY> environment variable to the number of
-gibibytes of memory that may be safely used.
+gibabytes of memory that may be safely used.
 
 =back
 
diff --git a/pp.c b/pp.c
index 1aaeefc..cadfe96 100644
--- a/pp.c
+++ b/pp.c
@@ -439,12 +439,12 @@ PP(pp_pos)
     }
     else {
            const MAGIC * const mg = mg_find_mglob(sv);
-           if (mg && mg->mg_len >= 0) {
+           if (mg && mg->mg_len != -1) {
                dTARGET;
-               I32 i = mg->mg_len;
+               STRLEN i = mg->mg_len;
                if (DO_UTF8(sv))
-                   sv_pos_b2u(sv, &i);
-               PUSHi(i);
+                   i = sv_pos_b2u_flags(sv, i, SV_GMAGIC|SV_CONST_RETURN);
+               PUSHu(i);
                RETURN;
            }
            RETPUSHUNDEF;
diff --git a/proto.h b/proto.h
index 32607ff..e1425d2 100644
--- a/proto.h
+++ b/proto.h
@@ -1631,7 +1631,7 @@ PERL_CALLCONV I32 Perl_hv_placeholders_get(pTHX_ const HV 
*hv)
 #define PERL_ARGS_ASSERT_HV_PLACEHOLDERS_GET   \
        assert(hv)
 
-PERL_CALLCONV I32*     Perl_hv_placeholders_p(pTHX_ HV *hv)
+PERL_CALLCONV SSize_t* Perl_hv_placeholders_p(pTHX_ HV *hv)
                        __attribute__warn_unused_result__
                        __attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_HV_PLACEHOLDERS_P     \
@@ -4157,6 +4157,11 @@ PERL_CALLCONV void       Perl_sv_pos_b2u(pTHX_ SV *const 
sv, I32 *const offsetp)
 #define PERL_ARGS_ASSERT_SV_POS_B2U    \
        assert(offsetp)
 
+PERL_CALLCONV STRLEN   Perl_sv_pos_b2u_flags(pTHX_ SV *const sv, STRLEN const 
offset, U32 flags)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_SV_POS_B2U_FLAGS      \
+       assert(sv)
+
 PERL_CALLCONV void     Perl_sv_pos_u2b(pTHX_ SV *const sv, I32 *const offsetp, 
I32 *const lenp)
                        __attribute__nonnull__(pTHX_2);
 #define PERL_ARGS_ASSERT_SV_POS_U2B    \
diff --git a/sv.c b/sv.c
index 0f682ea..0dfff05 100644
--- a/sv.c
+++ b/sv.c
@@ -6984,7 +6984,7 @@ S_sv_pos_u2b_cached(pTHX_ SV *const sv, MAGIC **const 
mgp, const U8 *const start
 /*
 =for apidoc sv_pos_u2b_flags
 
-Converts the value pointed to by offsetp from a count of UTF-8 chars from
+Converts the offset from a count of UTF-8 chars from
 the start of the string, to a count of the equivalent number of bytes; if
 lenp is non-zero, it does the same to lenp, but this time starting from
 the offset, rather than from the start
@@ -7093,9 +7093,6 @@ S_utf8_mg_len_cache_update(pTHX_ SV *const sv, MAGIC 
**const mgp,
     assert(*mgp);
 
     (*mgp)->mg_len = ulen;
-    /* For now, treat "overflowed" as "still unknown". See RT #72924.  */
-    if (ulen != (STRLEN) (*mgp)->mg_len)
-       (*mgp)->mg_len = -1;
 }
 
 /* Create and update the UTF8 magic offset cache, with the proffered utf8/
@@ -7273,44 +7270,41 @@ S_sv_pos_b2u_midway(pTHX_ const U8 *const s, const U8 
*const target,
 }
 
 /*
-=for apidoc sv_pos_b2u
+=for apidoc sv_pos_b2u_flags
 
-Converts the value pointed to by offsetp from a count of bytes from the
-start of the string, to a count of the equivalent number of UTF-8 chars.
-Handles magic and type coercion.
+Converts the offset from a count of bytes from the start of the string, to
+a count of the equivalent number of UTF-8 chars.  Handles type coercion.
+I<flags> is passed to C<SvPV_flags>, and usually should be
+C<SV_GMAGIC|SV_CONST_RETURN> to handle magic.
 
 =cut
 */
 
 /*
- * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential
- * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and
- * byte offsets.
+ * sv_pos_b2u_flags() uses, like sv_pos_u2b_flags(), the mg_ptr of the
+ * potential PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8
+ * and byte offsets.
  *
  */
-void
-Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
+STRLEN
+Perl_sv_pos_b2u_flags(pTHX_ SV *const sv, STRLEN const offset, U32 flags)
 {
     const U8* s;
-    const STRLEN byte = *offsetp;
     STRLEN len = 0; /* Actually always set, but let's keep gcc happy.  */
     STRLEN blen;
     MAGIC* mg = NULL;
     const U8* send;
     bool found = FALSE;
 
-    PERL_ARGS_ASSERT_SV_POS_B2U;
-
-    if (!sv)
-       return;
+    PERL_ARGS_ASSERT_SV_POS_B2U_FLAGS;
 
-    s = (const U8*)SvPV_const(sv, blen);
+    s = (const U8*)SvPV_flags(sv, blen, flags);
 
-    if (blen < byte)
+    if (blen < offset)
        Perl_croak(aTHX_ "panic: sv_pos_b2u: bad byte offset, blen=%"UVuf
-                  ", byte=%"UVuf, (UV)blen, (UV)byte);
+                  ", byte=%"UVuf, (UV)blen, (UV)offset);
 
-    send = s + byte;
+    send = s + offset;
 
     if (!SvREADONLY(sv)
        && PL_utf8cache
@@ -7319,18 +7313,16 @@ Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
     {
        if (mg->mg_ptr) {
            STRLEN * const cache = (STRLEN *) mg->mg_ptr;
-           if (cache[1] == byte) {
+           if (cache[1] == offset) {
                /* An exact match. */
-               *offsetp = cache[0];
-               return;
+               return cache[0];
            }
-           if (cache[3] == byte) {
+           if (cache[3] == offset) {
                /* An exact match. */
-               *offsetp = cache[2];
-               return;
+               return cache[2];
            }
 
-           if (cache[1] < byte) {
+           if (cache[1] < offset) {
                /* We already know part of the way. */
                if (mg->mg_len != -1) {
                    /* Actually, we know the end too.  */
@@ -7341,7 +7333,7 @@ Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
                    len = cache[0] + utf8_length(s + cache[1], send);
                }
            }
-           else if (cache[3] < byte) {
+           else if (cache[3] < offset) {
                /* We're between the two cached pairs, so we do the calculation
                   offset by the byte/utf-8 positions for the earlier pair,
                   then add the utf-8 characters from the string start to
@@ -7351,7 +7343,7 @@ Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
                    + cache[2];
 
            }
-           else { /* cache[3] > byte */
+           else { /* cache[3] > offset */
                len = S_sv_pos_b2u_midway(aTHX_ s, send, s + cache[3],
                                          cache[2]);
 
@@ -7370,14 +7362,46 @@ Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
            assert_uft8_cache_coherent("sv_pos_b2u", len, real_len, sv);
        len = real_len;
     }
-    *offsetp = len;
 
     if (PL_utf8cache) {
-       if (blen == byte)
+       if (blen == offset)
            utf8_mg_len_cache_update(sv, &mg, len);
        else
-           utf8_mg_pos_cache_update(sv, &mg, byte, len, blen);
+           utf8_mg_pos_cache_update(sv, &mg, offset, len, blen);
     }
+
+    return len;
+}
+
+/*
+=for apidoc sv_pos_b2u
+
+Converts the value pointed to by offsetp from a count of bytes from the
+start of the string, to a count of the equivalent number of UTF-8 chars.
+Handles magic and type coercion.
+
+Use C<sv_pos_b2u_flags> in preference, which correctly handles strings
+longer than 2Gb.
+
+=cut
+*/
+
+/*
+ * sv_pos_b2u() uses, like sv_pos_u2b(), the mg_ptr of the potential
+ * PERL_MAGIC_utf8 of the sv to store the mapping between UTF-8 and
+ * byte offsets.
+ *
+ */
+void
+Perl_sv_pos_b2u(pTHX_ SV *const sv, I32 *const offsetp)
+{
+    PERL_ARGS_ASSERT_SV_POS_B2U;
+
+    if (!sv)
+       return;
+
+    *offsetp = (I32)sv_pos_b2u_flags(sv, (STRLEN)*offsetp,
+                                    SV_GMAGIC|SV_CONST_RETURN);
 }
 
 static void
diff --git a/t/bigmem/pos.t b/t/bigmem/pos.t
new file mode 100644
index 0000000..aad93f6
--- /dev/null
+++ b/t/bigmem/pos.t
@@ -0,0 +1,25 @@
+#!perl
+BEGIN {
+    chdir 't';
+    unshift @INC, "../lib";
+    require './test.pl';
+}
+
+use Config qw(%Config);
+
+$ENV{PERL_TEST_MEMORY} >= 2
+    or skip_all("Need ~2Gb for this test");
+$Config{ptrsize} >= 8
+    or skip_all("Need 64-bit pointers for this test");
+
+plan(3);
+
+# [perl #116907]
+# ${\2} to defeat constant folding, which in this case actually slows
+# things down
+my $x=" "x(${\2}**31+20);
+pos $x = 2**31-5;
+is pos $x, 2147483643, 'setting pos on large string';
+pos $x += 10;
+is pos $x, 2147483653, 'reading lvalue pos after setting it > 2**31';
+is scalar(pos $x), 2147483653, 'reading it with pos() in rvalue context';
diff --git a/t/op/pos.t b/t/op/pos.t
index 4eca3a6..b9691ad 100644
--- a/t/op/pos.t
+++ b/t/op/pos.t
@@ -6,7 +6,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan tests => 21;
+plan tests => 22;
 
 $x='banana';
 $x=~/.a/g;
@@ -36,6 +36,8 @@ $x = "\x{100}BC";
 $x =~ m/.*/g;
 is(pos $x, 3, "utf8_mg_len_cache_update() test");
 
+is(scalar pos $x, 3, "rvalue pos() utf8 test");
+
 
 my $destroyed;
 { package Class; DESTROY { ++$destroyed; } }

--
Perl5 Master Repository

Reply via email to