Change 30069 by [EMAIL PROTECTED] on 2007/01/29 21:05:26

        Integrate:
        [ 29201]
        Subject: [PATCH] replace S_sv_pos_b2u_forwards with utf8_length (was: 
sv_pos_b2u dislikes the extended UTF-8)
        From: SADAHIRO Tomoyuki <[EMAIL PROTECTED]>
        Date: Sun, 21 May 2006 14:00:43 +0900
        Message-Id: <[EMAIL PROTECTED]>
        
        [ 29229]
        Subject: [PATCH] Perl_die() / Perl_croak()
        From: "Philip M. Gollucci" <[EMAIL PROTECTED]>
        Date: Tue, 07 Nov 2006 23:27:40 -0800
        Message-ID: <[EMAIL PROTECTED]>
        
        (backported to embed.fnc)
        
        [ 29259]
        Subject: [PATCH] was RE: Perl_die() / Perl_croak()
        From: "Robin Barker" <[EMAIL PROTECTED]>
        Date: Mon, 13 Nov 2006 10:25:08 -0000
        Message-ID: <[EMAIL PROTECTED]>
        
        [ 29313]
        Tweaks needed to get Perl compiling with g++ on OpenBSD.
        
        [ 29363]
        Remove superfluous SvROK()
        
        [ 29386]
        Change (well, correct) S_add_data to take and return a U32.
        
        [ 29387]
        Simplify S_add_data(), given that realloc will NULL acts as malloc().
        
        [ 29476]
        save_I8 is not a mathom.
        
        [ 29482]
        Subject: [PATCH] Perl_save_destructor is not a mathom
        From: "Jerry D. Hedden" <[EMAIL PROTECTED]>
        Date: Wed, 6 Dec 2006 11:12:13 -0800 (PST)
        Message-ID: <[EMAIL PROTECTED]>
        
        [ 29486]
        No need to upgrade if all we're going to do is return.

Affected files ...

... //depot/maint-5.8/perl/embed.fnc#202 integrate
... //depot/maint-5.8/perl/embed.h#151 integrate
... //depot/maint-5.8/perl/embed.pl#32 integrate
... //depot/maint-5.8/perl/mathoms.c#27 integrate
... //depot/maint-5.8/perl/perl.h#152 integrate
... //depot/maint-5.8/perl/perlio.c#102 integrate
... //depot/maint-5.8/perl/proto.h#193 integrate
... //depot/maint-5.8/perl/regcomp.c#97 edit
... //depot/maint-5.8/perl/scope.c#65 edit
... //depot/maint-5.8/perl/sv.c#334 edit
... //depot/maint-5.8/perl/t/op/index.t#6 integrate
... //depot/maint-5.8/perl/util.c#138 integrate

Differences ...

==== //depot/maint-5.8/perl/embed.fnc#202 (text) ====
Index: perl/embed.fnc
--- perl/embed.fnc#201~30042~   2007-01-27 14:45:18.000000000 -0800
+++ perl/embed.fnc      2007-01-29 13:05:26.000000000 -0800
@@ -1278,7 +1278,7 @@
 Es     |I32    |study_chunk    |NN struct RExC_state_t* state|NN regnode 
**scanp \
                                |NN I32 *deltap|NN regnode *last|NULLOK struct 
scan_data_t *data \
                                |U32 flags
-EsRn   |I32    |add_data       |NN struct RExC_state_t* state|I32 n|NN const 
char *s
+EsRn   |U32    |add_data       |NN struct RExC_state_t* state|U32 n|NN const 
char *s
 rs     |void   |re_croak2      |NN const char* pat1|NN const char* pat2|...
 Es     |I32    |regpposixcc    |NN struct RExC_state_t* state|I32 value
 Es     |void   |checkposixcc   |NN struct RExC_state_t* state
@@ -1334,7 +1334,6 @@
                |STRLEN uoffset|STRLEN uoffset0|STRLEN boffset0
 s      |void   |utf8_mg_pos_cache_update|NN SV *sv|NN MAGIC **mgp \
                |STRLEN byte|STRLEN utf8|STRLEN blen
-s      |STRLEN |sv_pos_b2u_forwards|NN const U8 *s|NN const U8 *const target
 s      |STRLEN |sv_pos_b2u_midway|NN const U8 *s|NN const U8 *const target \
                |NN const U8 *end|STRLEN endu
 s      |char * |stringify_regexp|NN SV *sv|NN MAGIC *mg|NULLOK STRLEN *lp

==== //depot/maint-5.8/perl/embed.h#151 (text+w) ====
Index: perl/embed.h
--- perl/embed.h#150~30033~     2007-01-27 08:40:35.000000000 -0800
+++ perl/embed.h        2007-01-29 13:05:26.000000000 -0800
@@ -1357,7 +1357,6 @@
 #define sv_pos_u2b_midway      S_sv_pos_u2b_midway
 #define sv_pos_u2b_cached      S_sv_pos_u2b_cached
 #define utf8_mg_pos_cache_update       S_utf8_mg_pos_cache_update
-#define sv_pos_b2u_forwards    S_sv_pos_b2u_forwards
 #define sv_pos_b2u_midway      S_sv_pos_b2u_midway
 #define stringify_regexp       S_stringify_regexp
 #define F0convert              S_F0convert
@@ -3452,7 +3451,6 @@
 #define sv_pos_u2b_midway      S_sv_pos_u2b_midway
 #define sv_pos_u2b_cached(a,b,c,d,e,f,g)       S_sv_pos_u2b_cached(aTHX_ 
a,b,c,d,e,f,g)
 #define utf8_mg_pos_cache_update(a,b,c,d,e)    
S_utf8_mg_pos_cache_update(aTHX_ a,b,c,d,e)
-#define sv_pos_b2u_forwards(a,b)       S_sv_pos_b2u_forwards(aTHX_ a,b)
 #define sv_pos_b2u_midway(a,b,c,d)     S_sv_pos_b2u_midway(aTHX_ a,b,c,d)
 #define stringify_regexp(a,b,c)        S_stringify_regexp(aTHX_ a,b,c)
 #define F0convert              S_F0convert

==== //depot/maint-5.8/perl/embed.pl#32 (xtext) ====
Index: perl/embed.pl
--- perl/embed.pl#31~29897~     2007-01-20 10:14:46.000000000 -0800
+++ perl/embed.pl       2007-01-29 13:05:26.000000000 -0800
@@ -217,10 +217,14 @@
            push @attrs, "__attribute__pure__";
        }
        if( $flags =~ /f/ ) {
-           my $prefix = $has_context ? 'pTHX_' : '';
-           my $args = scalar @args;
-           push @attrs, sprintf "__attribute__format__(__printf__,%s%d,%s%d)",
-                                   $prefix, $args - 1, $prefix, $args;
+           my $prefix  = $has_context ? 'pTHX_' : '';
+           my $args    = scalar @args;
+           my $pat     = $args - 1;
+           my $macro   = @nonnull && $nonnull[-1] == $pat  
+                               ? '__attribute__format__'
+                               : '__attribute__format__null_ok__';
+           push @attrs, sprintf "%s(__printf__,%s%d,%s%d)", $macro,
+                               $prefix, $pat, $prefix, $args;
        }
        if ( @nonnull ) {
            my @pos = map { $has_context ? "pTHX_$_" : $_ } @nonnull;

==== //depot/maint-5.8/perl/mathoms.c#27 (text) ====
Index: perl/mathoms.c
--- perl/mathoms.c#26~30036~    2007-01-27 09:35:47.000000000 -0800
+++ perl/mathoms.c      2007-01-29 13:05:26.000000000 -0800
@@ -1181,15 +1181,6 @@
 }
 
 void
-Perl_save_I8(pTHX_ I8 *bytep)
-{
-    SSCHECK(3);
-    SSPUSHINT(*bytep);
-    SSPUSHPTR(bytep);
-    SSPUSHINT(SAVEt_I8);
-}
-
-void
 Perl_save_iv(pTHX_ IV *ivp)
 {
     SSCHECK(3);
@@ -1221,16 +1212,6 @@
     }
 }
 
-void
-Perl_save_destructor(pTHX_ DESTRUCTORFUNC_NOCONTEXT_t f, void* p)
-{
-    SSCHECK(3);
-    SSPUSHDPTR(f);
-    SSPUSHPTR(p);
-    SSPUSHINT(SAVEt_DESTRUCTOR);
-}
-
-
 /*
 =for apidoc sv_usepvn_mg
 

==== //depot/maint-5.8/perl/perl.h#152 (text) ====
Index: perl/perl.h
--- perl/perl.h#151~30060~      2007-01-29 09:14:16.000000000 -0800
+++ perl/perl.h 2007-01-29 13:05:26.000000000 -0800
@@ -1095,6 +1095,10 @@
 EXTERN_C char **environ;
 #endif
 
+#if defined(__OpenBSD__) && defined(__cplusplus)
+EXTERN_C char **environ;
+#endif
+
 #ifdef SETERRNO
 # undef SETERRNO  /* SOCKS might have defined this */
 #endif
@@ -3023,6 +3027,13 @@
 #  define NORETURN_FUNCTION_END /* NOTREACHED */ return 0
 #endif
 
+/* Some OS warn on NULL format to printf */
+#ifdef PRINTF_FORMAT_NULL_OK
+#  define __attribute__format__null_ok__(x,y,z)  __attribute__format__(x,y,z)
+#else
+#  define __attribute__format__null_ok__(x,y,z)  
+#endif
+
 #ifdef HAS_BUILTIN_EXPECT
 #  define EXPECT(expr,val)                  __builtin_expect(expr,val)
 #else

==== //depot/maint-5.8/perl/perlio.c#102 (text) ====
Index: perl/perlio.c
--- perl/perlio.c#101~30065~    2007-01-29 10:52:30.000000000 -0800
+++ perl/perlio.c       2007-01-29 13:05:26.000000000 -0800
@@ -4560,7 +4560,7 @@
                }
                posn = (b->posn / page_size) * page_size;
                len = st.st_size - posn;
-               m->mptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, posn);
+               m->mptr = (Mmap_t)mmap(NULL, len, PROT_READ, MAP_SHARED, fd, 
posn);
                if (m->mptr && m->mptr != (Mmap_t) - 1) {
 #if 0 && defined(HAS_MADVISE) && defined(MADV_SEQUENTIAL)
                    madvise(m->mptr, len, MADV_SEQUENTIAL);

==== //depot/maint-5.8/perl/proto.h#193 (text+w) ====
Index: perl/proto.h
--- perl/proto.h#192~30042~     2007-01-27 14:45:18.000000000 -0800
+++ perl/proto.h        2007-01-29 13:05:26.000000000 -0800
@@ -154,7 +154,7 @@
 PERL_CALLCONV PERL_CONTEXT*    Perl_create_eval_scope(pTHX_ U32 flags);
 PERL_CALLCONV void     Perl_croak(pTHX_ const char* pat, ...)
                        __attribute__noreturn__
-                       __attribute__format__(__printf__,pTHX_1,pTHX_2);
+                       
__attribute__format__null_ok__(__printf__,pTHX_1,pTHX_2);
 
 PERL_CALLCONV void     Perl_vcroak(pTHX_ const char* pat, va_list* args)
                        __attribute__noreturn__;
@@ -260,7 +260,7 @@
 PERL_CALLCONV void     Perl_deprecate(pTHX_ char* s);
 PERL_CALLCONV void     Perl_deprecate_old(pTHX_ char* s);
 PERL_CALLCONV OP*      Perl_die(pTHX_ const char* pat, ...)
-                       __attribute__format__(__printf__,pTHX_1,pTHX_2);
+                       
__attribute__format__null_ok__(__printf__,pTHX_1,pTHX_2);
 
 PERL_CALLCONV OP*      Perl_vdie(pTHX_ const char* pat, va_list* args);
 PERL_CALLCONV OP*      Perl_die_where(pTHX_ char* message, STRLEN msglen);
@@ -1844,7 +1844,7 @@
 STATIC void    S_cl_and(struct regnode_charclass_class *cl, const struct 
regnode_charclass_class *and_with);
 STATIC void    S_cl_or(const struct RExC_state_t* state, struct 
regnode_charclass_class *cl, const struct regnode_charclass_class *or_with);
 STATIC I32     S_study_chunk(pTHX_ struct RExC_state_t* state, regnode 
**scanp, I32 *deltap, regnode *last, struct scan_data_t *data, U32 flags);
-STATIC I32     S_add_data(struct RExC_state_t* state, I32 n, const char *s)
+STATIC U32     S_add_data(struct RExC_state_t* state, U32 n, const char *s)
                        __attribute__warn_unused_result__;
 
 STATIC void    S_re_croak2(pTHX_ const char* pat1, const char* pat2, ...)
@@ -1916,7 +1916,6 @@
 STATIC STRLEN  S_sv_pos_u2b_midway(const U8 *const start, const U8 *send, 
STRLEN uoffset, STRLEN uend);
 STATIC STRLEN  S_sv_pos_u2b_cached(pTHX_ SV *sv, MAGIC **mgp, const U8 *const 
start, const U8 *const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0);
 STATIC void    S_utf8_mg_pos_cache_update(pTHX_ SV *sv, MAGIC **mgp, STRLEN 
byte, STRLEN utf8, STRLEN blen);
-STATIC STRLEN  S_sv_pos_b2u_forwards(pTHX_ const U8 *s, const U8 *const 
target);
 STATIC STRLEN  S_sv_pos_b2u_midway(pTHX_ const U8 *s, const U8 *const target, 
const U8 *end, STRLEN endu);
 STATIC char *  S_stringify_regexp(pTHX_ SV *sv, MAGIC *mg, STRLEN *lp);
 STATIC char *  S_F0convert(NV nv, char *endbuf, STRLEN *len);

==== //depot/maint-5.8/perl/regcomp.c#97 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#96~30058~    2007-01-29 08:46:38.000000000 -0800
+++ perl/regcomp.c      2007-01-29 13:05:26.000000000 -0800
@@ -1662,25 +1662,21 @@
     return min;
 }
 
-STATIC I32
-S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s)
+STATIC U32
+S_add_data(RExC_state_t *pRExC_state, U32 n, const char *s)
 {
-    if (RExC_rx->data) {
-       const U32 count = RExC_rx->data->count;
-       Renewc(RExC_rx->data,
-              sizeof(*RExC_rx->data) + sizeof(void*) * (count + n - 1),
-              char, struct reg_data);
+    U32 count = RExC_rx->data ? RExC_rx->data->count : 0;
+
+    Renewc(RExC_rx->data,
+          sizeof(*RExC_rx->data) + sizeof(void*) * (count + n - 1),
+          char, struct reg_data);
+    if(count)
        Renew(RExC_rx->data->what, count + n, U8);
-       RExC_rx->data->count += n;
-    }
-    else {
-       Newxc(RExC_rx->data, sizeof(*RExC_rx->data) + sizeof(void*) * (n - 1),
-            char, struct reg_data);
+    else
        Newx(RExC_rx->data->what, n, U8);
-       RExC_rx->data->count = n;
-    }
-    Copy(s, RExC_rx->data->what + RExC_rx->data->count - n, n, U8);
-    return RExC_rx->data->count - n;
+    RExC_rx->data->count = count + n;
+    Copy(s, RExC_rx->data->what + count, n, U8);
+    return count;
 }
 
 void
@@ -2023,7 +2019,7 @@
            && !(data.start_class->flags & ANYOF_EOS)
            && !cl_is_anything(data.start_class))
        {
-           const I32 n = add_data(pRExC_state, 1, "f");
+           const U32 n = add_data(pRExC_state, 1, "f");
 
            Newx(RExC_rx->data->data[n], 1,
                struct regnode_charclass_class);
@@ -2079,7 +2075,7 @@
        if (!(data.start_class->flags & ANYOF_EOS)
            && !cl_is_anything(data.start_class))
        {
-           const I32 n = add_data(pRExC_state, 1, "f");
+           const U32 n = add_data(pRExC_state, 1, "f");
 
            Newx(RExC_rx->data->data[n], 1,
                struct regnode_charclass_class);
@@ -2199,7 +2195,8 @@
                /* FALL THROUGH */
            case '{':           /* (?{...}) */
            {
-               I32 count = 1, n = 0;
+               I32 count = 1;
+               U32 n = 0;
                char c;
                char *s = RExC_parse;
 

==== //depot/maint-5.8/perl/scope.c#65 (text) ====
Index: perl/scope.c
--- perl/scope.c#64~30066~      2007-01-29 11:07:36.000000000 -0800
+++ perl/scope.c        2007-01-29 13:05:26.000000000 -0800
@@ -357,6 +357,15 @@
 }
 
 void
+Perl_save_I8(pTHX_ I8 *bytep)
+{
+    SSCHECK(3);
+    SSPUSHINT(*bytep);
+    SSPUSHPTR(bytep);
+    SSPUSHINT(SAVEt_I8);
+}
+
+void
 Perl_save_I32(pTHX_ I32 *intp)
 {
     SSCHECK(3);
@@ -492,6 +501,15 @@
 }
 
 void
+Perl_save_destructor(pTHX_ DESTRUCTORFUNC_NOCONTEXT_t f, void* p)
+{
+    SSCHECK(3);
+    SSPUSHDPTR(f);
+    SSPUSHPTR(p);
+    SSPUSHINT(SAVEt_DESTRUCTOR);
+}
+
+void
 Perl_save_destructor_x(pTHX_ DESTRUCTORFUNC_t f, void* p)
 {
     SSCHECK(3);

==== //depot/maint-5.8/perl/sv.c#334 (text) ====
Index: perl/sv.c
--- perl/sv.c#333~30065~        2007-01-29 10:52:30.000000000 -0800
+++ perl/sv.c   2007-01-29 13:05:26.000000000 -0800
@@ -3430,8 +3430,7 @@
     sflags = SvFLAGS(sstr);
 
     if (sflags & SVf_ROK) {
-       if (dtype == SVt_PVGV &&
-           SvROK(sstr) && SvTYPE(SvRV(sstr)) == SVt_PVGV) {
+       if (dtype == SVt_PVGV && SvTYPE(SvRV(sstr)) == SVt_PVGV) {
            sstr = SvRV(sstr);
            if (sstr == dstr) {
                if (GvIMPORTED(dstr) != GVf_IMPORTED
@@ -5117,18 +5116,7 @@
 
     if (PL_utf8cache < 0) {
        const U8 *start = (const U8 *) SvPVX_const(sv);
-       const U8 *const end = start + byte;
-       STRLEN realutf8 = 0;
-
-       while (start < end) {
-           start += UTF8SKIP(start);
-           realutf8++;
-       }
-
-       /* Can't use S_sv_pos_b2u_forwards as it will scream warnings on
-          surrogates.  FIXME - is it inconsistent that b2u warns, but u2b
-          doesn't?  I don't know whether this difference was introduced with
-          the caching code in 5.8.1.  */
+       const STRLEN realutf8 = utf8_length((U8 *)start, (U8 *)start + byte);
 
        if (realutf8 != utf8) {
            /* Need to turn the assertions off otherwise we may recurse
@@ -5239,29 +5227,6 @@
     ASSERT_UTF8_CACHE(cache);
 }
 
-/* If we don't know the character offset of the end of a region, our only
-   option is to walk forwards to the target byte offset.  */
-static STRLEN
-S_sv_pos_b2u_forwards(pTHX_ const U8 *s, const U8 *const target)
-{
-    STRLEN len = 0;
-    while (s < target) {
-       STRLEN n = 1;
-
-       /* Call utf8n_to_uvchr() to validate the sequence
-        * (unless a simple non-UTF character) */
-       if (!UTF8_IS_INVARIANT(*s))
-           utf8n_to_uvchr((U8 *)s, UTF8SKIP(s), &n, 0);
-       if (n > 0) {
-           s += n;
-           len++;
-       }
-       else
-           break;
-    }
-    return len;
-}
-
 /* We already know all of the way, now we may be able to walk back.  The same
    assumption is made as in S_sv_pos_u2b_midway(), namely that walking
    backward is half the speed of walking forward. */
@@ -5273,7 +5238,7 @@
     STRLEN backw = end - target;
 
     if (forw < 2 * backw) {
-       return S_sv_pos_b2u_forwards(aTHX_ s, target);
+       return utf8_length((U8 *)s, (U8 *)target);
     }
 
     while (end > target) {
@@ -5346,8 +5311,8 @@
                        + S_sv_pos_b2u_midway(aTHX_ s + cache[1], send,
                                              s + blen, mg->mg_len - cache[0]);
                } else {
-                   len = cache[0]
-                       + S_sv_pos_b2u_forwards(aTHX_ s + cache[1], send);
+                   len = cache[0] + utf8_length((U8 *)s + cache[1],
+                                                (U8 *)send);
                }
            }
            else if (cache[3] < byte) {
@@ -5373,7 +5338,7 @@
        }
     }
     if (!found || PL_utf8cache < 0) {
-       const STRLEN real_len = S_sv_pos_b2u_forwards(aTHX_ s, send);
+       const STRLEN real_len = utf8_length((U8 *)s, (U8 *)send);
 
        if (found && PL_utf8cache < 0) {
            if (len != real_len) {

==== //depot/maint-5.8/perl/t/op/index.t#6 (xtext) ====
Index: perl/t/op/index.t
--- perl/t/op/index.t#5~27317~  2006-02-24 08:36:54.000000000 -0800
+++ perl/t/op/index.t   2007-01-29 13:05:26.000000000 -0800
@@ -3,11 +3,11 @@
 BEGIN {
     chdir 't' if -d 't';
     @INC = '../lib';
+    require './test.pl';
 }
 
 use strict;
-require './test.pl';
-plan( tests => 66 );
+plan( tests => 69 );
 
 my $foo = 'Now is the time for all good men to come to the aid of their 
country.';
 
@@ -133,3 +133,18 @@
        fresh_perl_is($prog, $expect_pos, {}, "\$[ = $arraybase$utf8");
     }
 }
+
+SKIP: {
+    skip "UTF-EBCDIC is limited to 0x7fffffff", 3 if ord("A") == 193;
+
+    my $a = "\x{80000000}";
+    my $s = $a.'defxyz';
+    is(index($s, 'def'), 1, "0x80000000 is a single character");
+
+    my $b = "\x{fffffffd}";
+    my $t = $b.'pqrxyz';
+    is(index($t, 'pqr'), 1, "0xfffffffd is a single character");
+
+    local ${^UTF8CACHE} = -1;
+    is(index($t, 'xyz'), 4, "0xfffffffd and utf8cache");
+}

==== //depot/maint-5.8/perl/util.c#138 (text) ====
Index: perl/util.c
--- perl/util.c#137~30057~      2007-01-29 07:55:07.000000000 -0800
+++ perl/util.c 2007-01-29 13:05:26.000000000 -0800
@@ -486,9 +486,9 @@
            mg->mg_len++;
     }
     s = (U8*)SvPV_force_mutable(sv, len);
-    (void)SvUPGRADE(sv, SVt_PVBM);
     if (len == 0)              /* TAIL might be on a zero-length string. */
        return;
+    (void)SvUPGRADE(sv, SVt_PVBM);
     if (len > 2) {
        const unsigned char *sb;
        const U8 mlen = (len>255) ? 255 : (U8)len;
End of Patch.

Reply via email to