Change 30069 by [EMAIL PROTECTED] on 2007/01/29 21:05:26
Integrate:
[ 29201]
Subject: [PATCH] replace S_sv_pos_b2u_forwards with utf8_length (was:
sv_pos_b2u dislikes the extended UTF-8)
From: SADAHIRO Tomoyuki <[EMAIL PROTECTED]>
Date: Sun, 21 May 2006 14:00:43 +0900
Message-Id: <[EMAIL PROTECTED]>
[ 29229]
Subject: [PATCH] Perl_die() / Perl_croak()
From: "Philip M. Gollucci" <[EMAIL PROTECTED]>
Date: Tue, 07 Nov 2006 23:27:40 -0800
Message-ID: <[EMAIL PROTECTED]>
(backported to embed.fnc)
[ 29259]
Subject: [PATCH] was RE: Perl_die() / Perl_croak()
From: "Robin Barker" <[EMAIL PROTECTED]>
Date: Mon, 13 Nov 2006 10:25:08 -0000
Message-ID: <[EMAIL PROTECTED]>
[ 29313]
Tweaks needed to get Perl compiling with g++ on OpenBSD.
[ 29363]
Remove superfluous SvROK()
[ 29386]
Change (well, correct) S_add_data to take and return a U32.
[ 29387]
Simplify S_add_data(), given that realloc will NULL acts as malloc().
[ 29476]
save_I8 is not a mathom.
[ 29482]
Subject: [PATCH] Perl_save_destructor is not a mathom
From: "Jerry D. Hedden" <[EMAIL PROTECTED]>
Date: Wed, 6 Dec 2006 11:12:13 -0800 (PST)
Message-ID: <[EMAIL PROTECTED]>
[ 29486]
No need to upgrade if all we're going to do is return.
Affected files ...
... //depot/maint-5.8/perl/embed.fnc#202 integrate
... //depot/maint-5.8/perl/embed.h#151 integrate
... //depot/maint-5.8/perl/embed.pl#32 integrate
... //depot/maint-5.8/perl/mathoms.c#27 integrate
... //depot/maint-5.8/perl/perl.h#152 integrate
... //depot/maint-5.8/perl/perlio.c#102 integrate
... //depot/maint-5.8/perl/proto.h#193 integrate
... //depot/maint-5.8/perl/regcomp.c#97 edit
... //depot/maint-5.8/perl/scope.c#65 edit
... //depot/maint-5.8/perl/sv.c#334 edit
... //depot/maint-5.8/perl/t/op/index.t#6 integrate
... //depot/maint-5.8/perl/util.c#138 integrate
Differences ...
==== //depot/maint-5.8/perl/embed.fnc#202 (text) ====
Index: perl/embed.fnc
--- perl/embed.fnc#201~30042~ 2007-01-27 14:45:18.000000000 -0800
+++ perl/embed.fnc 2007-01-29 13:05:26.000000000 -0800
@@ -1278,7 +1278,7 @@
Es |I32 |study_chunk |NN struct RExC_state_t* state|NN regnode
**scanp \
|NN I32 *deltap|NN regnode *last|NULLOK struct
scan_data_t *data \
|U32 flags
-EsRn |I32 |add_data |NN struct RExC_state_t* state|I32 n|NN const
char *s
+EsRn |U32 |add_data |NN struct RExC_state_t* state|U32 n|NN const
char *s
rs |void |re_croak2 |NN const char* pat1|NN const char* pat2|...
Es |I32 |regpposixcc |NN struct RExC_state_t* state|I32 value
Es |void |checkposixcc |NN struct RExC_state_t* state
@@ -1334,7 +1334,6 @@
|STRLEN uoffset|STRLEN uoffset0|STRLEN boffset0
s |void |utf8_mg_pos_cache_update|NN SV *sv|NN MAGIC **mgp \
|STRLEN byte|STRLEN utf8|STRLEN blen
-s |STRLEN |sv_pos_b2u_forwards|NN const U8 *s|NN const U8 *const target
s |STRLEN |sv_pos_b2u_midway|NN const U8 *s|NN const U8 *const target \
|NN const U8 *end|STRLEN endu
s |char * |stringify_regexp|NN SV *sv|NN MAGIC *mg|NULLOK STRLEN *lp
==== //depot/maint-5.8/perl/embed.h#151 (text+w) ====
Index: perl/embed.h
--- perl/embed.h#150~30033~ 2007-01-27 08:40:35.000000000 -0800
+++ perl/embed.h 2007-01-29 13:05:26.000000000 -0800
@@ -1357,7 +1357,6 @@
#define sv_pos_u2b_midway S_sv_pos_u2b_midway
#define sv_pos_u2b_cached S_sv_pos_u2b_cached
#define utf8_mg_pos_cache_update S_utf8_mg_pos_cache_update
-#define sv_pos_b2u_forwards S_sv_pos_b2u_forwards
#define sv_pos_b2u_midway S_sv_pos_b2u_midway
#define stringify_regexp S_stringify_regexp
#define F0convert S_F0convert
@@ -3452,7 +3451,6 @@
#define sv_pos_u2b_midway S_sv_pos_u2b_midway
#define sv_pos_u2b_cached(a,b,c,d,e,f,g) S_sv_pos_u2b_cached(aTHX_
a,b,c,d,e,f,g)
#define utf8_mg_pos_cache_update(a,b,c,d,e)
S_utf8_mg_pos_cache_update(aTHX_ a,b,c,d,e)
-#define sv_pos_b2u_forwards(a,b) S_sv_pos_b2u_forwards(aTHX_ a,b)
#define sv_pos_b2u_midway(a,b,c,d) S_sv_pos_b2u_midway(aTHX_ a,b,c,d)
#define stringify_regexp(a,b,c) S_stringify_regexp(aTHX_ a,b,c)
#define F0convert S_F0convert
==== //depot/maint-5.8/perl/embed.pl#32 (xtext) ====
Index: perl/embed.pl
--- perl/embed.pl#31~29897~ 2007-01-20 10:14:46.000000000 -0800
+++ perl/embed.pl 2007-01-29 13:05:26.000000000 -0800
@@ -217,10 +217,14 @@
push @attrs, "__attribute__pure__";
}
if( $flags =~ /f/ ) {
- my $prefix = $has_context ? 'pTHX_' : '';
- my $args = scalar @args;
- push @attrs, sprintf "__attribute__format__(__printf__,%s%d,%s%d)",
- $prefix, $args - 1, $prefix, $args;
+ my $prefix = $has_context ? 'pTHX_' : '';
+ my $args = scalar @args;
+ my $pat = $args - 1;
+ my $macro = @nonnull && $nonnull[-1] == $pat
+ ? '__attribute__format__'
+ : '__attribute__format__null_ok__';
+ push @attrs, sprintf "%s(__printf__,%s%d,%s%d)", $macro,
+ $prefix, $pat, $prefix, $args;
}
if ( @nonnull ) {
my @pos = map { $has_context ? "pTHX_$_" : $_ } @nonnull;
==== //depot/maint-5.8/perl/mathoms.c#27 (text) ====
Index: perl/mathoms.c
--- perl/mathoms.c#26~30036~ 2007-01-27 09:35:47.000000000 -0800
+++ perl/mathoms.c 2007-01-29 13:05:26.000000000 -0800
@@ -1181,15 +1181,6 @@
}
void
-Perl_save_I8(pTHX_ I8 *bytep)
-{
- SSCHECK(3);
- SSPUSHINT(*bytep);
- SSPUSHPTR(bytep);
- SSPUSHINT(SAVEt_I8);
-}
-
-void
Perl_save_iv(pTHX_ IV *ivp)
{
SSCHECK(3);
@@ -1221,16 +1212,6 @@
}
}
-void
-Perl_save_destructor(pTHX_ DESTRUCTORFUNC_NOCONTEXT_t f, void* p)
-{
- SSCHECK(3);
- SSPUSHDPTR(f);
- SSPUSHPTR(p);
- SSPUSHINT(SAVEt_DESTRUCTOR);
-}
-
-
/*
=for apidoc sv_usepvn_mg
==== //depot/maint-5.8/perl/perl.h#152 (text) ====
Index: perl/perl.h
--- perl/perl.h#151~30060~ 2007-01-29 09:14:16.000000000 -0800
+++ perl/perl.h 2007-01-29 13:05:26.000000000 -0800
@@ -1095,6 +1095,10 @@
EXTERN_C char **environ;
#endif
+#if defined(__OpenBSD__) && defined(__cplusplus)
+EXTERN_C char **environ;
+#endif
+
#ifdef SETERRNO
# undef SETERRNO /* SOCKS might have defined this */
#endif
@@ -3023,6 +3027,13 @@
# define NORETURN_FUNCTION_END /* NOTREACHED */ return 0
#endif
+/* Some OS warn on NULL format to printf */
+#ifdef PRINTF_FORMAT_NULL_OK
+# define __attribute__format__null_ok__(x,y,z) __attribute__format__(x,y,z)
+#else
+# define __attribute__format__null_ok__(x,y,z)
+#endif
+
#ifdef HAS_BUILTIN_EXPECT
# define EXPECT(expr,val) __builtin_expect(expr,val)
#else
==== //depot/maint-5.8/perl/perlio.c#102 (text) ====
Index: perl/perlio.c
--- perl/perlio.c#101~30065~ 2007-01-29 10:52:30.000000000 -0800
+++ perl/perlio.c 2007-01-29 13:05:26.000000000 -0800
@@ -4560,7 +4560,7 @@
}
posn = (b->posn / page_size) * page_size;
len = st.st_size - posn;
- m->mptr = mmap(NULL, len, PROT_READ, MAP_SHARED, fd, posn);
+ m->mptr = (Mmap_t)mmap(NULL, len, PROT_READ, MAP_SHARED, fd,
posn);
if (m->mptr && m->mptr != (Mmap_t) - 1) {
#if 0 && defined(HAS_MADVISE) && defined(MADV_SEQUENTIAL)
madvise(m->mptr, len, MADV_SEQUENTIAL);
==== //depot/maint-5.8/perl/proto.h#193 (text+w) ====
Index: perl/proto.h
--- perl/proto.h#192~30042~ 2007-01-27 14:45:18.000000000 -0800
+++ perl/proto.h 2007-01-29 13:05:26.000000000 -0800
@@ -154,7 +154,7 @@
PERL_CALLCONV PERL_CONTEXT* Perl_create_eval_scope(pTHX_ U32 flags);
PERL_CALLCONV void Perl_croak(pTHX_ const char* pat, ...)
__attribute__noreturn__
- __attribute__format__(__printf__,pTHX_1,pTHX_2);
+
__attribute__format__null_ok__(__printf__,pTHX_1,pTHX_2);
PERL_CALLCONV void Perl_vcroak(pTHX_ const char* pat, va_list* args)
__attribute__noreturn__;
@@ -260,7 +260,7 @@
PERL_CALLCONV void Perl_deprecate(pTHX_ char* s);
PERL_CALLCONV void Perl_deprecate_old(pTHX_ char* s);
PERL_CALLCONV OP* Perl_die(pTHX_ const char* pat, ...)
- __attribute__format__(__printf__,pTHX_1,pTHX_2);
+
__attribute__format__null_ok__(__printf__,pTHX_1,pTHX_2);
PERL_CALLCONV OP* Perl_vdie(pTHX_ const char* pat, va_list* args);
PERL_CALLCONV OP* Perl_die_where(pTHX_ char* message, STRLEN msglen);
@@ -1844,7 +1844,7 @@
STATIC void S_cl_and(struct regnode_charclass_class *cl, const struct
regnode_charclass_class *and_with);
STATIC void S_cl_or(const struct RExC_state_t* state, struct
regnode_charclass_class *cl, const struct regnode_charclass_class *or_with);
STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t* state, regnode
**scanp, I32 *deltap, regnode *last, struct scan_data_t *data, U32 flags);
-STATIC I32 S_add_data(struct RExC_state_t* state, I32 n, const char *s)
+STATIC U32 S_add_data(struct RExC_state_t* state, U32 n, const char *s)
__attribute__warn_unused_result__;
STATIC void S_re_croak2(pTHX_ const char* pat1, const char* pat2, ...)
@@ -1916,7 +1916,6 @@
STATIC STRLEN S_sv_pos_u2b_midway(const U8 *const start, const U8 *send,
STRLEN uoffset, STRLEN uend);
STATIC STRLEN S_sv_pos_u2b_cached(pTHX_ SV *sv, MAGIC **mgp, const U8 *const
start, const U8 *const send, STRLEN uoffset, STRLEN uoffset0, STRLEN boffset0);
STATIC void S_utf8_mg_pos_cache_update(pTHX_ SV *sv, MAGIC **mgp, STRLEN
byte, STRLEN utf8, STRLEN blen);
-STATIC STRLEN S_sv_pos_b2u_forwards(pTHX_ const U8 *s, const U8 *const
target);
STATIC STRLEN S_sv_pos_b2u_midway(pTHX_ const U8 *s, const U8 *const target,
const U8 *end, STRLEN endu);
STATIC char * S_stringify_regexp(pTHX_ SV *sv, MAGIC *mg, STRLEN *lp);
STATIC char * S_F0convert(NV nv, char *endbuf, STRLEN *len);
==== //depot/maint-5.8/perl/regcomp.c#97 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#96~30058~ 2007-01-29 08:46:38.000000000 -0800
+++ perl/regcomp.c 2007-01-29 13:05:26.000000000 -0800
@@ -1662,25 +1662,21 @@
return min;
}
-STATIC I32
-S_add_data(RExC_state_t *pRExC_state, I32 n, const char *s)
+STATIC U32
+S_add_data(RExC_state_t *pRExC_state, U32 n, const char *s)
{
- if (RExC_rx->data) {
- const U32 count = RExC_rx->data->count;
- Renewc(RExC_rx->data,
- sizeof(*RExC_rx->data) + sizeof(void*) * (count + n - 1),
- char, struct reg_data);
+ U32 count = RExC_rx->data ? RExC_rx->data->count : 0;
+
+ Renewc(RExC_rx->data,
+ sizeof(*RExC_rx->data) + sizeof(void*) * (count + n - 1),
+ char, struct reg_data);
+ if(count)
Renew(RExC_rx->data->what, count + n, U8);
- RExC_rx->data->count += n;
- }
- else {
- Newxc(RExC_rx->data, sizeof(*RExC_rx->data) + sizeof(void*) * (n - 1),
- char, struct reg_data);
+ else
Newx(RExC_rx->data->what, n, U8);
- RExC_rx->data->count = n;
- }
- Copy(s, RExC_rx->data->what + RExC_rx->data->count - n, n, U8);
- return RExC_rx->data->count - n;
+ RExC_rx->data->count = count + n;
+ Copy(s, RExC_rx->data->what + count, n, U8);
+ return count;
}
void
@@ -2023,7 +2019,7 @@
&& !(data.start_class->flags & ANYOF_EOS)
&& !cl_is_anything(data.start_class))
{
- const I32 n = add_data(pRExC_state, 1, "f");
+ const U32 n = add_data(pRExC_state, 1, "f");
Newx(RExC_rx->data->data[n], 1,
struct regnode_charclass_class);
@@ -2079,7 +2075,7 @@
if (!(data.start_class->flags & ANYOF_EOS)
&& !cl_is_anything(data.start_class))
{
- const I32 n = add_data(pRExC_state, 1, "f");
+ const U32 n = add_data(pRExC_state, 1, "f");
Newx(RExC_rx->data->data[n], 1,
struct regnode_charclass_class);
@@ -2199,7 +2195,8 @@
/* FALL THROUGH */
case '{': /* (?{...}) */
{
- I32 count = 1, n = 0;
+ I32 count = 1;
+ U32 n = 0;
char c;
char *s = RExC_parse;
==== //depot/maint-5.8/perl/scope.c#65 (text) ====
Index: perl/scope.c
--- perl/scope.c#64~30066~ 2007-01-29 11:07:36.000000000 -0800
+++ perl/scope.c 2007-01-29 13:05:26.000000000 -0800
@@ -357,6 +357,15 @@
}
void
+Perl_save_I8(pTHX_ I8 *bytep)
+{
+ SSCHECK(3);
+ SSPUSHINT(*bytep);
+ SSPUSHPTR(bytep);
+ SSPUSHINT(SAVEt_I8);
+}
+
+void
Perl_save_I32(pTHX_ I32 *intp)
{
SSCHECK(3);
@@ -492,6 +501,15 @@
}
void
+Perl_save_destructor(pTHX_ DESTRUCTORFUNC_NOCONTEXT_t f, void* p)
+{
+ SSCHECK(3);
+ SSPUSHDPTR(f);
+ SSPUSHPTR(p);
+ SSPUSHINT(SAVEt_DESTRUCTOR);
+}
+
+void
Perl_save_destructor_x(pTHX_ DESTRUCTORFUNC_t f, void* p)
{
SSCHECK(3);
==== //depot/maint-5.8/perl/sv.c#334 (text) ====
Index: perl/sv.c
--- perl/sv.c#333~30065~ 2007-01-29 10:52:30.000000000 -0800
+++ perl/sv.c 2007-01-29 13:05:26.000000000 -0800
@@ -3430,8 +3430,7 @@
sflags = SvFLAGS(sstr);
if (sflags & SVf_ROK) {
- if (dtype == SVt_PVGV &&
- SvROK(sstr) && SvTYPE(SvRV(sstr)) == SVt_PVGV) {
+ if (dtype == SVt_PVGV && SvTYPE(SvRV(sstr)) == SVt_PVGV) {
sstr = SvRV(sstr);
if (sstr == dstr) {
if (GvIMPORTED(dstr) != GVf_IMPORTED
@@ -5117,18 +5116,7 @@
if (PL_utf8cache < 0) {
const U8 *start = (const U8 *) SvPVX_const(sv);
- const U8 *const end = start + byte;
- STRLEN realutf8 = 0;
-
- while (start < end) {
- start += UTF8SKIP(start);
- realutf8++;
- }
-
- /* Can't use S_sv_pos_b2u_forwards as it will scream warnings on
- surrogates. FIXME - is it inconsistent that b2u warns, but u2b
- doesn't? I don't know whether this difference was introduced with
- the caching code in 5.8.1. */
+ const STRLEN realutf8 = utf8_length((U8 *)start, (U8 *)start + byte);
if (realutf8 != utf8) {
/* Need to turn the assertions off otherwise we may recurse
@@ -5239,29 +5227,6 @@
ASSERT_UTF8_CACHE(cache);
}
-/* If we don't know the character offset of the end of a region, our only
- option is to walk forwards to the target byte offset. */
-static STRLEN
-S_sv_pos_b2u_forwards(pTHX_ const U8 *s, const U8 *const target)
-{
- STRLEN len = 0;
- while (s < target) {
- STRLEN n = 1;
-
- /* Call utf8n_to_uvchr() to validate the sequence
- * (unless a simple non-UTF character) */
- if (!UTF8_IS_INVARIANT(*s))
- utf8n_to_uvchr((U8 *)s, UTF8SKIP(s), &n, 0);
- if (n > 0) {
- s += n;
- len++;
- }
- else
- break;
- }
- return len;
-}
-
/* We already know all of the way, now we may be able to walk back. The same
assumption is made as in S_sv_pos_u2b_midway(), namely that walking
backward is half the speed of walking forward. */
@@ -5273,7 +5238,7 @@
STRLEN backw = end - target;
if (forw < 2 * backw) {
- return S_sv_pos_b2u_forwards(aTHX_ s, target);
+ return utf8_length((U8 *)s, (U8 *)target);
}
while (end > target) {
@@ -5346,8 +5311,8 @@
+ S_sv_pos_b2u_midway(aTHX_ s + cache[1], send,
s + blen, mg->mg_len - cache[0]);
} else {
- len = cache[0]
- + S_sv_pos_b2u_forwards(aTHX_ s + cache[1], send);
+ len = cache[0] + utf8_length((U8 *)s + cache[1],
+ (U8 *)send);
}
}
else if (cache[3] < byte) {
@@ -5373,7 +5338,7 @@
}
}
if (!found || PL_utf8cache < 0) {
- const STRLEN real_len = S_sv_pos_b2u_forwards(aTHX_ s, send);
+ const STRLEN real_len = utf8_length((U8 *)s, (U8 *)send);
if (found && PL_utf8cache < 0) {
if (len != real_len) {
==== //depot/maint-5.8/perl/t/op/index.t#6 (xtext) ====
Index: perl/t/op/index.t
--- perl/t/op/index.t#5~27317~ 2006-02-24 08:36:54.000000000 -0800
+++ perl/t/op/index.t 2007-01-29 13:05:26.000000000 -0800
@@ -3,11 +3,11 @@
BEGIN {
chdir 't' if -d 't';
@INC = '../lib';
+ require './test.pl';
}
use strict;
-require './test.pl';
-plan( tests => 66 );
+plan( tests => 69 );
my $foo = 'Now is the time for all good men to come to the aid of their
country.';
@@ -133,3 +133,18 @@
fresh_perl_is($prog, $expect_pos, {}, "\$[ = $arraybase$utf8");
}
}
+
+SKIP: {
+ skip "UTF-EBCDIC is limited to 0x7fffffff", 3 if ord("A") == 193;
+
+ my $a = "\x{80000000}";
+ my $s = $a.'defxyz';
+ is(index($s, 'def'), 1, "0x80000000 is a single character");
+
+ my $b = "\x{fffffffd}";
+ my $t = $b.'pqrxyz';
+ is(index($t, 'pqr'), 1, "0xfffffffd is a single character");
+
+ local ${^UTF8CACHE} = -1;
+ is(index($t, 'xyz'), 4, "0xfffffffd and utf8cache");
+}
==== //depot/maint-5.8/perl/util.c#138 (text) ====
Index: perl/util.c
--- perl/util.c#137~30057~ 2007-01-29 07:55:07.000000000 -0800
+++ perl/util.c 2007-01-29 13:05:26.000000000 -0800
@@ -486,9 +486,9 @@
mg->mg_len++;
}
s = (U8*)SvPV_force_mutable(sv, len);
- (void)SvUPGRADE(sv, SVt_PVBM);
if (len == 0) /* TAIL might be on a zero-length string. */
return;
+ (void)SvUPGRADE(sv, SVt_PVBM);
if (len > 2) {
const unsigned char *sb;
const U8 mlen = (len>255) ? 255 : (U8)len;
End of Patch.