In perl.git, the branch blead has been updated <http://perl5.git.perl.org/perl.git/commitdiff/f70a2f5652ad183b5a2f30455d893099191d896d?hp=429a3a11f97e72ecd402d69d36672e64e38beaa4>
- Log -----------------------------------------------------------------
commit f70a2f5652ad183b5a2f30455d893099191d896d
Author: Karl Williamson <k...@cpan.org>
Date: Mon Oct 17 20:16:21 2016 -0600

    op.c: Fix EBCDIC-only bug

    We have no tests that this fails for, but on an EBCDIC machine, the
    branches here are incorrect. They are trying to determine if a UTF-8
    representation will be larger than a non-UTF-8 representation for code
    points < 256. The proper test is if the code points are UTF-8
    invariant.

M op.c

commit b2873c1529bb5be58a32d6aa601af13f48046b58
Author: Karl Williamson <k...@cpan.org>
Date: Thu Aug 18 21:13:04 2016 -0600

    toke.c: Fix EBCDIC issue

    The value here is Unicode, not native, so needs a different macro.
    There's no test for this, as this is allocating space, and could be one
    byte off, which is only a problem if it is one byte small, and we were
    at a limit where that single byte made the difference.

M toke.c

commit d5944cabe635d7034195dc80ce81b130741a0707
Author: Karl Williamson <k...@cpan.org>
Date: Thu Aug 18 17:57:14 2016 -0600

    perlapi: Grammar, typos

M utf8.c

commit 4698efb644bb95e81bec2aef3a06e4f46c01f53d
Author: Karl Williamson <k...@cpan.org>
Date: Wed Aug 17 15:22:50 2016 -0600

    toke.c: Fix grammar in comment

M toke.c
-----------------------------------------------------------------------

Summary of changes:
 op.c   | 5 +++--
 toke.c | 8 ++++----
 utf8.c | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/op.c b/op.c
index 1866632..34c9a60 100644
--- a/op.c
+++ b/op.c
@@ -5426,7 +5426,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
         tbl[i] = (short)i;
     }
     else {
-        if (i < 128 && r[j] >= 128)
+        if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j]))
             grows = 1;
         tbl[i] = r[j++];
     }
@@ -5473,7 +5473,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
         --j;
     }
     if (tbl[t[i]] == -1) {
-        if (t[i] < 128 && r[j] >= 128)
+        if ( UVCHR_IS_INVARIANT(t[i])
+            && ! UVCHR_IS_INVARIANT(r[j]))
             grows = 1;
         tbl[t[i]] = r[j];
     }
diff --git a/toke.c b/toke.c
index d44d36a..c721575 100644
--- a/toke.c
+++ b/toke.c
@@ -2891,9 +2891,9 @@ S_scan_const(pTHX_ char *start)
  * Ranges entirely within Latin1 are expanded out entirely, in
  * order to avoid the significant overhead of making a swash.
  * Ranges that extend above Latin1 have to have a swash, so there
- * is no advantage to abbreviate them here, so they are stored here
- * as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies a
- * hyphen without any possible ambiguity. On EBCDIC machines, if
+ * is no advantage to abbreviating them here, so they are stored
+ * here as Min, ILLEGAL_UTF8_BYTE, Max. The illegal byte signifies
+ * a hyphen without any possible ambiguity. On EBCDIC machines, if
  * the range is expressed as Unicode, the Latin1 portion is
  * expanded out even if the entire range extends above Latin1.
  * This is because each code point in it has to be processed here
@@ -3531,7 +3531,7 @@ S_scan_const(pTHX_ char *start)
         sv_utf8_upgrade_flags_grow(
             sv,
             SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-            UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
+            OFFUNISKIP(uv) + (STRLEN)(send - e) + 1);
         d = SvPVX(sv) + SvCUR(sv);
         has_utf8 = TRUE;
     }
diff --git a/utf8.c b/utf8.c
index fb3acad..a60fb50 100644
--- a/utf8.c
+++ b/utf8.c
@@ -841,7 +841,7 @@ C<UTF8_CHECK_ONLY> is also specified.)
 
 It is now deprecated to have very high code points (above C<IV_MAX> on the
 platforms) and this function will raise a deprecation warning for these (unless
-such warnings are turned off). This value, is typically 0x7FFF_FFFF (2**31 -1)
+such warnings are turned off). This value is typically 0x7FFF_FFFF (2**31 -1)
 in a 32-bit word.
 
 Code points above 0x7FFF_FFFF (2**31 - 1) were never specified in any standard,

--
Perl5 Master Repository