In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/f70a2f5652ad183b5a2f30455d893099191d896d?hp=429a3a11f97e72ecd402d69d36672e64e38beaa4>

- Log -----------------------------------------------------------------
commit f70a2f5652ad183b5a2f30455d893099191d896d
Author: Karl Williamson <k...@cpan.org>
Date:   Mon Oct 17 20:16:21 2016 -0600

    op.c: Fix EBCDIC-only bug
    
    We have no tests that fail because of this, but on an EBCDIC machine
    the branches here are incorrect.  They are trying to determine whether
    a UTF-8 representation will be larger than a non-UTF-8 representation
    for code points < 256.  The proper test is whether the code points are
    UTF-8 invariant.

M       op.c

commit b2873c1529bb5be58a32d6aa601af13f48046b58
Author: Karl Williamson <k...@cpan.org>
Date:   Thu Aug 18 21:13:04 2016 -0600

    toke.c: Fix EBCDIC issue
    
    The value here is Unicode, not native, so it needs a different macro.
    There's no test for this, as this code is allocating space and could
    be one byte off, which is only a problem if the result is one byte too
    small and we are at a limit where that single byte makes the difference.

M       toke.c

commit d5944cabe635d7034195dc80ce81b130741a0707
Author: Karl Williamson <k...@cpan.org>
Date:   Thu Aug 18 17:57:14 2016 -0600

    perlapi: Grammar, typos

M       utf8.c

commit 4698efb644bb95e81bec2aef3a06e4f46c01f53d
Author: Karl Williamson <k...@cpan.org>
Date:   Wed Aug 17 15:22:50 2016 -0600

    toke.c: Fix grammar in comment

M       toke.c
-----------------------------------------------------------------------

Summary of changes:
 op.c   | 5 +++--
 toke.c | 8 ++++----
 utf8.c | 2 +-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/op.c b/op.c
index 1866632..34c9a60 100644
--- a/op.c
+++ b/op.c
@@ -5426,7 +5426,7 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                        tbl[i] = (short)i;
                }
                else {
-                   if (i < 128 && r[j] >= 128)
+                   if (UVCHR_IS_INVARIANT(i) && ! UVCHR_IS_INVARIANT(r[j]))
                        grows = 1;
                    tbl[i] = r[j++];
                }
@@ -5473,7 +5473,8 @@ S_pmtrans(pTHX_ OP *o, OP *expr, OP *repl)
                --j;
            }
            if (tbl[t[i]] == -1) {
-               if (t[i] < 128 && r[j] >= 128)
+                if (     UVCHR_IS_INVARIANT(t[i])
+                    && ! UVCHR_IS_INVARIANT(r[j]))
                    grows = 1;
                tbl[t[i]] = r[j];
            }
diff --git a/toke.c b/toke.c
index d44d36a..c721575 100644
--- a/toke.c
+++ b/toke.c
@@ -2891,9 +2891,9 @@ S_scan_const(pTHX_ char *start)
              * Ranges entirely within Latin1 are expanded out entirely, in
              * order to avoid the significant overhead of making a swash.
              * Ranges that extend above Latin1 have to have a swash, so there
-             * is no advantage to abbreviate them here, so they are stored here
-             * as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte signifies a
-             * hyphen without any possible ambiguity.  On EBCDIC machines, if
+             * is no advantage to abbreviating them here, so they are stored
+             * here as Min, ILLEGAL_UTF8_BYTE, Max.  The illegal byte signifies
+             * a hyphen without any possible ambiguity.  On EBCDIC machines, if
              * the range is expressed as Unicode, the Latin1 portion is
              * expanded out even if the entire range extends above Latin1.
              * This is because each code point in it has to be processed here
@@ -3531,7 +3531,7 @@ S_scan_const(pTHX_ char *start)
                            sv_utf8_upgrade_flags_grow(
                                     sv,
                                     SV_GMAGIC|SV_FORCE_UTF8_UPGRADE,
-                                   UVCHR_SKIP(uv) + (STRLEN)(send - e) + 1);
+                                   OFFUNISKIP(uv) + (STRLEN)(send - e) + 1);
                            d = SvPVX(sv) + SvCUR(sv);
                            has_utf8 = TRUE;
                        }
diff --git a/utf8.c b/utf8.c
index fb3acad..a60fb50 100644
--- a/utf8.c
+++ b/utf8.c
@@ -841,7 +841,7 @@ C<UTF8_CHECK_ONLY> is also specified.)
 
 It is now deprecated to have very high code points (above C<IV_MAX> on the
 platforms) and this function will raise a deprecation warning for these (unless
-such warnings are turned off).  This value, is typically 0x7FFF_FFFF (2**31 -1)
+such warnings are turned off).  This value is typically 0x7FFF_FFFF (2**31 -1)
 in a 32-bit word.
 
 Code points above 0x7FFF_FFFF (2**31 - 1) were never specified in any standard,

--
Perl5 Master Repository

Reply via email to