[perl.git] branch blead, updated. v5.25.3-282-g534752c

Karl Williamson Wed, 17 Aug 2016 14:24:27 -0700

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/534752c1d25d7c52c702337927c37e40c4df103d?hp=cc890588b0ce25c7e37126933582b6d944ad1584>


- Log -----------------------------------------------------------------
commit 534752c1d25d7c52c702337927c37e40c4df103d
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 11 17:43:13 2016 -0600

    utf8.c: Remove an EBCDIC-only path
    
    Rewriting this slightly allows an #ifdef EBCDIC to be removed.
    
    I am copying from code written by [email protected] and included in the
    Encode distribution.

M       utf8.c

commit cee69f79c11928d8dc4009829e006913b0c02059
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 11 16:53:58 2016 -0600

    regexec.c: White-space only

M       regexec.c

commit 594feacd4bcd5777c4b50f0f656cd58e8506c5cc
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 11 16:53:35 2016 -0600

    lib/locale.t: Add some extra info to a debug statement

M       lib/locale.t

commit 389cb6df94a20dfb9291b46c419d7626abcec7ff
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 11 16:49:57 2016 -0600

    lib/locale.t: Generalize test
    
    To test for cased letters, we need to include lower as well as upper,
    folded.  I happen to know that internally currently, this doesn't make a
    difference, but the test should still be written so it will work
    properly if that were to change.

M       lib/locale.t

commit 1065615957bc3821a00a319d54f18f443fc2861e
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 11 16:42:20 2016 -0600

    Take advantage of SvGROW's return value
    
    I had not realized that SvGROW returned the new string pointer.  Using
    that makes a one-step process from a two-step process.
    
    I examined the code for other possible occurrences, and found others
    where it seemed that the two-step seemed clearer, so left those alone.

M       perl.c
M       pp.c
M       toke.c
-----------------------------------------------------------------------

Summary of changes:
 lib/locale.t | 13 ++++++++-----
 perl.c       |  3 +--
 pp.c         | 15 +++++----------
 regexec.c    |  2 +-
 toke.c       |  3 +--
 utf8.c       | 12 ++++--------
 6 files changed, 20 insertions(+), 28 deletions(-)

diff --git a/lib/locale.t b/lib/locale.t
index 0b7f415..e8cedbc 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -979,7 +979,7 @@ foreach my $Locale (@Locale) {
         @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
         @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
         @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
-        @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
+        @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
 
         # Sieve the uppercase and the lowercase.
 
@@ -1010,7 +1010,7 @@ foreach my $Locale (@Locale) {
         @{$posixes{'punct'}} = grep /[[:punct:]]/, map {chr } 0..255;
         @{$posixes{'upper'}} = grep /[[:upper:]]/, map {chr } 0..255;
         @{$posixes{'xdigit'}} = grep /[[:xdigit:]]/, map {chr } 0..255;
-        @{$posixes{'cased'}} = grep /[[:upper:]]/i, map {chr } 0..255;
+        @{$posixes{'cased'}} = grep /[[:upper:][:lower:]]/i, map {chr } 0..255;
         for (@{$posixes{'word'}}) {
             if (/[^\d_]/) { # skip digits and the _
                 if (uc($_) eq $_) {
@@ -1204,7 +1204,7 @@ foreach my $Locale (@Locale) {
                     (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
 
                     # effectively is what [:cased:] would be if it existed.
-                    (/[[:upper:]]/i xor /[[:^upper:]]/i);
+                    (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
         }
         else {
             push @f, $_ unless   (/[[:alpha:]]/ xor /[[:^alpha:]]/)   ||
@@ -1220,7 +1220,7 @@ foreach my $Locale (@Locale) {
                     (/[[:upper:]]/ xor /[[:^upper:]]/)   ||
                     (/[[:word:]]/  xor /[[:^word:]]/)    ||
                     (/[[:xdigit:]]/ xor /[[:^xdigit:]]/) ||
-                    (/[[:upper:]]/i xor /[[:^upper:]]/i);
+                    (/[[:upper:][:lower:]]/i xor /[^[:upper:][:lower:]]/i);
         }
     }
     report_multi_result($Locale, $locales_test_number, \@f);
@@ -1988,12 +1988,15 @@ foreach my $Locale (@Locale) {
                 foreach my $err (keys %!) {
                     use Errno;
                     $! = eval "&Errno::$err";   # Convert to strerror() output
+                    my $errnum = 0+$!;
                     my $strerror = "$!";
                     if ("$strerror" =~ /\P{ASCII}/) {
                         $ok14 = utf8::is_utf8($strerror);
                         no locale;
                         $ok14_5 = "$!" !~ /\P{ASCII}/;
-                        debug(disp_str("non-ASCII \$!=$!")) if ! $ok14_5;
+                        debug( disp_str(
+                        "non-ASCII \$! for error $errnum='$strerror'"))
+                                                                   if ! 
$ok14_5;
                         last;
                     }
                 }
diff --git a/perl.c b/perl.c
index 21a8b30..6618535 100644
--- a/perl.c
+++ b/perl.c
@@ -3224,8 +3224,7 @@ Perl_moreswitches(pTHX_ const char *s)
                   s--;
              }
              PL_rs = newSVpvs("");
-             SvGROW(PL_rs, (STRLEN)(UVCHR_SKIP(rschar) + 1));
-             tmps = (U8*)SvPVX(PL_rs);
+             tmps = (U8*) SvGROW(PL_rs, (STRLEN)(UVCHR_SKIP(rschar) + 1));
              uvchr_to_utf8(tmps, rschar);
              SvCUR_set(PL_rs, UVCHR_SKIP(rschar));
              SvUTF8_on(PL_rs);
diff --git a/pp.c b/pp.c
index 124bf63..49b6abe 100644
--- a/pp.c
+++ b/pp.c
@@ -4109,8 +4109,7 @@ PP(pp_uc)
                      * allocate without allocating too much.  Such is life.
                      * See corresponding comment in lc code for another option
                      * */
-                    SvGROW(dest, min);
-                    d = (U8*)SvPVX(dest) + o;
+                    d = o + (U8*) SvGROW(dest, min);
                 }
                 Copy(tmpbuf, d, ulen, U8);
                 d += ulen;
@@ -4174,8 +4173,7 @@ PP(pp_uc)
                         * ASCII.  If not enough room, grow the string */
                        if (SvLEN(dest) < ++min) {      
                            const UV o = d - (U8*)SvPVX_const(dest);
-                           SvGROW(dest, min);
-                           d = (U8*)SvPVX(dest) + o;
+                           d = o + (U8*) SvGROW(dest, min);
                        }
                        *d++ = 'S'; *d = 'S'; /* upper case is 'SS' */
                        continue;   /* Back to the tight loop; still in ASCII */
@@ -4325,8 +4323,7 @@ PP(pp_lc)
                 * Another option would be to grow an extra byte or two more
                 * each time we need to grow, which would cut down the million
                 * to 500K, with little waste */
-               SvGROW(dest, min);
-               d = (U8*)SvPVX(dest) + o;
+               d = o + (U8*) SvGROW(dest, min);
            }
 
            /* Copy the newly lowercased letter to the output buffer we're
@@ -4520,8 +4517,7 @@ PP(pp_fc)
 
             if (ulen > u && (SvLEN(dest) < (min += ulen - u))) {
                 const UV o = d - (U8*)SvPVX_const(dest);
-                SvGROW(dest, min);
-                d = (U8*)SvPVX(dest) + o;
+                d = o + (U8*) SvGROW(dest, min);
             }
 
             Copy(tmpbuf, d, ulen, U8);
@@ -4600,8 +4596,7 @@ PP(pp_fc)
                      * becomes "ss", which may require growing the SV. */
                     if (SvLEN(dest) < ++min) {
                         const UV o = d - (U8*)SvPVX_const(dest);
-                        SvGROW(dest, min);
-                        d = (U8*)SvPVX(dest) + o;
+                        d = o + (U8*) SvGROW(dest, min);
                      }
                     *(d)++ = 's';
                     *d = 's';
diff --git a/regexec.c b/regexec.c
index 60ff2a0..380e378 100644
--- a/regexec.c
+++ b/regexec.c
@@ -458,7 +458,7 @@ S_isFOO_lc(pTHX_ const U8 classnum, const U8 character)
         case _CC_ENUM_ALPHA:     return isALPHA_LC(character);
         case _CC_ENUM_ASCII:     return isASCII_LC(character);
         case _CC_ENUM_BLANK:     return isBLANK_LC(character);
-        case _CC_ENUM_CASED:     return isLOWER_LC(character)
+        case _CC_ENUM_CASED:     return    isLOWER_LC(character)
                                         || isUPPER_LC(character);
         case _CC_ENUM_CNTRL:     return isCNTRL_LC(character);
         case _CC_ENUM_DIGIT:     return isDIGIT_LC(character);
diff --git a/toke.c b/toke.c
index b0e3736..ddc7e15 100644
--- a/toke.c
+++ b/toke.c
@@ -3098,8 +3098,7 @@ S_scan_const(pTHX_ char *start)
 
                 /* Subtract 3 for the bytes that were already accounted for
                  * (min, max, and the hyphen) */
-                SvGROW(sv, SvLEN(sv) + grow - 3);
-               d = SvPVX(sv) + save_offset;    /* refresh d after realloc */
+                d = save_offset + SvGROW(sv, SvLEN(sv) + grow - 3);
 
                 /* Here, we expand out the range.  On ASCII platforms, the
                  * compiler should optimize out the 'convert_unicode==TRUE'
diff --git a/utf8.c b/utf8.c
index 7d72c55..0b7cbda 100644
--- a/utf8.c
+++ b/utf8.c
@@ -638,14 +638,10 @@ Perl_utf8n_to_uvchr(pTHX_ const U8 *s, STRLEN curlen, 
STRLEN *retlen, U32 flags)
     /* Here is not a continuation byte, nor an invariant.  The only thing left
      * is a start byte (possibly for an overlong) */
 
-#ifdef EBCDIC
-    uv = NATIVE_UTF8_TO_I8(uv);
-#endif
-
-    /* Remove the leading bits that indicate the number of bytes in the
-     * character's whole UTF-8 sequence, leaving just the bits that are part of
-     * the value */
-    uv &= UTF_START_MASK(expectlen);
+    /* Convert to I8 on EBCDIC (no-op on ASCII), then remove the leading bits
+     * that indicate the number of bytes in the character's whole UTF-8
+     * sequence, leaving just the bits that are part of the value.  */
+    uv = NATIVE_UTF8_TO_I8(uv) & UTF_START_MASK(expectlen);
 
     /* Now, loop through the remaining bytes in the character's sequence,
      * accumulating each into the working value as we go.  Be sure to not look

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.25.3-282-g534752c

Reply via email to