[perl.git] branch blead, updated. v5.21.9-291-gfc96206

Karl Williamson Wed, 18 Mar 2015 08:45:06 -0700

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/fc962064cacbf4393def110b51a7bac805d9c3be?hp=49c4aee9770d41c2fd7866800ef51cfa28e02b58>


- Log -----------------------------------------------------------------
commit fc962064cacbf4393def110b51a7bac805d9c3be
Author: Karl Williamson <[email protected]>
Date:   Mon Mar 2 22:07:45 2015 -0700

    DBM_Filter/t/encode.t: temporarily skip until Encode fixed

M       lib/DBM_Filter/t/encode.t

commit f5b27708c2015c319d0178eda79bef9baeaa22a1
Author: Karl Williamson <[email protected]>
Date:   Mon Mar 2 21:31:07 2015 -0700

    porting/readme.t: TODO failing EBCDIC test
    
    This depends on Unicode::Collate, which is not yet working properly in
    EBCDIC

M       t/porting/readme.t

commit 8944ce7878105d0b5c56b840db0a95a593e01244
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 6 23:08:38 2014 -0700

    ext/SDBM_File/sdbm/dbu.c Generalize for EBCDIC platforms
    
    This also fixed a bug which hasn't shown up in the tests, in that it
    uses 'char' where it should be 'U8'.

M       ext/SDBM_File/dbu.c

commit 89ad707a5b059d12b8c7715313147fabda58d12f
Author: Karl Williamson <[email protected]>
Date:   Tue Mar 17 22:03:16 2015 -0600

    regexec.c: Fix improper warning.
    
    \b{} and \B{} are valid in UTF-8 locales, as all the Unicode rules
    apply.  Prior to this patch a warning was raised under some
    circumstances.  The warning text was generalized to handle both \b and
    \B cases.  The original text was only just added, in 5.21.9.

M       regexec.c
M       t/lib/warnings/regexec

commit a78e2a979dade2d426dbeef8214a0f27676be887
Author: Karl Williamson <[email protected]>
Date:   Mon Mar 16 15:52:18 2015 -0600

    re/pat_advanced.t: Tighten test
    
    This adds anchors to a pattern.  I discovered while changing things that
    it still passed when broken

M       t/re/pat_advanced.t

commit c440a570f986f52b764752007c070e8549b2bf7e
Author: Karl Williamson <[email protected]>
Date:   Tue Mar 17 16:56:34 2015 -0600

    regcomp.sym: Update \b descriptions

M       pod/perldebguts.pod
M       regcomp.sym
M       regnodes.h

commit b8cae652c696ff805cc1c46872c4aa89444dd1e8
Author: Karl Williamson <[email protected]>
Date:   Tue Mar 17 15:44:03 2015 -0600

    PATCH: [perl #124091] PP Data::Dumper fails on \n isolate
    
    Commit 31ac59b61698e704b64192de74793793f4b5b0c0 inadvertently changed
    the behavior of the pure perl version of Data::Dumper.  If a newline is
    the sole character in something being dumped with useqq, it no longer
    got translated into a \n sequence and was output raw.  This was due to
    the regex matching of \n at beginning and ends of strings.

M       dist/Data-Dumper/Dumper.pm
M       dist/Data-Dumper/t/dumper.t
-----------------------------------------------------------------------

Summary of changes:
 dist/Data-Dumper/Dumper.pm  |  2 +-
 dist/Data-Dumper/t/dumper.t | 11 ++++++++++-
 ext/SDBM_File/dbu.c         | 26 ++++++++++++++++++--------
 lib/DBM_Filter/t/encode.t   |  5 +++++
 pod/perldebguts.pod         | 28 +++++++++++++---------------
 regcomp.sym                 | 14 +++++++-------
 regexec.c                   | 10 +++++++---
 regnodes.h                  | 14 +++++++-------
 t/lib/warnings/regexec      | 31 +++++++++++++++++++++++++++++--
 t/porting/readme.t          |  2 ++
 t/re/pat_advanced.t         |  2 +-
 11 files changed, 100 insertions(+), 45 deletions(-)

diff --git a/dist/Data-Dumper/Dumper.pm b/dist/Data-Dumper/Dumper.pm
index 0ea2e77..e884298 100644
--- a/dist/Data-Dumper/Dumper.pm
+++ b/dist/Data-Dumper/Dumper.pm
@@ -761,7 +761,7 @@ sub qquote {
        # this.
        || (! $IS_ASCII && $] ge 5.008_001 && utf8::is_utf8($_));
 
-  return qq("$_") if / ^ [[:print:]]* $ /x;    # fast exit
+  return qq("$_") unless /[[:^print:]]/;  # fast exit if only printables
 
   # Here, there is at least one non-printable to output.  First, translate the
   # escapes.
diff --git a/dist/Data-Dumper/t/dumper.t b/dist/Data-Dumper/t/dumper.t
index fa3ce97..14f92dd 100644
--- a/dist/Data-Dumper/t/dumper.t
+++ b/dist/Data-Dumper/t/dumper.t
@@ -108,7 +108,7 @@ sub SKIP_TEST {
   ++$TNUM; print "ok $TNUM # skip $reason\n";
 }
 
-$TMAX = 444;
+$TMAX = 450;
 
 # Force Data::Dumper::Dump to use perl. We test Dumpxs explicitly by calling
 # it direct. Out here it lets us knobble the next if to test that the perl
@@ -1746,3 +1746,12 @@ EOT
         TEST (q(Data::Dumper::DumperX($foo)), 'EBCDIC outlier control: 
DumperX') if $XS;
     }
 }
+############# [perl #124091]
+{
+        $WANT = <<'EOT';
+#$VAR1 = "\n";
+EOT
+        local $Data::Dumper::Useqq = 1;
+        TEST (qq(Dumper("\n")), '\n alone');
+        TEST (qq(Data::Dumper::DumperX("\n")), '\n alone') if $XS;
+}
diff --git a/ext/SDBM_File/dbu.c b/ext/SDBM_File/dbu.c
index d861c0f..4631d40 100644
--- a/ext/SDBM_File/dbu.c
+++ b/ext/SDBM_File/dbu.c
@@ -224,19 +224,29 @@ static void
 prdatum(FILE *stream, datum d)
 {
        int c;
-       char *p = d.dptr;
+       U8 *p = (U8 *) d.dptr;
        int n = d.dsize;
 
        while (n--) {
-               c = *p++ & 0377;
+               c = *p++;
+#ifndef EBCDIC /* Meta notation doesn't make sense on EBCDIC systems*/
                if (c & 0200) {
-                       fprintf(stream, "M-");
-                       c &= 0177;
+                    fprintf(stream, "M-");
+                    c &= 0177;
                }
-               if (c == 0177 || c < ' ') 
-                       fprintf(stream, "^%c", (c == 0177) ? '?' : c + '@');
-               else
-                       putc(c, stream);
+#endif
+                /* \c notation applies for \0 . \x1f, plus \c? */
+                if (c <= 0x1F || c == QUESTION_MARK_CTRL) {
+                    fprintf(stream, "^%c", toCTRL(c));
+                }
+#ifdef EBCDIC   /* Instead of meta, use \x{} for non-printables */
+                else if (! isPRINT_A(c)) {
+                    fprintf(stream, "\\x{%02x}", c);
+               }
+#endif
+               else { /* must be an ASCII printable */
+                    putc(c, stream);
+                }
        }
 }
 
diff --git a/lib/DBM_Filter/t/encode.t b/lib/DBM_Filter/t/encode.t
index 35f501a..37a58ac 100644
--- a/lib/DBM_Filter/t/encode.t
+++ b/lib/DBM_Filter/t/encode.t
@@ -76,6 +76,10 @@ VerifyData(\%h1,
 eval { $db1->Filter_Pop() };
 is $@, '', "pop the 'utf8' filter" ;
 
+SKIP: {
+    skip "Encode doesn't currently work for most filters on EBCDIC, including 
8859-16", 11 if $::IS_EBCDIC || $::IS_EBCDIC;
+    # Actually the only thing failing below is the euro, because that's the
+    # only thing that's added in 8859-16.
 eval { $db1->Filter_Push('encode' => 'iso-8859-16') };
 is $@, '', "push an 'encode' filter (specify iso-8859-16)" ;
 
@@ -114,3 +118,4 @@ undef $db2;
     is $@, '', "untie without inner references" ;
 }
 
+}
diff --git a/pod/perldebguts.pod b/pod/perldebguts.pod
index 591e69b..2b5561d 100644
--- a/pod/perldebguts.pod
+++ b/pod/perldebguts.pod
@@ -572,24 +572,22 @@ will be lost.
  GPOS            no         Matches where last m//g left off.
 
  # Word Boundary Opcodes:
- BOUND           no         Match "" at any word boundary using native
-                            charset rules for non-utf8, otherwise
-                            Unicode rules
- BOUNDL          no         Match "" at any boundary of a given type
-                            using locale rules
+ BOUND           no         Like BOUNDA for non-utf8, otherwise match ""
+                            between any Unicode \w\W or \W\w
+ BOUNDL          no         Like BOUND/BOUNDU, but \w and \W are defined
+                            by current locale
  BOUNDU          no         Match "" at any boundary of a given type
                             using Unicode rules
- BOUNDA          no         Match "" at any boundary of a given type
-                            using ASCII rules
- NBOUND          no         Match "" at any word non-boundary using
-                            native charset rules for non-utf8, otherwise
-                            Unicode rules
- NBOUNDL         no         Match "" at any boundary of a given type
-                            using locale rules
- NBOUNDU         no         Match "" at any boundary of a given type
+ BOUNDA          no         Match "" at any boundary between \w\W or
+                            \W\w, where \w is [_a-zA-Z0-9]
+ NBOUND          no         Like NBOUNDA for non-utf8, otherwise match
+                            "" between any Unicode \w\w or \W\W
+ NBOUNDL         no         Like NBOUND/NBOUNDU, but \w and \W are
+                            defined by current locale
+ NBOUNDU         no         Match "" at any non-boundary of a given type
                             using using Unicode rules
- NBOUNDA         no         Match "" at any boundary of a given type
-                            using using ASCII rules
+ NBOUNDA         no         Match "" betweeen any \w\w or \W\W, where \w
+                            is [_a-zA-Z0-9]
 
  # [Special] alternatives:
  REG_ANY         no         Match any one character (except newline).
diff --git a/regcomp.sym b/regcomp.sym
index 7daa241..f79b874 100644
--- a/regcomp.sym
+++ b/regcomp.sym
@@ -43,15 +43,15 @@ GPOS        GPOS,       no        ; Matches where last m//g 
left off.
 # in regcomp.c uses the enum value of the modifier as an offset from the /d
 # version.  The complements must come after the non-complements.
 # BOUND, POSIX and their complements are affected, as well as EXACTF.
-BOUND       BOUND,      no        ; Match "" at any word boundary using native 
charset rules for non-utf8, otherwise Unicode rules
-BOUNDL      BOUND,      no        ; Match "" at any boundary of a given type 
using locale rules
+BOUND       BOUND,      no        ; Like BOUNDA for non-utf8, otherwise match 
"" between any Unicode \w\W or \W\w
+BOUNDL      BOUND,      no        ; Like BOUND/BOUNDU, but \w and \W are 
defined by current locale
 BOUNDU      BOUND,      no        ; Match "" at any boundary of a given type 
using Unicode rules
-BOUNDA      BOUND,      no        ; Match "" at any boundary of a given type 
using ASCII rules
+BOUNDA      BOUND,      no        ; Match "" at any boundary between \w\W or 
\W\w, where \w is [_a-zA-Z0-9]
 # All NBOUND nodes are required by code in regexec.c to be greater than all 
BOUND ones
-NBOUND      NBOUND,     no        ; Match "" at any word non-boundary using 
native charset rules for non-utf8, otherwise Unicode rules
-NBOUNDL     NBOUND,     no        ; Match "" at any boundary of a given type 
using locale rules
-NBOUNDU     NBOUND,     no        ; Match "" at any boundary of a given type 
using using Unicode rules
-NBOUNDA     NBOUND,     no        ; Match "" at any boundary of a given type 
using using ASCII rules
+NBOUND      NBOUND,     no        ; Like NBOUNDA for non-utf8, otherwise match 
"" between any Unicode \w\w or \W\W
+NBOUNDL     NBOUND,     no        ; Like NBOUND/NBOUNDU, but \w and \W are 
defined by current locale
+NBOUNDU     NBOUND,     no        ; Match "" at any non-boundary of a given 
type using using Unicode rules
+NBOUNDA     NBOUND,     no        ; Match "" betweeen any \w\w or \W\W, where 
\w is [_a-zA-Z0-9]
 
 #* [Special] alternatives:
 REG_ANY     REG_ANY,    no 0 S    ; Match any one character (except newline).
diff --git a/regexec.c b/regexec.c
index 5fb7288..cd03a4a 100644
--- a/regexec.c
+++ b/regexec.c
@@ -38,7 +38,7 @@
 #endif
 
 #define B_ON_NON_UTF8_LOCALE_IS_WRONG            \
-      "Use of \\b{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale"
+      "Use of \\b{} or \\B{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 
locale"
 
 /*
  * pregcomp and pregexec -- regsub and regerror are not used in perl
@@ -2004,8 +2004,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, 
char *s,
     case BOUNDL:
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
         if (FLAGS(c) != TRADITIONAL_BOUND) {
-            Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
+            if (! IN_UTF8_CTYPE_LOCALE) {
+                Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
                                                 B_ON_NON_UTF8_LOCALE_IS_WRONG);
+            }
             goto do_boundu;
         }
 
@@ -2015,8 +2017,10 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, 
char *s,
     case NBOUNDL:
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
         if (FLAGS(c) != TRADITIONAL_BOUND) {
-            Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
+            if (! IN_UTF8_CTYPE_LOCALE) {
+                Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE),
                                                 B_ON_NON_UTF8_LOCALE_IS_WRONG);
+            }
             goto do_nboundu;
         }
 
diff --git a/regnodes.h b/regnodes.h
index 144d6f6..3c9b991 100644
--- a/regnodes.h
+++ b/regnodes.h
@@ -19,14 +19,14 @@
 #define        MEOL                    5       /* 0x05 Same, assuming 
multiline: /$/m */
 #define        EOS                     6       /* 0x06 Match "" at end of 
string: /\z/ */
 #define        GPOS                    7       /* 0x07 Matches where last m//g 
left off. */
-#define        BOUND                   8       /* 0x08 Match "" at any word 
boundary using native charset rules for non-utf8, otherwise Unicode rules */
-#define        BOUNDL                  9       /* 0x09 Match "" at any 
boundary of a given type using locale rules */
+#define        BOUND                   8       /* 0x08 Like BOUNDA for 
non-utf8, otherwise match "" between any Unicode \w\W or \W\w */
+#define        BOUNDL                  9       /* 0x09 Like BOUND/BOUNDU, but 
\w and \W are defined by current locale */
 #define        BOUNDU                  10      /* 0x0a Match "" at any 
boundary of a given type using Unicode rules */
-#define        BOUNDA                  11      /* 0x0b Match "" at any 
boundary of a given type using ASCII rules */
-#define        NBOUND                  12      /* 0x0c Match "" at any word 
non-boundary using native charset rules for non-utf8, otherwise Unicode rules */
-#define        NBOUNDL                 13      /* 0x0d Match "" at any 
boundary of a given type using locale rules */
-#define        NBOUNDU                 14      /* 0x0e Match "" at any 
boundary of a given type using using Unicode rules */
-#define        NBOUNDA                 15      /* 0x0f Match "" at any 
boundary of a given type using using ASCII rules */
+#define        BOUNDA                  11      /* 0x0b Match "" at any 
boundary between \w\W or \W\w, where \w is [_a-zA-Z0-9] */
+#define        NBOUND                  12      /* 0x0c Like NBOUNDA for 
non-utf8, otherwise match "" between any Unicode \w\w or \W\W */
+#define        NBOUNDL                 13      /* 0x0d Like NBOUND/NBOUNDU, 
but \w and \W are defined by current locale */
+#define        NBOUNDU                 14      /* 0x0e Match "" at any 
non-boundary of a given type using using Unicode rules */
+#define        NBOUNDA                 15      /* 0x0f Match "" betweeen any 
\w\w or \W\W, where \w is [_a-zA-Z0-9] */
 #define        REG_ANY                 16      /* 0x10 Match any one character 
(except newline). */
 #define        SANY                    17      /* 0x11 Match any one 
character. */
 #define        CANY                    18      /* 0x12 Match any one byte. */
diff --git a/t/lib/warnings/regexec b/t/lib/warnings/regexec
index d956cb8..b62ff6e 100644
--- a/t/lib/warnings/regexec
+++ b/t/lib/warnings/regexec
@@ -160,5 +160,32 @@ setlocale(&POSIX::LC_CTYPE, "C");
 no warnings 'locale';
 "a" =~ /\b{gcb}/l;
 EXPECT
-Use of \b{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale at - line 
8.
-Use of \b{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale at - line 
8.
+Use of \b{} or \B{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale at 
- line 8.
+Use of \b{} or \B{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale at 
- line 8.
+########
+# NAME \b{} in UTF-8 locale
+require '../loc_tools.pl';
+unless (locales_enabled()) {
+    print("SKIPPED\n# locales not available\n"),exit;
+}
+eval { require POSIX; POSIX->import("locale_h") };
+if ($@) {
+    print("SKIPPED\n# no POSIX\n"),exit;
+}
+my $utf8_locale = find_utf8_ctype_locale();
+unless ($utf8_locale) {
+    print("SKIPPED\n# No UTF-8 locale available\n"),exit;
+}
+use warnings 'locale';
+use locale;
+setlocale(&POSIX::LC_CTYPE, "C");
+ "abc def" =~ /\b{wb}.*?/;
+ "abc def" =~ /\B{wb}.*?/;
+setlocale(&POSIX::LC_CTYPE, $utf8_locale);
+ "abc def" =~ /\b{wb}.*?/;
+ "abc def" =~ /\B{wb}.*?/;
+EXPECT
+Use of \b{} or \B{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale at 
- line 16.
+Use of \b{} or \B{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale at 
- line 16.
+Use of \b{} or \B{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale at 
- line 17.
+Use of \b{} or \B{} for non-UTF-8 locale is wrong.  Assuming a UTF-8 locale at 
- line 17.
diff --git a/t/porting/readme.t b/t/porting/readme.t
index 85d044e..e127920 100644
--- a/t/porting/readme.t
+++ b/t/porting/readme.t
@@ -52,6 +52,8 @@ eval {
 };
 
 if(@sorted_order) {
+    local $::TODO;
+    $::TODO = "Unicode::Collate not working on EBCDIC" if $::IS_EBCDIC || 
$::IS_EBCDIC;
     ok(eq_array(\@current_order, \@sorted_order), "Files are referenced in 
order") or
         print_right_order();
 }
diff --git a/t/re/pat_advanced.t b/t/re/pat_advanced.t
index fa324fd..3eaad63 100644
--- a/t/re/pat_advanced.t
+++ b/t/re/pat_advanced.t
@@ -995,7 +995,7 @@ sub run_tests {
         #
         my $w;
         local $SIG {__WARN__} = sub {$w .= "@_"};
-        $result = eval 'q(WARN) =~ /[\N{WARN}]/';
+        $result = eval 'q(WARN) =~ /^[\N{WARN}]$/';
         ok !$@ && $result && ! $w,  '\N{} returning multi-char works';
 
         undef $w;

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.21.9-291-gfc96206

Reply via email to