[perl.git] branch blead, updated. v5.17.6-231-gbf3cd0e

Karl Williamson Sun, 09 Dec 2012 08:48:04 -0800

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/bf3cd0e64298155c8984b0fce2cce3b4648d3110?hp=0d7b125b0c3d6ada9b99e6cdd424e39fbc3aa5f1>


- Log -----------------------------------------------------------------
commit bf3cd0e64298155c8984b0fce2cce3b4648d3110
Author: Karl Williamson <[email protected]>
Date:   Thu Dec 6 19:20:22 2012 -0700

    locale.t: Add test
    
    This makes sure that taint isn't being added unnecessarily

M       lib/locale.t

commit a9b7c63715bd82e15b815c96e5e1c6b87ab46f7b
Author: Karl Williamson <[email protected]>
Date:   Thu Dec 6 19:19:13 2012 -0700

    locale.t: Add optional test name supplement
    
    This allows tests to have extra stuff passed in that will be printed as
    part of the test results

M       lib/locale.t

commit a9c4383684955582336fa9f07ca1f378d84bdb99
Author: Karl Williamson <[email protected]>
Date:   Thu Dec 6 19:07:41 2012 -0700

    regcomp.c: Typo in comment; white space

M       regcomp.c

commit 76a77b1b5dc44d6f70bf59fdc331a1332081b301
Author: Karl Williamson <[email protected]>
Date:   Thu Dec 6 11:31:19 2012 -0700

    pp.c pp_pack.c: Use macro instead of function
    
    This converts to use is_SPACE_utf8() instead of the (soon to be
    deprecated) is_utf8_space().  The macro is faster, avoiding the function
    call completely, so the performance need to make a special case for a SPACE
    character is gone.

M       pp.c
M       pp_pack.c

commit 009ccfeda269d5b353a297ca4059497343c8ce41
Author: Karl Williamson <[email protected]>
Date:   Thu Dec 6 09:20:51 2012 -0700

    regcomp.c:regprop: [bracketize] \w..., add \v
    
    This function returns the name of a character class given its number.
    This changes the name of \w, \s, \d to be [\w] ....  And it adds an
    entry for \v and \V.  These makes a complete set, and will make things
    easier to read, as a result of changes coming in future commits

M       regcomp.c

commit e699a1d5e627c6248a14e00e813d5a7a53f0947d
Author: Karl Williamson <[email protected]>
Date:   Wed Dec 5 21:53:06 2012 -0700

    regexec.c: Nits coding standards-type changing

M       regexec.c

commit eb42f199581875bfa31b7e27ea4c1425417d0e5a
Author: Karl Williamson <[email protected]>
Date:   Wed Dec 5 21:39:21 2012 -0700

    perl.h: Add comments

M       perl.h

commit 41805eb96d4ab7da622f82f6104ab8fa95527f33
Author: Karl Williamson <[email protected]>
Date:   Wed Dec 5 21:37:26 2012 -0700

    perlrecharclass: Fix defn of [:word:]

M       pod/perlrecharclass.pod

commit 583d08e34bb7232227da37acc36ec737ce653671
Author: Karl Williamson <[email protected]>
Date:   Sun Dec 2 19:18:34 2012 -0700

    intrpvar.h: Add comment

M       intrpvar.h

commit d87fc68d13011e0743cc7850f0c3fceb4633864f
Author: Karl Williamson <[email protected]>
Date:   Sun Dec 2 18:50:44 2012 -0700

    Add Todo test for Perl #114272

M       t/re/re_tests

commit 6c46377d3226d15447819a5fa3518c660bce7679
Author: Karl Williamson <[email protected]>
Date:   Sun Dec 2 18:48:33 2012 -0700

    utf8.c: Combine 2 function calls into one
    
    There is a function that does both these together, more efficiently

M       utf8.c

commit b81740c0c3e6a549e6766887035f48b39a45557c
Author: Karl Williamson <[email protected]>
Date:   Sun Dec 2 18:47:23 2012 -0700

    utf8.c: Move ARGS_ASSERT to earlier in function
    
    to a place where people more expect to see it.

M       utf8.c

commit 9e3e825e7fea9a67d23db17933ab6c803f890219
Author: Karl Williamson <[email protected]>
Date:   Sun Dec 2 21:20:36 2012 -0700

    embed.fnc: Add missing entry
    
    This function is defined in utf8.c, but isn't called by the core, and
    there was no entry for it in embed.fnc

M       embed.fnc
M       embed.h
M       proto.h

commit a1894d81735066945ef520af52cc180d1e0dfb10
Author: Karl Williamson <[email protected]>
Date:   Thu Dec 6 22:42:18 2012 -0700

    Silence some g++ compiler warnings
    
    Changing these slightly got rid of the warnings like:
    toke.c:9168: warning: format not a string literal and no format arguments

M       doio.c
M       pp.c
M       pp_ctl.c
M       toke.c

commit 6c2f1a55be4cd937e7e87f2ffb875a22688fb01e
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 8 21:42:23 2012 -0700

    intrpvar.h: Use #define instead of hard-coded number
    
    The number 12 is mysterious as to why we are using it otherwise.

M       intrpvar.h
-----------------------------------------------------------------------

Summary of changes:
 doio.c                  |    2 +-
 embed.fnc               |    1 +
 embed.h                 |    1 +
 intrpvar.h              |    4 ++--
 lib/locale.t            |   16 ++++++++++++----
 perl.h                  |    6 ++++++
 pod/perlrecharclass.pod |   11 ++++++-----
 pp.c                    |   12 ++++++------
 pp_ctl.c                |    4 ++--
 pp_pack.c               |    2 +-
 proto.h                 |    4 ++++
 regcomp.c               |   20 +++++++++++---------
 regexec.c               |    7 +++++--
 t/re/re_tests           |    3 +++
 toke.c                  |    4 ++--
 utf8.c                  |    7 +++----
 16 files changed, 66 insertions(+), 38 deletions(-)

diff --git a/doio.c b/doio.c
index e915bd5..08ed433 100644
--- a/doio.c
+++ b/doio.c
@@ -1333,7 +1333,7 @@ I32
 Perl_my_lstat_flags(pTHX_ const U32 flags)
 {
     dVAR;
-    static const char no_prev_lstat[] = "The stat preceding -l _ wasn't an 
lstat";
+    static const char* const no_prev_lstat = "The stat preceding -l _ wasn't 
an lstat";
     dSP;
     const char *file;
     if (PL_op->op_flags & OPf_REF) {
diff --git a/embed.fnc b/embed.fnc
index 3dea370..84c0b19 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -637,6 +637,7 @@ AMpR        |bool   |_is_uni_perl_idstart|UV c
 ApPR   |bool   |is_uni_alpha_lc|UV c
 ApPR   |bool   |is_uni_ascii_lc|UV c
 ApPR   |bool   |is_uni_space_lc|UV c
+AMpPR  |bool   |is_uni_blank_lc|UV c
 ApPR   |bool   |is_uni_cntrl_lc|UV c
 ApPR   |bool   |is_uni_graph_lc|UV c
 ApPR   |bool   |is_uni_digit_lc|UV c
diff --git a/embed.h b/embed.h
index 25bd724..95f9943 100644
--- a/embed.h
+++ b/embed.h
@@ -230,6 +230,7 @@
 #define is_uni_ascii(a)                Perl_is_uni_ascii(aTHX_ a)
 #define is_uni_ascii_lc(a)     Perl_is_uni_ascii_lc(aTHX_ a)
 #define is_uni_blank(a)                Perl_is_uni_blank(aTHX_ a)
+#define is_uni_blank_lc(a)     Perl_is_uni_blank_lc(aTHX_ a)
 #define is_uni_cntrl(a)                Perl_is_uni_cntrl(aTHX_ a)
 #define is_uni_cntrl_lc(a)     Perl_is_uni_cntrl_lc(aTHX_ a)
 #define is_uni_digit(a)                Perl_is_uni_digit(aTHX_ a)
diff --git a/intrpvar.h b/intrpvar.h
index 52b45ba..f58c0d1 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -613,7 +613,7 @@ PERLVAR(I, NonL1NonFinalFold,   SV *)
 PERLVAR(I, HasMultiCharFold,   SV *)
 
 /* utf8 character class swashes */
-PERLVAR(I, utf8_alnum, SV *)
+PERLVAR(I, utf8_alnum, SV *)   /* Should really be named "utf8_wordchar" */
 PERLVAR(I, utf8_alpha, SV *)
 PERLVAR(I, utf8_graph, SV *)
 PERLVAR(I, utf8_digit, SV *)
@@ -634,7 +634,7 @@ PERLVAR(I, utf8_charname_continue, SV *)
 PERLVAR(I, last_swash_hv, HV *)
 PERLVAR(I, last_swash_tmps, U8 *)
 PERLVAR(I, last_swash_slen, STRLEN)
-PERLVARA(I, last_swash_key,12, U8)
+PERLVARA(I, last_swash_key,UTF8_MAXBYTES-1, U8)
 PERLVAR(I, last_swash_klen, U8)                /* Only needs to store 0-12  */
 
 #ifdef FCRYPT
diff --git a/lib/locale.t b/lib/locale.t
index 26a7bd4..6d491e4 100644
--- a/lib/locale.t
+++ b/lib/locale.t
@@ -89,14 +89,22 @@ sub is_tainted { # hello, camel two.
     not eval { $dummy = join("", @_), kill 0; 1 }
 }
 
-sub check_taint ($) {
-    ok is_tainted($_[0]), "verify that is tainted";
+sub check_taint ($;$) {
+    my $message_tail = $_[1] // "";
+    $message_tail = ": $message_tail" if $message_tail;
+    ok is_tainted($_[0]), "verify that is tainted$message_tail";
 }
 
-sub check_taint_not ($) {
-    ok((not is_tainted($_[0])), "verify that isn't tainted");
+sub check_taint_not ($;$) {
+    my $message_tail = $_[1] // "";
+    $message_tail = ": $message_tail" if $message_tail;
+    ok((not is_tainted($_[0])), "verify that isn't tainted$message_tail");
 }
 
+"\tb\t" =~ /^m?(\s)(.*)\1$/;
+check_taint_not   $&, "not tainted outside 'use locale'";
+;
+
 use locale;    # engage locale and therefore locale taint.
 
 check_taint_not   $a;
diff --git a/perl.h b/perl.h
index b13521a..d97a577 100644
--- a/perl.h
+++ b/perl.h
@@ -5006,8 +5006,14 @@ struct interpreter {
 
 /* Set up PERLVAR macros for populating structs */
 #  define PERLVAR(prefix,var,type) type prefix##var;
+
+/* 'var' is an array of length 'n' */
 #  define PERLVARA(prefix,var,n,type) type prefix##var[n];
+
+/* initialize 'var' to init' */
 #  define PERLVARI(prefix,var,type,init) type prefix##var;
+
+/* like PERLVARI, but make 'var' a const */
 #  define PERLVARIC(prefix,var,type,init) type prefix##var;
 
 struct interpreter {
diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod
index 7dafc54..7478932 100644
--- a/pod/perlrecharclass.pod
+++ b/pod/perlrecharclass.pod
@@ -140,11 +140,12 @@ Any character not matched by C<\d> is matched by C<\D>.
 =head3 Word characters
 
 A C<\w> matches a single alphanumeric character (an alphabetic character, or a
-decimal digit) or a connecting punctuation character, such as an
-underscore ("_").  It does not match a whole word.  To match a whole
-word, use C<\w+>.  This isn't the same thing as matching an English word, but
-in the ASCII range it is the same as a string of Perl-identifier
-characters.
+decimal digit); or a connecting punctuation character, such as an
+underscore ("_"); or a "mark" character (like some sort of accent) that
+attaches to one of those.  It does not match a whole word.  To match a
+whole word, use C<\w+>.  This isn't the same thing as matching an
+English word, but in the ASCII range it is the same as a string of
+Perl-identifier characters.
 
 =over
 
diff --git a/pp.c b/pp.c
index cf59a84..33943e4 100644
--- a/pp.c
+++ b/pp.c
@@ -1646,7 +1646,7 @@ PP(pp_repeat)
 
     if (GIMME == G_ARRAY && PL_op->op_private & OPpREPEAT_DOLIST) {
        dMARK;
-       static const char oom_list_extend[] = "Out of memory during list 
extend";
+       static const char* const oom_list_extend = "Out of memory during list 
extend";
        const I32 items = SP - MARK;
        const I32 max = items * count;
 
@@ -1698,7 +1698,7 @@ PP(pp_repeat)
        SV * const tmpstr = POPs;
        STRLEN len;
        bool isutf;
-       static const char oom_string_extend[] =
+       static const char* const oom_string_extend =
          "Out of memory during string extend";
 
        if (TARG != tmpstr)
@@ -5380,7 +5380,7 @@ PP(pp_split)
     orig = s;
     if (skipwhite) {
        if (do_utf8) {
-           while (*s == ' ' || is_utf8_space((U8*)s))
+           while (isSPACE_utf8(s))
                s += UTF8SKIP(s);
        }
        else if (get_regex_charset(RX_EXTFLAGS(rx)) == REGEX_LOCALE_CHARSET) {
@@ -5405,9 +5405,9 @@ PP(pp_split)
            m = s;
            /* this one uses 'm' and is a negative test */
            if (do_utf8) {
-               while (m < strend && !( *m == ' ' || is_utf8_space((U8*)m) )) {
+               while (m < strend && ! isSPACE_utf8(m) ) {
                    const int t = UTF8SKIP(m);
-                   /* is_utf8_space returns FALSE for malform utf8 */
+                   /* isSPACE_utf8 returns FALSE for malform utf8 */
                    if (strend - m < t)
                        m = strend;
                    else
@@ -5444,7 +5444,7 @@ PP(pp_split)
 
            /* this one uses 's' and is a positive test */
            if (do_utf8) {
-               while (s < strend && ( *s == ' ' || is_utf8_space((U8*)s) ))
+               while (s < strend && isSPACE_utf8(s) )
                    s +=  UTF8SKIP(s);
            }
            else if (get_regex_charset(RX_EXTFLAGS(rx)) == 
REGEX_LOCALE_CHARSET) {
diff --git a/pp_ctl.c b/pp_ctl.c
index 0256070..cd5033f 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -2692,7 +2692,7 @@ S_dofindlabel(pTHX_ OP *o, const char *label, STRLEN len, 
U32 flags, OP **opstac
 {
     dVAR;
     OP **ops = opstack;
-    static const char too_deep[] = "Target of goto is too deeply nested";
+    static const char* const too_deep = "Target of goto is too deeply nested";
 
     PERL_ARGS_ASSERT_DOFINDLABEL;
 
@@ -2764,7 +2764,7 @@ PP(pp_goto)
     STRLEN label_len = 0;
     U32 label_flags = 0;
     const bool do_dump = (PL_op->op_type == OP_DUMP);
-    static const char must_have_label[] = "goto must have label";
+    static const char* const must_have_label = "goto must have label";
 
     if (PL_op->op_flags & OPf_STACKED) {
        SV * const sv = POPs;
diff --git a/pp_pack.c b/pp_pack.c
index a2a5c68..321a47d 100644
--- a/pp_pack.c
+++ b/pp_pack.c
@@ -1470,7 +1470,7 @@ S_unpack_rec(pTHX_ tempsym_t* symptr, const char *s, 
const char *strbeg, const c
                if (utf8 && (symptr->flags & FLAG_WAS_UTF8)) {
                    for (ptr = s+len-1; ptr >= s; ptr--)
                        if (*ptr != 0 && !UTF8_IS_CONTINUATION(*ptr) &&
-                           !is_utf8_space((U8 *) ptr)) break;
+                           !isSPACE_utf8(ptr)) break;
                    if (ptr >= s) ptr += UTF8SKIP(ptr);
                    else ptr++;
                    if (ptr > s+len)
diff --git a/proto.h b/proto.h
index 2351b32..1a29b2f 100644
--- a/proto.h
+++ b/proto.h
@@ -1701,6 +1701,10 @@ PERL_CALLCONV bool       Perl_is_uni_blank(pTHX_ UV c)
                        __attribute__warn_unused_result__
                        __attribute__pure__;
 
+PERL_CALLCONV bool     Perl_is_uni_blank_lc(pTHX_ UV c)
+                       __attribute__warn_unused_result__
+                       __attribute__pure__;
+
 PERL_CALLCONV bool     Perl_is_uni_cntrl(pTHX_ UV c)
                        __attribute__warn_unused_result__
                        __attribute__pure__;
diff --git a/regcomp.c b/regcomp.c
index 4736bbd..979dcae 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -12630,7 +12630,7 @@ parseit:
 
         /* If the highest code point is within Latin1, we can use the
          * compiled-in Alphas list, and not have to go out to disk.  This
-         * yields two false positives, the masculine and feminine oridinal
+         * yields two false positives, the masculine and feminine ordinal
          * indicators, which are weeded out below using the
          * IS_IN_SOME_FOLD_L1() macro */
         if (invlist_highest(cp_list) < 256) {
@@ -12672,7 +12672,7 @@ parseit:
                         assert(PL_utf8_tofold); /* Verify that worked */
                     }
                     PL_utf8_foldclosures =
-                                        _swash_inversion_hash(PL_utf8_tofold);
+                                    _swash_inversion_hash(PL_utf8_tofold);
                 }
             }
 
@@ -13822,12 +13822,12 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o)
 
     /* Should be synchronized with * ANYOF_ #xdefines in regcomp.h */
     static const char * const anyofs[] = {
-        "\\w",
-        "\\W",
-        "\\s",
-        "\\S",
-        "\\d",
-        "\\D",
+        "[\\w]",
+        "[\\W]",
+        "[\\s]",
+        "[\\S]",
+        "[\\d]",
+        "[\\D]",
         "[:alnum:]",
         "[:^alnum:]",
         "[:alpha:]",
@@ -13851,7 +13851,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o)
         "[:space:]",
         "[:^space:]",
         "[:blank:]",
-        "[:^blank:]"
+        "[:^blank:]",
+        "[\\v]",
+        "[\\V]"
     };
     RXi_GET_DECL(prog,progi);
     GET_RE_DEBUG_FLAGS_DECL;
diff --git a/regexec.c b/regexec.c
index 3b2f012..d235cde 100644
--- a/regexec.c
+++ b/regexec.c
@@ -4455,7 +4455,10 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
                if (nextchr == '\r' /* And if it was CR, and the next is LF,
                                       match the LF */
                    && locinput < PL_regeol
-                   && UCHARAT(locinput) == '\n') locinput++;
+                   && UCHARAT(locinput) == '\n')
+                {
+                    locinput++;
+                }
            }
            else {
 
@@ -4473,7 +4476,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, 
regnode *prog)
                    char *starting = locinput;
 
                    /* In case have to backtrack the last prepend */
-                   char *previous_prepend = 0;
+                   char *previous_prepend = NULL;
 
                    LOAD_UTF8_CHARCLASS_GCB();
 
diff --git a/t/re/re_tests b/t/re/re_tests
index fff9e4c..5321da6 100644
--- a/t/re/re_tests
+++ b/t/re/re_tests
@@ -1716,4 +1716,7 @@ ab[c\\\](??{"x"})]{3}d    ab\\](d y       -       -
 /^\S{11}/a     
\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}
    n       -       -
 /^\W{11}/a     
\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}\x{10FFFF}
    n       -       -
 
+# [ perl #114272]
+\Vn    \xFFn/  yT      $&      \xFFn
+
 # vim: softtabstop=0 noexpandtab
diff --git a/toke.c b/toke.c
index 16f359e..628a5da 100644
--- a/toke.c
+++ b/toke.c
@@ -110,7 +110,7 @@ Individual members of C<PL_parser> have their own 
documentation.
 #  define PL_nextval           (PL_parser->nextval)
 #endif
 
-static const char ident_too_long[] = "Identifier too long";
+static const char* const ident_too_long = "Identifier too long";
 
 #ifdef PERL_MAD
 #  define CURMAD(slot,sv) if (PL_madskills) { curmad(slot,sv); sv = 0; }
@@ -10646,7 +10646,7 @@ Perl_scan_num(pTHX_ const char *start, YYSTYPE* lvalp)
     SV *sv = NULL;                     /* place to put the converted number */
     bool floatit;                      /* boolean: int or float? */
     const char *lastub = NULL;         /* position of last underbar */
-    static char const number_too_long[] = "Number too long";
+    static const char* const number_too_long = "Number too long";
 
     PERL_ARGS_ASSERT_SCAN_NUM;
 
diff --git a/utf8.c b/utf8.c
index 70620af..418f0d8 100644
--- a/utf8.c
+++ b/utf8.c
@@ -3924,6 +3924,8 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
 
     SV* invlist;
 
+    PERL_ARGS_ASSERT__SWASH_TO_INVLIST;
+
     /* If not a hash, it must be the swash's inversion list instead */
     if (SvTYPE(hv) != SVt_PVHV) {
         return (SV*) hv;
@@ -3940,8 +3942,6 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
     bits  = SvUV(*bitssvp);
     octets = bits >> 3; /* if bits == 1, then octets == 0 */
 
-    PERL_ARGS_ASSERT__SWASH_TO_INVLIST;
-
     /* read $swash->{LIST} */
     if (SvPOK(*listsvp)) {
        l = (U8*)SvPV(*listsvp, lcur);
@@ -4055,8 +4055,7 @@ Perl__swash_to_invlist(pTHX_ SV* const swash)
            _invlist_union(invlist, other, &invlist);
            break;
        case '!':
-           _invlist_invert(other);
-           _invlist_union(invlist, other, &invlist);
+            _invlist_union_maybe_complement_2nd(invlist, other, TRUE, 
&invlist);
            break;
        case '-':
            _invlist_subtract(invlist, other, &invlist);

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.17.6-231-gbf3cd0e

Reply via email to