[perl.git] branch blead, updated. v5.23.7-406-g3a3d108

Karl Williamson Thu, 18 Feb 2016 19:27:43 -0800

In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/3a3d108484629fe5b421976b8d6fd6f280a1f97a?hp=52ffb1b5f4ae92a18ec49ded8308d386abc08f03>


- Log -----------------------------------------------------------------
commit 3a3d108484629fe5b421976b8d6fd6f280a1f97a
Author: Karl Williamson <[email protected]>
Date:   Mon Feb 15 11:02:07 2016 -0700

    Cast correctly to U8, not char
    
    U8 is what the function being called is expecting

M       regcomp.c

commit 768f8d987cfefb34c0b9c1a2e5c65b4448b4b937
Author: Karl Williamson <[email protected]>
Date:   Mon Feb 15 20:59:10 2016 -0700

    perlapi: Hide the swash functions
    
    These should be internal only, and we may want to get rid of them
    someday.  Hide their existence so that people who don't already know
    about them won't be tempted to try to use them.

M       embed.fnc

commit b271ede139c499b0a38fbeff3a5a032adcb54955
Author: Karl Williamson <[email protected]>
Date:   Mon Feb 15 20:32:32 2016 -0700

    regcomp.h: Not all ANYOF flags are in use.
    
    So, it's better to not have a mask to include the unused ones.

M       regcomp.h

commit 346aefe94ef87336bae3ae96d8047475e3361709
Author: Karl Williamson <[email protected]>
Date:   Sat Feb 13 17:21:28 2016 -0700

    regcomp.c: Simplify a few lines of code
    
    This code had been written before the isMNEMONIC_CNTRL() macro was
    created.  Using the macro simplifies things a little.

M       regcomp.c

commit 7240094943c6fdbc40220518824ce9120ca0fab5
Author: Karl Williamson <[email protected]>
Date:   Sat Feb 13 15:51:50 2016 -0700

    regcomp.c: Clean up logic in function
    
    This function uses some crude heuristics to decide whether to make a
    synthetic start class or not.  This commit removes some redundancies.

M       regcomp.c

commit cab181dd2f1897a30514c8f9efb56be269495619
Author: Karl Williamson <[email protected]>
Date:   Thu Feb 11 10:25:04 2016 -0700

    regcomp.c: -Dr \xZZ instead of \x{ZZ}
    
    The brackets are unnecessary and clutter the output.

M       regcomp.c

commit c0e8a4e1861b0a6637457cb7da1ff9b89f90e22f
Author: Karl Williamson <[email protected]>
Date:   Thu Feb 11 10:12:57 2016 -0700

    regcomp.c: Fix -Dr bug
    
    It was using a wrong length calculation, which under some circumstances
    caused the output to include extra bytes.  Also I added comments, and
    changed a variable name, so I don't have to figure this out again from
    scratch.

M       regcomp.c

commit c3caadb53923cd6d6dc69e426f00325a943ea81f
Author: Karl Williamson <[email protected]>
Date:   Mon Feb 15 11:04:36 2016 -0700

    regcomp.c: Use macro to hide complexity
    
    There is an existing macro that does these three lines in one source
    line.

M       regcomp.c

commit fe0a36465f2c9353aeec20e4a5d22f6681bb28bb
Author: Karl Williamson <[email protected]>
Date:   Sat Feb 13 13:49:00 2016 -0700

    Don't allow /\N{}/ under 're strict'
    
    This is the one remaining empty {} that was accepted under the
    experimental 'use re "strict"'.

M       embed.fnc
M       embed.h
M       pod/perldelta.pod
M       pod/perldiag.pod
M       proto.h
M       regcomp.c
M       t/re/reg_mesg.t

commit a60b792277ab2836704066899135e8d4c83b0f0c
Author: Karl Williamson <[email protected]>
Date:   Sat Feb 13 15:20:49 2016 -0700

    perlrecharclass: Add some missing info

M       pod/perlrecharclass.pod
-----------------------------------------------------------------------

Summary of changes:
 embed.fnc               |  7 +++--
 embed.h                 |  2 +-
 pod/perldelta.pod       |  6 ++++
 pod/perldiag.pod        |  7 +++--
 pod/perlrecharclass.pod |  3 ++
 proto.h                 |  2 +-
 regcomp.c               | 82 ++++++++++++++++++++++++-------------------------
 regcomp.h               |  2 +-
 t/re/reg_mesg.t         |  4 ++-
 9 files changed, 64 insertions(+), 51 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 23e1e52..f5ace28 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -1164,7 +1164,7 @@ Amb       |OP*    |ref            |NULLOK OP* o|I32 type
 s      |OP*    |refkids        |NULLOK OP* o|I32 type
 #endif
 Ap     |void   |regdump        |NN const regexp* r
-Ap     |SV*    |regclass_swash |NULLOK const regexp *prog \
+ApM    |SV*    |regclass_swash |NULLOK const regexp *prog \
                                |NN const struct regnode *node|bool doinit \
                                |NULLOK SV **listsvp|NULLOK SV **altsvp
 #if defined(PERL_IN_REGCOMP_C) || defined(PERL_IN_PERL_C) || 
defined(PERL_IN_UTF8_C)
@@ -1517,8 +1517,8 @@ Apd       |void   |sv_vsetpvfn    |NN SV *const sv|NN 
const char *const pat|const STRLEN pa
                                |NULLOK va_list *const args|NULLOK SV **const 
svargs \
                                |const I32 svmax|NULLOK bool *const 
maybe_tainted
 ApR    |NV     |str_to_version |NN SV *sv
-ApR    |SV*    |swash_init     |NN const char* pkg|NN const char* name|NN SV* 
listsv|I32 minbits|I32 none
-Ap     |UV     |swash_fetch    |NN SV *swash|NN const U8 *ptr|bool do_utf8
+ApRM   |SV*    |swash_init     |NN const char* pkg|NN const char* name|NN SV* 
listsv|I32 minbits|I32 none
+ApM    |UV     |swash_fetch    |NN SV *swash|NN const U8 *ptr|bool do_utf8
 #ifdef PERL_IN_REGCOMP_C
 EiMR   |SV*    |add_cp_to_invlist      |NULLOK SV* invlist|const UV cp
 EsM    |void   |_append_range_to_invlist   |NN SV* const invlist|const UV 
start|const UV end
@@ -2174,6 +2174,7 @@ Es        |bool   |grok_bslash_N  |NN RExC_state_t 
*pRExC_state               \
                                |NULLOK UV *code_point_p                    \
                                |NULLOK int* cp_count                       \
                                |NN I32 *flagp                              \
+                               |const bool strict                          \
                                |const U32 depth
 Es     |void   |reginsert      |NN RExC_state_t *pRExC_state \
                                |U8 op|NN regnode *opnd|U32 depth
diff --git a/embed.h b/embed.h
index a1368ea..a12a3e6 100644
--- a/embed.h
+++ b/embed.h
@@ -999,7 +999,7 @@
 #define edit_distance          S_edit_distance
 #define get_ANYOF_cp_list_for_ssc(a,b) S_get_ANYOF_cp_list_for_ssc(aTHX_ a,b)
 #define get_invlist_iter_addr  S_get_invlist_iter_addr
-#define grok_bslash_N(a,b,c,d,e,f)     S_grok_bslash_N(aTHX_ a,b,c,d,e,f)
+#define grok_bslash_N(a,b,c,d,e,f,g)   S_grok_bslash_N(aTHX_ a,b,c,d,e,f,g)
 #define handle_named_backref(a,b,c,d)  S_handle_named_backref(aTHX_ a,b,c,d)
 #define handle_possible_posix(a,b,c,d) S_handle_possible_posix(aTHX_ a,b,c,d)
 #define handle_regex_sets(a,b,c,d,e)   S_handle_regex_sets(aTHX_ a,b,c,d,e)
diff --git a/pod/perldelta.pod b/pod/perldelta.pod
index 5607b2e..55db093 100644
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -52,6 +52,12 @@ XXX For a release on a stable branch, this section aspires 
to be:
 
 [ List each incompatible change as a =head2 entry ]
 
+=head2 C<qr/\N{}/> now disallowed under C<use re "strict">
+
+An empty C<\N{}> makes no sense, but for backwards compatibility is
+silently accepted as doing nothing.  But now this is a fatal error under
+the experimental feature L<re/'strict' mode>.
+
 =head1 Deprecations
 
 XXX Any deprecated features, syntax, modules etc. should be listed here.
diff --git a/pod/perldiag.pod b/pod/perldiag.pod
index cc27016..1e4760d 100644
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -7203,9 +7203,10 @@ Something Very Wrong.
 
 (F) Named Unicode character escapes (C<\N{...}>) may return a zero-length
 sequence.  Such an escape was used in an extended character class, i.e.
-C<(?[...])>, which is not permitted.  Check that the correct escape has
-been used, and the correct charnames handler is in scope.  The S<<-- HERE>
-shows whereabouts in the regular expression the problem was discovered.
+C<(?[...])>, or under C<use re 'strict'>, which is not permitted.  Check
+that the correct escape has been used, and the correct charnames handler
+is in scope.  The S<<-- HERE> shows whereabouts in the regular
+expression the problem was discovered.
 
 =back
 
diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod
index ef8048f..7f5a4ef 100644
--- a/pod/perlrecharclass.pod
+++ b/pod/perlrecharclass.pod
@@ -999,6 +999,9 @@ use it will raise a warning, unless disabled via
 Comments on this feature are welcome; send email to
 C<[email protected]>.
 
+The rules used by L<C<use re 'strict>|re/'strict' mode> apply to this
+construct.
+
 We can extend the example above:
 
  /(?[ ( \p{Thai} + \p{Lao} ) & \p{Digit} ])/
diff --git a/proto.h b/proto.h
index 4f2d687..c3adf2d 100644
--- a/proto.h
+++ b/proto.h
@@ -4747,7 +4747,7 @@ PERL_STATIC_INLINE STRLEN*        
S_get_invlist_iter_addr(SV* invlist)
 #define PERL_ARGS_ASSERT_GET_INVLIST_ITER_ADDR \
        assert(invlist)
 
-STATIC bool    S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** 
nodep, UV *code_point_p, int* cp_count, I32 *flagp, const U32 depth);
+STATIC bool    S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state, regnode** 
nodep, UV *code_point_p, int* cp_count, I32 *flagp, const bool strict, const 
U32 depth);
 #define PERL_ARGS_ASSERT_GROK_BSLASH_N \
        assert(pRExC_state); assert(flagp)
 PERL_STATIC_INLINE regnode*    S_handle_named_backref(pTHX_ RExC_state_t 
*pRExC_state, I32 *flagp, char * parse_start, char ch);
diff --git a/regcomp.c b/regcomp.c
index f3b185c..5dbccfb 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -1822,35 +1822,28 @@ S_is_ssc_worth_it(const RExC_state_t * pRExC_state, 
const regnode_ssc * ssc)
      *      (unassigned, private use, surrogates, controls and formats).  This
      *      is a much large number. */
 
-    const U32 max_match = (LOC)
-                          ? 127
-                          : (! UNI_SEMANTICS)
-                            ? 63
-                            : (invlist_highest(ssc->invlist) < 256)
-                              ? 127
-                              : ((NON_OTHER_COUNT + 1) / 2) - 1;
     U32 count = 0;      /* Running total of number of code points matched by
                            'ssc' */
     UV start, end;      /* Start and end points of current range in inversion
                            list */
+    const U32 max_code_points = (LOC)
+                                ?  256
+                                : ((   ! UNI_SEMANTICS
+                                     || invlist_highest(ssc->invlist) < 256)
+                                  ? 128
+                                  : NON_OTHER_COUNT);
+    const U32 max_match = max_code_points / 2;
 
     PERL_ARGS_ASSERT_IS_SSC_WORTH_IT;
 
     invlist_iterinit(ssc->invlist);
     while (invlist_iternext(ssc->invlist, &start, &end)) {
-
-        /* /u is the only thing that we expect to match above 255; so if not /u
-         * and even if there are matches above 255, ignore them.  This catches
-         * things like \d under /d which does match the digits above 255, but
-         * since the pattern is /d, it is not likely to be expecting them */
-        if (! UNI_SEMANTICS) {
-            if (start > 255) {
-                break;
-            }
-            end = MIN(end, 255);
+        if (start >= max_code_points) {
+            break;
         }
+        end = MIN(end, max_code_points - 1);
         count += end - start + 1;
-        if (count > max_match) {
+        if (count >= max_match) {
             invlist_iterfinish(ssc->invlist);
             return FALSE;
         }
@@ -11394,6 +11387,7 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                 UV * code_point_p,
                 int * cp_count,
                 I32 * flagp,
+                const bool strict,
                 const U32 depth
     )
 {
@@ -11543,6 +11537,10 @@ S_grok_bslash_N(pTHX_ RExC_state_t *pRExC_state,
                                         semantics */
 
     if (endbrace == RExC_parse) {   /* empty: \N{} */
+        if (strict) {
+            RExC_parse++;   /* Position after the "}" */
+            vFAIL("Zero length \\N{}");
+        }
         if (cp_count) {
             *cp_count = 0;
         }
@@ -12422,6 +12420,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth)
                               NULL,     /* Don't need a count of how many code
                                            points */
                               flagp,
+                              RExC_strict,
                               depth)
             ) {
                 break;
@@ -12748,6 +12747,7 @@ S_regatom(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth)
                                             NULL,   /* Don't need a count of
                                                        how many code points */
                                             flagp,
+                                            RExC_strict,
                                             depth)
                         ) {
                             if (*flagp & NEED_UTF8)
@@ -15529,6 +15529,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth,
                                         &value,    /* Yes single value */
                                         &cp_count, /* Multiple code pt count */
                                         flagp,
+                                        strict,
                                         depth)
                     ) {
 
@@ -15541,11 +15542,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 
*flagp, U32 depth,
                             vFAIL("\\N in a character class must be a named 
character: \\N{...}");
                         }
                         else if (cp_count == 0) {
-                            if (strict) {
-                                RExC_parse++;   /* Position after the "}" */
-                                vFAIL("Zero length \\N{}");
-                            }
-                            else if (PASS2) {
+                            if (PASS2) {
                                 ckWARNreg(RExC_parse,
                                         "Ignoring zero length \\N{} in 
character class");
                             }
@@ -15666,9 +15663,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth,
 
                     /* Look up the property name, and get its swash and
                      * inversion list, if the property is found  */
-                    if (swash) {    /* Return any left-overs */
-                        SvREFCNT_dec_NN(swash);
-                    }
+                    SvREFCNT_dec(swash); /* Free any left-overs */
                     swash = _core_swash_init("utf8", name, &PL_sv_undef,
                                              1, /* binary */
                                              0, /* not tr/// */
@@ -16344,7 +16339,7 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth,
                            "\"%.*s\" is more clearly written simply as \"%s\"",
                            (int) (RExC_parse - rangebegin),
                            rangebegin,
-                           cntrl_to_mnemonic((char) value)
+                           cntrl_to_mnemonic((U8) value)
                         );
                 }
             }
@@ -18427,11 +18422,16 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o, const regmatch_
                                                     bitmap_invlist);
                 if (lv && lv != &PL_sv_undef) {
                     char *s = savesvpv(lv);
-                    char * const origs = s;
+                    const char * const orig_s = s;  /* Save the beginning of
+                                                       's', so can be freed */
 
+                    /* Ignore anything before the first \n */
                     while (*s && *s != '\n')
                         s++;
 
+                    /* The data are one range per line.  A range is a single
+                     * entity; or two, separated by \t.  So can just convert \n
+                     * to space and \t to '-' */
                     if (*s == '\n') {
                         const char * const t = ++s;
 
@@ -18452,10 +18452,10 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o, const regmatch_
                             if (*s == '\n') {
 
                                 /* Truncate very long output */
-                                if (s - origs > 256) {
+                                if ((UV) (s - t) > 256) {
                                     Perl_sv_catpvf(aTHX_ sv,
                                                 "%.*s...",
-                                                (int) (s - origs - 1),
+                                                (int) (s - t),
                                                 t);
                                     goto out_dump;
                                 }
@@ -18466,15 +18466,18 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o, const regmatch_
                             }
                             s++;
                         }
+
+                        /* Here, it fits in the allocated space.  Replace a
+                         * final blank with a NUL */
                         if (s[-1] == ' ')
-                            s[-1] = 0;
+                            s[-1] = '\0';
 
                         sv_catpv(sv, t);
                     }
 
                   out_dump:
 
-                    Safefree(origs);
+                    Safefree(orig_s);
                     SvREFCNT_dec_NN(lv);
                 }
 
@@ -19173,14 +19176,11 @@ S_put_code_point(pTHX_ SV *sv, UV c)
            sv_catpvs(sv, "\\");
        sv_catpvn(sv, &string, 1);
     }
+    else if (isMNEMONIC_CNTRL(c)) {
+        Perl_sv_catpvf(aTHX_ sv, "%s", cntrl_to_mnemonic((U8) c));
+    }
     else {
-        const char * const mnemonic = cntrl_to_mnemonic((char) c);
-        if (mnemonic) {
-            Perl_sv_catpvf(aTHX_ sv, "%s", mnemonic);
-        }
-        else {
-            Perl_sv_catpvf(aTHX_ sv, "\\x{%02X}", (U8) c);
-        }
+        Perl_sv_catpvf(aTHX_ sv, "\\x%02X", (U8) c);
     }
 }
 
@@ -19335,10 +19335,10 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, 
const bool allow_literals)
                     : NUM_ANYOF_CODE_POINTS - 1;
 #if NUM_ANYOF_CODE_POINTS > 256
         format = (this_end < 256)
-                 ? "\\x{%02"UVXf"}-\\x{%02"UVXf"}"
+                 ? "\\x%02"UVXf"-\\x%02"UVXf""
                  : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
 #else
-        format = "\\x{%02"UVXf"}-\\x{%02"UVXf"}";
+        format = "\\x%02"UVXf"-\\x%02"UVXf"";
 #endif
         GCC_DIAG_IGNORE(-Wformat-nonliteral);
         Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
diff --git a/regcomp.h b/regcomp.h
index 44c2c1c..07e098a 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -553,7 +553,7 @@ struct regnode_ssc {
  * are cautioned about its shared nature */
 #define ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER 0x80
 
-#define ANYOF_FLAGS_ALL                (0xff)
+#define ANYOF_FLAGS_ALL                (0xff & ~0x10)
 
 #define ANYOF_LOCALE_FLAGS (ANYOFL_FOLD | ANYOF_MATCHES_POSIXL)
 
diff --git a/t/re/reg_mesg.t b/t/re/reg_mesg.t
index 6ec5d94..d05922e 100644
--- a/t/re/reg_mesg.t
+++ b/t/re/reg_mesg.t
@@ -295,7 +295,9 @@ my @death_only_under_strict = (
     'm/[\x{ABCDEFG}]/' => 'Illegal hexadecimal digit \'G\' ignored',
                        => 'Non-hex character {#} m/[\x{ABCDEFG{#}}]/',
     'm/[\N{}]/' => 'Ignoring zero length \\N{} in character class {#} 
m/[\\N{}{#}]/',
-                => 'Zero length \\N{} {#} m/[\\N{}]{#}/',
+                => 'Zero length \\N{} {#} m/[\\N{}{#}]/',
+    'm/\N{}/' => "",
+                => 'Zero length \\N{} {#} m/\\N{}{#}/',
     "m'[\\y]\\x{100}'" => 'Unrecognized escape \y in character class passed 
through {#} m/[\y{#}]\x{100}/',
                        => 'Unrecognized escape \y in character class {#} 
m/[\y{#}]\x{100}/',
     'm/[a-\d]\x{100}/' => 'False [] range "a-\d" {#} m/[a-\d{#}]\x{100}/',

--
Perl5 Master Repository

[perl.git] branch blead, updated. v5.23.7-406-g3a3d108

Reply via email to