In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/03fa83ba2035289e6ac69e9f1228252bcc3c0b9d?hp=ca5d3bffbe69ee0415742df798d2d990fce531fc>

- Log -----------------------------------------------------------------
commit 03fa83ba2035289e6ac69e9f1228252bcc3c0b9d
Author: Karl Williamson <[email protected]>
Date:   Wed Sep 3 12:42:07 2014 -0600

    regcomp.h: Comment nits

M       regcomp.h

commit e0a1ff7a2452ef34ae8bb33cda6415709f1833fc
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 28 14:22:14 2014 -0600

    Allow for changing size of bracketed regex char class
    
    This commit allows Perl to be compiled with a bitmap size that is larger
    than 256.  This bitmap is used to directly look up whether a character
    matches or not, without having to do a binary search or hash lookup.  It
    might improve the performance for some installations that have a lot of
    use of scripts that are above the Latin1 range.

M       embedvar.h
M       intrpvar.h
M       perl.c
M       regcomp.c
M       regcomp.h
M       regexec.c
M       sv.c

commit 8e8a446824eed109a7c437ac4a417de07db94cc4
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 28 20:07:30 2014 -0600

    Fix -Dr output to work for larger ANYOF node size
    
    This generalizes the code for -Dr output so that it can dump the contents
    of ANYOF nodes (bracketed character classes) which have bitmaps for more
    than code points 0-255.

M       embed.fnc
M       embed.h
M       proto.h
M       regcomp.c

commit 2ab58e930a8796c192de074ed05261cc1616c779
Author: Karl Williamson <[email protected]>
Date:   Tue Aug 26 08:36:31 2014 -0600

    regcomp.c: Swap if/else clauses
    
    This makes it slightly easier to understand as there is no explicit
    complement, but is mostly for a future commit.

M       regcomp.c

commit 93e92956bb470aeaf41fd87a47176cf4906ffd1c
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 28 14:05:40 2014 -0600

    Rename some internal regex #defines
    
    These are renamed to be more clear as to their actual meanings.  I know
    other people have been confused by their former names.
    
    Some of the name changes will become more important as future commits
    will allow the bitmap in a bracketed character class to be a different
    size.

M       regcomp.c
M       regcomp.h
M       regexec.c

commit f64bdbe57e6dafabd081fc3815ae72a00ebd03e6
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 28 18:19:56 2014 -0600

    regcomp.h: Remove some no-longer used #defines
    
    This is an internal header, so can change names within it.

M       regcomp.h

commit bc51fd7848385e58210a13810ef5ac6f01f70afb
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 28 14:36:15 2014 -0600

    regcomp.h: Use unsigned 1 in left shift
    
    This prevents a signed result if this macro ever gets used in a U8.
    The ANYOF_BITMAP_TEST macro must now be cast, or it would generate warnings
    when compiled with -DPERL_BOOL_AS_CHAR.

M       regcomp.h

commit 6f16c8da34619f286a6f24a2d6286f398d3b4503
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 28 18:50:22 2014 -0600

    regcomp.h: Fix comment that said the opposite of the truth
    
    Too many negations led to this.

M       regcomp.h

commit 70422107c447d915bfc6189d56be459dadadf660
Author: Karl Williamson <[email protected]>
Date:   Thu Aug 28 18:13:47 2014 -0600

    regcomp.c: Remove unnecessary test
    
    The 'while' makes the 'if' unnecessary here.

M       regcomp.c

commit 6942fd9a567743c5784c5445ee49c3a4fc1d3b48
Author: Karl Williamson <[email protected]>
Date:   Wed Aug 27 22:12:02 2014 -0600

    regexec.c: Simplify a short code section
    
    Two "if"s can be combined, leading to one fewer (unoptimized) test.

M       regexec.c
-----------------------------------------------------------------------

Summary of changes:
 embed.fnc  |   2 +-
 embed.h    |   2 +-
 embedvar.h |   1 +
 intrpvar.h |   1 +
 perl.c     |   2 +
 proto.h    |  10 ++--
 regcomp.c  | 195 ++++++++++++++++++++++++++++++++++---------------------------
 regcomp.h  |  82 +++++++++++++-------------
 regexec.c  |  31 +++++-----
 sv.c       |   1 +
 10 files changed, 181 insertions(+), 146 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 44f5ebf..0513663 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2194,7 +2194,7 @@ Es        |const regnode*|dumpuntil|NN const regexp *r|NN 
const regnode *start \
                                |NULLOK const regnode *last \
                                |NULLOK const regnode *plast \
                                |NN SV* sv|I32 indent|U32 depth
-Es     |void   |put_byte       |NN SV* sv|int c
+Es     |void   |put_code_point |NN SV* sv|UV c
 Es     |bool   |put_charclass_bitmap_innards|NN SV* sv     \
                                |NN char* bitmap            \
                                |NULLOK SV** bitmap_invlist
diff --git a/embed.h b/embed.h
index 938a5c9..2abc4e2 100644
--- a/embed.h
+++ b/embed.h
@@ -917,8 +917,8 @@
 #define dump_trie_interim_list(a,b,c,d,e)      S_dump_trie_interim_list(aTHX_ 
a,b,c,d,e)
 #define dump_trie_interim_table(a,b,c,d,e)     S_dump_trie_interim_table(aTHX_ 
a,b,c,d,e)
 #define dumpuntil(a,b,c,d,e,f,g,h)     S_dumpuntil(aTHX_ a,b,c,d,e,f,g,h)
-#define put_byte(a,b)          S_put_byte(aTHX_ a,b)
 #define put_charclass_bitmap_innards(a,b,c)    
S_put_charclass_bitmap_innards(aTHX_ a,b,c)
+#define put_code_point(a,b)    S_put_code_point(aTHX_ a,b)
 #define put_range(a,b,c,d)     S_put_range(aTHX_ a,b,c,d)
 #define regdump_extflags(a,b)  S_regdump_extflags(aTHX_ a,b)
 #define regdump_intflags(a,b)  S_regdump_intflags(aTHX_ a,b)
diff --git a/embedvar.h b/embedvar.h
index 766880c..d481681 100644
--- a/embedvar.h
+++ b/embedvar.h
@@ -53,6 +53,7 @@
 #define PL_Dir                 (vTHX->IDir)
 #define PL_Env                 (vTHX->IEnv)
 #define PL_HasMultiCharFold    (vTHX->IHasMultiCharFold)
+#define PL_InBitmap            (vTHX->IInBitmap)
 #define PL_LIO                 (vTHX->ILIO)
 #define PL_Latin1              (vTHX->ILatin1)
 #define PL_Mem                 (vTHX->IMem)
diff --git a/intrpvar.h b/intrpvar.h
index 06194d9..57918b2 100644
--- a/intrpvar.h
+++ b/intrpvar.h
@@ -580,6 +580,7 @@ PERLVAR(I, numeric_radix_sv, SV *)  /* The radix separator 
if not '.' */
 PERLVAR(I, Latin1,     SV *)
 PERLVAR(I, UpperLatin1,        SV *)   /* Code points 128 - 255 */
 PERLVAR(I, AboveLatin1,        SV *)
+PERLVAR(I, InBitmap,   SV *)
 
 PERLVAR(I, NonL1NonFinalFold,   SV *)
 PERLVAR(I, HasMultiCharFold,   SV *)
diff --git a/perl.c b/perl.c
index b61e2ff..8f45273 100644
--- a/perl.c
+++ b/perl.c
@@ -1034,6 +1034,7 @@ perl_destruct(pTHXx)
     SvREFCNT_dec(PL_utf8_foldable);
     SvREFCNT_dec(PL_utf8_foldclosures);
     SvREFCNT_dec(PL_AboveLatin1);
+    SvREFCNT_dec(PL_InBitmap);
     SvREFCNT_dec(PL_UpperLatin1);
     SvREFCNT_dec(PL_Latin1);
     SvREFCNT_dec(PL_NonL1NonFinalFold);
@@ -1047,6 +1048,7 @@ perl_destruct(pTHXx)
     PL_utf8_idcont     = NULL;
     PL_utf8_foldclosures = NULL;
     PL_AboveLatin1       = NULL;
+    PL_InBitmap          = NULL;
     PL_HasMultiCharFold  = NULL;
     PL_Latin1            = NULL;
     PL_NonL1NonFinalFold = NULL;
diff --git a/proto.h b/proto.h
index a6453dc..35ec89b 100644
--- a/proto.h
+++ b/proto.h
@@ -5382,17 +5382,17 @@ STATIC const regnode*   S_dumpuntil(pTHX_ const regexp 
*r, const regnode *start, c
 #define PERL_ARGS_ASSERT_DUMPUNTIL     \
        assert(r); assert(start); assert(node); assert(sv)
 
-STATIC void    S_put_byte(pTHX_ SV* sv, int c)
-                       __attribute__nonnull__(pTHX_1);
-#define PERL_ARGS_ASSERT_PUT_BYTE      \
-       assert(sv)
-
 STATIC bool    S_put_charclass_bitmap_innards(pTHX_ SV* sv, char* bitmap, SV** 
bitmap_invlist)
                        __attribute__nonnull__(pTHX_1)
                        __attribute__nonnull__(pTHX_2);
 #define PERL_ARGS_ASSERT_PUT_CHARCLASS_BITMAP_INNARDS  \
        assert(sv); assert(bitmap)
 
+STATIC void    S_put_code_point(pTHX_ SV* sv, UV c)
+                       __attribute__nonnull__(pTHX_1);
+#define PERL_ARGS_ASSERT_PUT_CODE_POINT        \
+       assert(sv)
+
 STATIC void    S_put_range(pTHX_ SV* sv, UV start, const UV end, const bool 
allow_literals)
                        __attribute__nonnull__(pTHX_1);
 #define PERL_ARGS_ASSERT_PUT_RANGE     \
diff --git a/regcomp.c b/regcomp.c
index ef6cae9..3f12e97 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -873,7 +873,7 @@ S_ssc_anything(pTHX_ regnode_ssc *ssc)
 
     ssc->invlist = sv_2mortal(_new_invlist(2)); /* mortalize so won't leak */
     _append_range_to_invlist(ssc->invlist, 0, UV_MAX);
-    ANYOF_FLAGS(ssc) |= ANYOF_EMPTY_STRING;    /* Plus match empty string */
+    ANYOF_FLAGS(ssc) |= SSC_MATCHES_EMPTY_STRING;  /* Plus matches empty */
 }
 
 STATIC int
@@ -891,7 +891,7 @@ S_ssc_is_anything(const regnode_ssc *ssc)
 
     assert(is_ANYOF_SYNTHETIC(ssc));
 
-    if (! (ANYOF_FLAGS(ssc) & ANYOF_EMPTY_STRING)) {
+    if (! (ANYOF_FLAGS(ssc) & SSC_MATCHES_EMPTY_STRING)) {
         return FALSE;
     }
 
@@ -930,7 +930,7 @@ S_ssc_init(pTHX_ const RExC_state_t *pRExC_state, 
regnode_ssc *ssc)
 
     Zero(ssc, 1, regnode_ssc);
     set_ANYOF_SYNTHETIC(ssc);
-    ARG_SET(ssc, ANYOF_NONBITMAP_EMPTY);
+    ARG_SET(ssc, ANYOF_ONLY_HAS_BITMAP);
     ssc_anything(ssc);
 
     /* If any portion of the regex is to operate under locale rules,
@@ -1000,7 +1000,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t 
*pRExC_state,
     PERL_ARGS_ASSERT_GET_ANYOF_CP_LIST_FOR_SSC;
 
     /* Look at the data structure created by S_set_ANYOF_arg() */
-    if (n != ANYOF_NONBITMAP_EMPTY) {
+    if (n != ANYOF_ONLY_HAS_BITMAP) {
         SV * const rv = MUTABLE_SV(RExC_rxi->data->data[n]);
         AV * const av = MUTABLE_AV(SvRV(rv));
         SV **const ary = AvARRAY(av);
@@ -1056,13 +1056,13 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t 
*pRExC_state,
 
     /* If this can match all upper Latin1 code points, have to add them
      * as well */
-    if (ANYOF_FLAGS(node) & ANYOF_NON_UTF8_NON_ASCII_ALL) {
+    if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII) {
         _invlist_union(invlist, PL_UpperLatin1, &invlist);
     }
 
     /* Similarly for these */
-    if (ANYOF_FLAGS(node) & ANYOF_ABOVE_LATIN1_ALL) {
-        invlist = _add_range_to_invlist(invlist, 256, UV_MAX);
+    if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+        _invlist_union_complement_2nd(invlist, PL_InBitmap, &invlist);
     }
 
     if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
@@ -1095,8 +1095,8 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t 
*pRExC_state,
 #define ssc_match_all_cp(ssc) ssc_add_range(ssc, 0, UV_MAX)
 
 /* 'AND' a given class with another one.  Can create false positives.  'ssc'
- * should not be inverted.  'and_with->flags & ANYOF_POSIXL' should be 0 if
- * 'and_with' is a regnode_charclass instead of a regnode_ssc. */
+ * should not be inverted.  'and_with->flags & ANYOF_MATCHES_POSIXL' should be
+ * 0 if 'and_with' is a regnode_charclass instead of a regnode_ssc. */
 
 STATIC void
 S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
@@ -1187,7 +1187,7 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, 
regnode_ssc *ssc,
 
         /* If either P1 or P2 is empty, the intersection will be also; can skip
          * the loop */
-        if (! (ANYOF_FLAGS(and_with) & ANYOF_POSIXL)) {
+        if (! (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL)) {
             ANYOF_POSIXL_ZERO(ssc);
         }
         else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
@@ -1246,16 +1246,16 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, 
regnode_ssc *ssc,
             else {
                 ssc->invlist = anded_cp_list;
                 ANYOF_POSIXL_ZERO(ssc);
-                if (ANYOF_FLAGS(and_with) & ANYOF_POSIXL) {
+                if (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL) {
                     ANYOF_POSIXL_OR((regnode_charclass_posixl*) and_with, ssc);
                 }
             }
         }
         else if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)
-                 || (ANYOF_FLAGS(and_with) & ANYOF_POSIXL))
+                 || (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL))
         {
             /* One or the other of P1, P2 is non-empty. */
-            if (ANYOF_FLAGS(and_with) & ANYOF_POSIXL) {
+            if (ANYOF_FLAGS(and_with) & ANYOF_MATCHES_POSIXL) {
                 ANYOF_POSIXL_AND((regnode_charclass_posixl*) and_with, ssc);
             }
             ssc_union(ssc, anded_cp_list, FALSE);
@@ -1317,7 +1317,7 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, 
regnode_ssc *ssc,
     {
         /* We ignore P2, leaving P1 going forward */
     }   /* else  Not inverted */
-    else if (ANYOF_FLAGS(or_with) & ANYOF_POSIXL) {
+    else if (ANYOF_FLAGS(or_with) & ANYOF_MATCHES_POSIXL) {
         ANYOF_POSIXL_OR((regnode_charclass_posixl*)or_with, ssc);
         if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
             unsigned int i;
@@ -1421,8 +1421,8 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, 
regnode_ssc *ssc)
     assert(is_ANYOF_SYNTHETIC(ssc));
 
     /* The code in this file assumes that all but these flags aren't relevant
-     * to the SSC, except ANYOF_EMPTY_STRING, which should be cleared by the
-     * time we reach here */
+     * to the SSC, except SSC_MATCHES_EMPTY_STRING, which should be cleared
+     * by the time we reach here */
     assert(! (ANYOF_FLAGS(ssc) & ~ANYOF_COMMON_FLAGS));
 
     populate_ANYOF_from_invlist( (regnode *) ssc, &invlist);
@@ -1434,7 +1434,7 @@ S_ssc_finalize(pTHX_ RExC_state_t *pRExC_state, 
regnode_ssc *ssc)
     ssc->invlist = NULL;
 
     if (ANYOF_POSIXL_SSC_TEST_ANY_SET(ssc)) {
-        ANYOF_FLAGS(ssc) |= ANYOF_POSIXL;
+        ANYOF_FLAGS(ssc) |= ANYOF_MATCHES_POSIXL;
     }
 
     assert(! (ANYOF_FLAGS(ssc) & ANYOF_LOCALE_FLAGS) || RExC_contains_locale);
@@ -4235,7 +4235,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode 
**scanp,
              * can't match null string */
            if (flags & SCF_DO_STCLASS_AND) {
                 ssc_cp_and(data->start_class, uc);
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
                 ssc_clear_locale(data->start_class);
            }
            else if (flags & SCF_DO_STCLASS_OR) {
@@ -4243,7 +4243,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode 
**scanp,
                ssc_and(pRExC_state, data->start_class, (regnode_charclass *) 
and_withp);
 
                 /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
            }
            flags &= ~SCF_DO_STCLASS;
        }
@@ -4418,7 +4418,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode 
**scanp,
                 }
             }
            if (flags & SCF_DO_STCLASS_AND) {
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
                 ANYOF_POSIXL_ZERO(data->start_class);
                 ssc_intersection(data->start_class, EXACTF_invlist, FALSE);
            }
@@ -4427,7 +4427,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode 
**scanp,
                ssc_and(pRExC_state, data->start_class, (regnode_charclass *) 
and_withp);
 
                 /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
            }
            flags &= ~SCF_DO_STCLASS;
             SvREFCNT_dec(EXACTF_invlist);
@@ -4546,7 +4546,8 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode 
**scanp,
                        flags &= ~SCF_DO_STCLASS_AND;
                        StructCopy(&this_class, data->start_class, regnode_ssc);
                        flags |= SCF_DO_STCLASS_OR;
-                        ANYOF_FLAGS(data->start_class) |= ANYOF_EMPTY_STRING;
+                        ANYOF_FLAGS(data->start_class)
+                                                |= SSC_MATCHES_EMPTY_STRING;
                    }
                } else {                /* Non-zero len */
                    if (flags & SCF_DO_STCLASS_OR) {
@@ -4842,7 +4843,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                     ssc_intersection(data->start_class,
                                     PL_XPosix_ptrs[_CC_VERTSPACE], FALSE);
                     ssc_clear_locale(data->start_class);
-                    ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                    ANYOF_FLAGS(data->start_class)
+                                                &= ~SSC_MATCHES_EMPTY_STRING;
                 }
                 else if (flags & SCF_DO_STCLASS_OR) {
                     ssc_union(data->start_class,
@@ -4852,7 +4854,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
 
                     /* See commit msg for
                      * 749e076fceedeb708a624933726e7989f2302f6a */
-                    ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                    ANYOF_FLAGS(data->start_class)
+                                                &= ~SSC_MATCHES_EMPTY_STRING;
                 }
                flags &= ~SCF_DO_STCLASS;
             }
@@ -4879,7 +4882,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                 U8 namedclass;
 
                 /* See commit msg 749e076fceedeb708a624933726e7989f2302f6a */
-                ANYOF_FLAGS(data->start_class) &= ~ANYOF_EMPTY_STRING;
+                ANYOF_FLAGS(data->start_class) &= ~SSC_MATCHES_EMPTY_STRING;
 
                /* Some of the logic below assumes that switching
                   locale on will only add false positives. */
@@ -5120,7 +5123,8 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
                          * assertions are zero-length, so can match an EMPTY
                          * string */
                        ssc_and(pRExC_state, data->start_class, 
(regnode_charclass *) &intrnl);
-                        ANYOF_FLAGS(data->start_class) |= ANYOF_EMPTY_STRING;
+                        ANYOF_FLAGS(data->start_class)
+                                                   |= SSC_MATCHES_EMPTY_STRING;
                    }
                 }
            }
@@ -5192,7 +5196,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVuf" RHS=%"UVuf"\n",
 
                 if (f & SCF_DO_STCLASS_AND) {
                     ssc_and(pRExC_state, data->start_class, (regnode_charclass 
*) &intrnl);
-                    ANYOF_FLAGS(data->start_class) |= ANYOF_EMPTY_STRING;
+                    ANYOF_FLAGS(data->start_class) |= SSC_MATCHES_EMPTY_STRING;
                 }
                 if (data) {
                     if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
@@ -6272,6 +6276,13 @@ Perl_re_op_compile(pTHX_ SV ** const patternp, int 
pat_count,
         PL_utf8_foldable = _new_invlist_C_array(_Perl_Any_Folds_invlist);
         PL_HasMultiCharFold =
                        _new_invlist_C_array(_Perl_Folds_To_Multi_Char_invlist);
+
+        /* This is calculated here, because the Perl program that generates the
+         * static global ones doesn't currently have access to
+         * NUM_ANYOF_CODE_POINTS */
+       PL_InBitmap = _new_invlist(2);
+       PL_InBitmap = _add_range_to_invlist(PL_InBitmap, 0,
+                                                    NUM_ANYOF_CODE_POINTS - 1);
     }
 #endif
 
@@ -6989,7 +7000,7 @@ reStudy:
 
        if ((!(r->anchored_substr || r->anchored_utf8) || r->anchored_offset)
            && stclass_flag
-            && ! (ANYOF_FLAGS(data.start_class) & ANYOF_EMPTY_STRING)
+            && ! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
            && !ssc_is_anything(data.start_class))
        {
            const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
@@ -7069,7 +7080,7 @@ reStudy:
        r->check_substr = r->check_utf8 = r->anchored_substr = r->anchored_utf8
                = r->float_substr = r->float_utf8 = NULL;
 
-        if (! (ANYOF_FLAGS(data.start_class) & ANYOF_EMPTY_STRING)
+        if (! (ANYOF_FLAGS(data.start_class) & SSC_MATCHES_EMPTY_STRING)
             && ! ssc_is_anything(data.start_class))
         {
            const U32 n = add_data(pRExC_state, STR_WITH_LEN("f"));
@@ -12451,11 +12462,11 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, 
SV** invlist_ptr)
            UV high;
            int i;
 
-            if (end == UV_MAX && start <= 256) {
-                ANYOF_FLAGS(node) |= ANYOF_ABOVE_LATIN1_ALL;
+            if (end == UV_MAX && start <= NUM_ANYOF_CODE_POINTS) {
+                ANYOF_FLAGS(node) |= ANYOF_MATCHES_ALL_ABOVE_BITMAP;
             }
-            else if (end >= 256) {
-                ANYOF_FLAGS(node) |= ANYOF_UTF8;
+            else if (end >= NUM_ANYOF_CODE_POINTS) {
+                ANYOF_FLAGS(node) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES;
             }
 
            /* Quit if are above what we should change */
@@ -12478,13 +12489,13 @@ S_populate_ANYOF_from_invlist(pTHX_ regnode *node, 
SV** invlist_ptr)
        invlist_iterfinish(*invlist_ptr);
 
         /* Done with loop; remove any code points that are in the bitmap from
-         * *invlist_ptr; similarly for code points above latin1 if we have a
-         * flag to match all of them anyways */
+         * *invlist_ptr; similarly for code points above the bitmap if we have
+         * a flag to match all of them anyways */
        if (change_invlist) {
-           _invlist_subtract(*invlist_ptr, PL_Latin1, invlist_ptr);
+           _invlist_subtract(*invlist_ptr, PL_InBitmap, invlist_ptr);
        }
-        if (ANYOF_FLAGS(node) & ANYOF_ABOVE_LATIN1_ALL) {
-           _invlist_intersection(*invlist_ptr, PL_Latin1, invlist_ptr);
+        if (ANYOF_FLAGS(node) & ANYOF_MATCHES_ALL_ABOVE_BITMAP) {
+           _invlist_intersection(*invlist_ptr, PL_InBitmap, invlist_ptr);
        }
 
        /* If have completely emptied it, remove it completely */
@@ -13646,7 +13657,8 @@ parseit:
                          * inappropriately, except that any \p{}, including
                          * this one forces Unicode semantics, which means there
                          * is no <depends_list> */
-                        ANYOF_FLAGS(ret) |= ANYOF_NONBITMAP_NON_UTF8;
+                        ANYOF_FLAGS(ret)
+                                      |= ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES;
                     }
                     else {
 
@@ -13865,18 +13877,18 @@ parseit:
                     else {
                         RExC_emit += ANYOF_POSIXL_SKIP - ANYOF_SKIP;
                     }
-                    ANYOF_FLAGS(ret) |= ANYOF_POSIXL;
+                    ANYOF_FLAGS(ret) |= ANYOF_MATCHES_POSIXL;
                     ANYOF_POSIXL_ZERO(ret);
                 }
 
                 /* Coverity thinks it is possible for this to be negative; both
                  * jhi and khw think it's not, but be safer */
-                assert(! (ANYOF_FLAGS(ret) & ANYOF_POSIXL)
+                assert(! (ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
                        || (namedclass + ((namedclass % 2) ? -1 : 1)) >= 0);
 
                 /* See if it already matches the complement of this POSIX
                  * class */
-                if ((ANYOF_FLAGS(ret) & ANYOF_POSIXL)
+                if ((ANYOF_FLAGS(ret) & ANYOF_MATCHES_POSIXL)
                     && ANYOF_POSIXL_TEST(ret, namedclass + ((namedclass % 2)
                                                             ? -1
                                                             : 1)))
@@ -14598,7 +14610,7 @@ parseit:
             if (DEPENDS_SEMANTICS) {
                 /* Under /d, everything in the upper half of the Latin1 range
                  * matches these complements */
-                ANYOF_FLAGS(ret) |= ANYOF_NON_UTF8_NON_ASCII_ALL;
+                ANYOF_FLAGS(ret) |= ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII;
             }
             else if (AT_LEAST_ASCII_RESTRICTED) {
                 /* Under /a and /aa, everything above ASCII matches these
@@ -14904,7 +14916,7 @@ parseit:
        else {
            cp_list = depends_list;
        }
-        ANYOF_FLAGS(ret) |= ANYOF_UTF8;
+        ANYOF_FLAGS(ret) |= ANYOF_HAS_UTF8_NONBITMAP_MATCHES;
     }
 
     /* If there is a swash and more than one element, we can't use the swash in
@@ -14946,7 +14958,7 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
 {
     /* Sets the arg field of an ANYOF-type node 'node', using information about
      * the node passed-in.  If there is nothing outside the node's bitmap, the
-     * arg is set to ANYOF_NONBITMAP_EMPTY.  Otherwise, it sets the argument to
+     * arg is set to ANYOF_ONLY_HAS_BITMAP.  Otherwise, it sets the argument to
      * the count returned by add_data(), having allocated and stored an array,
      * av, that that count references, as follows:
      *  av[0] stores the character class description in its textual form.
@@ -14972,15 +14984,17 @@ S_set_ANYOF_arg(pTHX_ RExC_state_t* const pRExC_state,
 
     if (! cp_list && ! runtime_defns && ! only_utf8_locale_list) {
         assert(! (ANYOF_FLAGS(node)
-                    & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8)));
-       ARG_SET(node, ANYOF_NONBITMAP_EMPTY);
+                  & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
+                     |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES)));
+       ARG_SET(node, ANYOF_ONLY_HAS_BITMAP);
     }
     else {
        AV * const av = newAV();
        SV *rv;
 
         assert(ANYOF_FLAGS(node)
-                    & (ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8|ANYOF_LOC_FOLD));
+               & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
+                  |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD));
 
        av_store(av, 0, (runtime_defns)
                        ? SvREFCNT_inc(runtime_defns) : &PL_sv_undef);
@@ -15046,7 +15060,8 @@ Perl__get_regclass_nonbitmap_data(pTHX_ const regexp 
*prog,
     PERL_ARGS_ASSERT__GET_REGCLASS_NONBITMAP_DATA;
 
     assert(ANYOF_FLAGS(node)
-                        & 
(ANYOF_UTF8|ANYOF_NONBITMAP_NON_UTF8|ANYOF_LOC_FOLD));
+        & (ANYOF_HAS_UTF8_NONBITMAP_MATCHES
+           |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES|ANYOF_LOC_FOLD));
 
     if (data && data->count) {
        const U32 n = ARG(node);
@@ -15944,9 +15959,9 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o, const regmatch_
             }
         }
 
-       if ((flags & (ANYOF_ABOVE_LATIN1_ALL
-                      |ANYOF_UTF8
-                      |ANYOF_NONBITMAP_NON_UTF8
+       if ((flags & (ANYOF_MATCHES_ALL_ABOVE_BITMAP
+                      |ANYOF_HAS_UTF8_NONBITMAP_MATCHES
+                      |ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES
                       |ANYOF_LOC_FOLD)))
         {
             if (do_sep) {
@@ -15956,14 +15971,14 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o, const regmatch_
                     sv_catpvs(sv, "^");
             }
 
-            if (flags & ANYOF_NON_UTF8_NON_ASCII_ALL) {
+            if (flags & ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII) {
                 sv_catpvs(sv, "{non-utf8-latin1-all}");
             }
 
             /* output information about the unicode matching */
-            if (flags & ANYOF_ABOVE_LATIN1_ALL)
-                sv_catpvs(sv, "{unicode_all}");
-            else if (ARG(o) != ANYOF_NONBITMAP_EMPTY) {
+            if (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP)
+                sv_catpvs(sv, "{above_bitmap_all}");
+            else if (ARG(o) != ANYOF_ONLY_HAS_BITMAP) {
                 SV *lv; /* Set if there is something outside the bit map. */
                 bool byte_output = FALSE;   /* If something in the bitmap has
                                                been output */
@@ -15985,7 +16000,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o, const regmatch_
                     if (*s == '\n') {
                         const char * const t = ++s;
 
-                        if (flags & ANYOF_NONBITMAP_NON_UTF8) {
+                        if (flags & ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES) {
                             sv_catpvs(sv, "{outside bitmap}");
                         }
                         else {
@@ -16685,12 +16700,21 @@ Perl_save_re_context(pTHX)
                     ((c) == '-' || (c) == ']' || (c) == '\\' || (c) == '^')
 
 STATIC void
-S_put_byte(pTHX_ SV *sv, int c)
+S_put_code_point(pTHX_ SV *sv, UV c)
 {
-    PERL_ARGS_ASSERT_PUT_BYTE;
+    PERL_ARGS_ASSERT_PUT_CODE_POINT;
 
-    if (!isPRINT(c)) {
-        switch (c) {
+    if (c > 255) {
+        Perl_sv_catpvf(aTHX_ sv, "\\x{%04"UVXf"}", c);
+    }
+    else if (isPRINT(c)) {
+       const char string = (char) c;
+       if (isBACKSLASHED_PUNCT(c))
+           sv_catpvs(sv, "\\");
+       sv_catpvn(sv, &string, 1);
+    }
+    else {
+        switch ((U8) c) {
             case '\a': Perl_sv_catpvf(aTHX_ sv, "\\a"); break;
             case '\b': Perl_sv_catpvf(aTHX_ sv, "\\b"); break;
             case ESC_NATIVE: Perl_sv_catpvf(aTHX_ sv, "\\e"); break;
@@ -16698,15 +16722,9 @@ S_put_byte(pTHX_ SV *sv, int c)
             case '\n': Perl_sv_catpvf(aTHX_ sv, "\\n"); break;
             case '\r': Perl_sv_catpvf(aTHX_ sv, "\\r"); break;
             case '\t': Perl_sv_catpvf(aTHX_ sv, "\\t"); break;
-            default: Perl_sv_catpvf(aTHX_ sv, "\\x{%02X}", c); break;
+            default: Perl_sv_catpvf(aTHX_ sv, "\\x{%02X}", (U8) c); break;
         }
     }
-    else {
-       const char string = c;
-       if (isBACKSLASHED_PUNCT(c))
-           sv_catpvs(sv, "\\");
-       sv_catpvn(sv, &string, 1);
-    }
 }
 
 #define MAX_PRINT_A MAX_PRINT_A_FOR_USE_ONLY_BY_REGCOMP_DOT_C
@@ -16720,7 +16738,7 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const 
bool allow_literals)
 {
     /* Appends to 'sv' a displayable version of the range of code points from
      * 'start' to 'end'.  It assumes that only ASCII printables are displayable
-     * as-is (though some of these will be escaped by put_byte()). */
+     * as-is (though some of these will be escaped by put_code_point()). */
 
     const unsigned int min_range_count = 3;
 
@@ -16729,11 +16747,14 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, 
const bool allow_literals)
     PERL_ARGS_ASSERT_PUT_RANGE;
 
     while (start <= end) {
+        UV this_end;
+        const char * format;
+
         if (end - start < min_range_count) {
 
             /* Individual chars in short ranges */
             for (; start <= end; start++) {
-                put_byte(sv, start);
+                put_code_point(sv, start);
             }
             break;
         }
@@ -16805,9 +16826,9 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const 
bool allow_literals)
                     put_range(sv, start, temp_end, FALSE);
                 }
                 else {  /* Output as a range */
-                    put_byte(sv, start);
+                    put_code_point(sv, start);
                     sv_catpvs(sv, "-");
-                    put_byte(sv, temp_end);
+                    put_code_point(sv, temp_end);
                 }
                 start = temp_end + 1;
                 continue;
@@ -16818,7 +16839,7 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const 
bool allow_literals)
                 while (start <= end && (isPUNCT_A(start)
                                         || isSPACE_A(start)))
                 {
-                    put_byte(sv, start);
+                    put_code_point(sv, start);
                     start++;
                 }
                 continue;
@@ -16829,11 +16850,9 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
          * mnemonic names.  Split off any of those at the beginning and end of
          * the range to print mnemonically.  It isn't possible for many of
          * these to be in a row, so this won't overwhelm with output */
-        if (isMNEMONIC_CNTRL(start)) {
-            while (isMNEMONIC_CNTRL(start) && start <= end) {
-                put_byte(sv, start);
-                start++;
-            }
+        while (isMNEMONIC_CNTRL(start) && start <= end) {
+            put_code_point(sv, start);
+            start++;
         }
         if (start < end && isMNEMONIC_CNTRL(end)) {
 
@@ -16850,18 +16869,21 @@ S_put_range(pTHX_ SV *sv, UV start, const UV end, const bool allow_literals)
             /* Then output the mnemonic trailing controls */
             start = temp_end + 1;
             while (start <= end) {
-                put_byte(sv, start);
+                put_code_point(sv, start);
                 start++;
             }
             break;
         }
 
         /* As a final resort, output the range or subrange as hex. */
-        Perl_sv_catpvf(aTHX_ sv, "\\x{%02" UVXf "}-\\x{%02" UVXf "}",
-                       start,
-                       (end < NUM_ANYOF_CODE_POINTS)
-                       ? end
-                       : NUM_ANYOF_CODE_POINTS - 1);
+
+        this_end = (end < NUM_ANYOF_CODE_POINTS)
+                    ? end
+                    : NUM_ANYOF_CODE_POINTS - 1;
+        format = (this_end < 256)
+                 ? "\\x{%02"UVXf"}-\\x{%02"UVXf"}"
+                 : "\\x{%04"UVXf"}-\\x{%04"UVXf"}";
+        Perl_sv_catpvf(aTHX_ sv, format, start, this_end);
         break;
     }
 }
@@ -16952,8 +16974,7 @@ S_put_charclass_bitmap_innards(pTHX_ SV *sv, char *bitmap, SV** bitmap_invlist)
 
         /* Add everything remaining to the list, so when we invert it just
          * below, it will be excluded */
-        *invlist_ptr = _add_range_to_invlist(*invlist_ptr,
-                                             NUM_ANYOF_CODE_POINTS, UV_MAX);
+        _invlist_union_complement_2nd(*invlist_ptr, PL_InBitmap, invlist_ptr);
         _invlist_invert(*invlist_ptr);
     }
 
@@ -17118,7 +17139,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node,
        }
        else if (PL_regkind[(U8)op] == ANYOF) {
            /* arglen 1 + class block */
-           node += 1 + ((ANYOF_FLAGS(node) & ANYOF_POSIXL)
+           node += 1 + ((ANYOF_FLAGS(node) & ANYOF_MATCHES_POSIXL)
                           ? ANYOF_POSIXL_SKIP
                           : ANYOF_SKIP);
            node = NEXTOPER(node);
diff --git a/regcomp.h b/regcomp.h
index 68646f1..2b73d86 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -184,7 +184,20 @@ struct regnode_2 {
     U16 arg2;
 };
 
-#define NUM_ANYOF_CODE_POINTS   256
+/* This gives the number of code points that can be in the bitmap of an ANYOF
+ * node.  The shift number must currently be one of: 8..12.  It can't be less
+ * than 8 (256) because some code relies on it being at least that.  Above 12
+ * (4096), and you start running into warnings that some data structure widths
+ * have been exceeded, though the test suite as of this writing still passes
+ * for up through 16, which is as high as anyone would ever want to go,
+ * encompassing all of the Unicode BMP, and thus including all the economically
+ * important world scripts.  At 12 most of them are: including Arabic,
+ * Cyrillic, Greek, Hebrew, Indian subcontinent, Latin, and Thai; but not Han,
+ * Japanese, nor Korean.  (The regarglen structure in regnodes.h is a U8, and
+ * the trie types TRIEC and AHOCORASICKC are larger than U8 for shift values
+ * above 12.)  Be sure to benchmark before changing, as larger sizes do
+ * significantly slow down the test suite */
+#define NUM_ANYOF_CODE_POINTS   (1 << 8)
 
 #define ANYOF_BITMAP_SIZE      (NUM_ANYOF_CODE_POINTS / 8)   /* 8 bits/Byte */
 
@@ -210,11 +223,11 @@ struct regnode_charclass {
 
 /* has runtime (locale) \d, \w, ..., [:posix:] classes */
 struct regnode_charclass_class {
-    U8 flags;                          /* ANYOF_POSIXL bit must go here */
+    U8 flags;                      /* ANYOF_MATCHES_POSIXL bit must go here */
     U8  type;
     U16 next_off;
     U32 arg1;
-    char bitmap[ANYOF_BITMAP_SIZE];            /* both compile-time */
+    char bitmap[ANYOF_BITMAP_SIZE];            /* both compile-time ... */
     U32 classflags;                            /* and run-time */
 };
 
@@ -228,11 +241,11 @@ struct regnode_charclass_class {
  * have a pointer field because there is no alignment issue, and because it is
  * set to NULL after construction, before any cloning of the pattern */
 struct regnode_ssc {
-    U8 flags;                          /* ANYOF_POSIXL bit must go here */
+    U8 flags;                      /* ANYOF_MATCHES_POSIXL bit must go here */
     U8  type;
     U16 next_off;
     U32 arg1;
-    char bitmap[ANYOF_BITMAP_SIZE];    /* both compile-time */
+    char bitmap[ANYOF_BITMAP_SIZE];    /* both compile-time ... */
     U32 classflags;                    /* and run-time */
 
     /* Auxiliary, only used during construction; NULL afterwards: list of code
@@ -347,13 +360,13 @@ struct regnode_ssc {
 #define PASS1 SIZE_ONLY
 #define PASS2 (! SIZE_ONLY)
 
-/* If the bitmap doesn't fully represent what this ANYOF node can match, the
+/* If the bitmap fully represents what this ANYOF node can match, the
  * ARG is set to this special value (since 0, 1, ... are legal, but will never
  * reach this high). */
-#define ANYOF_NONBITMAP_EMPTY  ((U32) -1)
+#define ANYOF_ONLY_HAS_BITMAP  ((U32) -1)
 
 /* Flags for node->flags of ANYOF.  These are in short supply, with none
- * currently available.  The ABOVE_LATIN1_ALL bit could be freed up
+ * currently available.  The MATCHES_ALL_ABOVE_BITMAP bit could be freed up
  * by resorting to creating a swash containing everything above 255.  This
  * introduces a performance penalty.  An option that wouldn't slow things down
  * would be to split one of the two LOC flags out into a separate
@@ -365,57 +378,55 @@ struct regnode_ssc {
  * only for /d, so there are no combinatorial issues.  The LOC flag to use is
  * probably the POSIXL one.
  * Several flags are not used in synthetic start class (SSC) nodes, so could be
- * shared should new flags be needed for SSCs, like ANYOF_EMPTY_STRING now. */
+ * shared should new flags be needed for SSCs, like SSC_MATCHES_EMPTY_STRING
+ * now. */
 
 /* regexec.c is expecting this to be in the low bit */
-#define ANYOF_INVERT            0x01
+#define ANYOF_INVERT                           0x01
 
 /* For the SSC node only, which cannot be inverted, so is shared with that bit.
- * This means "Does this SSC match an empty string?"  This is used only during
- * regex compilation. */
-#define ANYOF_EMPTY_STRING       ANYOF_INVERT
+ * This is used only during regex compilation. */
+#define SSC_MATCHES_EMPTY_STRING                ANYOF_INVERT
 
-/* Are there things that will match only if the target string is encoded in
- * UTF-8?  (This is not set if ANYOF_AOVE_LATIN1_ALL is set) */
-#define ANYOF_UTF8               0x02
+/* Are there things outside the bitmap that will match only if the target
+ * string is encoded in UTF-8?  (This is not set if
+ * ANYOF_MATCHES_ALL_ABOVE_BITMAP is set) */
+#define ANYOF_HAS_UTF8_NONBITMAP_MATCHES        0x02
 
 /* The fold is calculated and stored in the bitmap where possible at compile
  * time.  However under locale, the actual folding varies depending on
  * what the locale is at the time of execution, so it has to be deferred until
  * then */
-#define ANYOF_LOC_FOLD           0x04
+#define ANYOF_LOC_FOLD                          0x04
 
 /* Set if this is a regnode_charclass_posixl vs a regnode_charclass.  This
  * is used for runtime \d, \w, [:posix:], ..., which are used only in locale
  * and the optimizer's synthetic start class.  Non-locale \d, etc are resolved
  * at compile-time */
-#define ANYOF_POSIXL            0x08
-#define ANYOF_CLASS             ANYOF_POSIXL
-#define ANYOF_LARGE              ANYOF_POSIXL
+#define ANYOF_MATCHES_POSIXL                    0x08
 
 /* Should we raise a warning if matching against an above-Unicode code point?
  * */
-#define ANYOF_WARN_SUPER        0x10
+#define ANYOF_WARN_SUPER                        0x10
 
 /* Can match something outside the bitmap that isn't in utf8 */
-#define ANYOF_NONBITMAP_NON_UTF8 0x20
+#define ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES    0x20
 
-/* Matches every code point 0x100 and above*/
-#define ANYOF_ABOVE_LATIN1_ALL  0x40
-#define ANYOF_UNICODE_ALL       ANYOF_ABOVE_LATIN1_ALL
+/* Matches every code point NUM_ANYOF_CODE_POINTS and above */
+#define ANYOF_MATCHES_ALL_ABOVE_BITMAP          0x40
 
 /* Match all Latin1 characters that aren't ASCII when the target string is not
  * in utf8. */
-#define ANYOF_NON_UTF8_NON_ASCII_ALL 0x80
+#define ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII    0x80
 
 #define ANYOF_FLAGS_ALL                (0xff)
 
-#define ANYOF_LOCALE_FLAGS (ANYOF_LOC_FOLD | ANYOF_POSIXL)
+#define ANYOF_LOCALE_FLAGS (ANYOF_LOC_FOLD | ANYOF_MATCHES_POSIXL)
 
 /* These are the flags that apply to both regular ANYOF nodes and synthetic
  * start class nodes during construction of the SSC.  During finalization of
  * the SSC, other of the flags could be added to it */
-#define ANYOF_COMMON_FLAGS    (ANYOF_WARN_SUPER|ANYOF_UTF8)
+#define ANYOF_COMMON_FLAGS    (ANYOF_WARN_SUPER|ANYOF_HAS_UTF8_NONBITMAP_MATCHES)
 
 /* Character classes for node->classflags of ANYOF */
 /* Should be synchronized with a table in regprop() */
@@ -500,7 +511,7 @@ struct regnode_ssc {
 
 #define ANYOF_FLAGS(p)         ((p)->flags)
 
-#define ANYOF_BIT(c)           (1 << ((c) & 7))
+#define ANYOF_BIT(c)           (1U << ((c) & 7))
 
 #define ANYOF_POSIXL_SET(p, c) (((regnode_charclass_posixl*) (p))->classflags |= (1U << (c)))
 #define ANYOF_CLASS_SET(p, c)  ANYOF_POSIXL_SET((p), (c))
@@ -519,7 +530,7 @@ struct regnode_ssc {
 #define ANYOF_CLASS_SETALL(ret) ANYOF_POSIXL_SETALL(ret)
 
 #define ANYOF_POSIXL_TEST_ANY_SET(p)                               \
-        ((ANYOF_FLAGS(p) & ANYOF_POSIXL)                           \
+        ((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL)                           \
         && (((regnode_charclass_posixl*)(p))->classflags))
 #define ANYOF_CLASS_TEST_ANY_SET(p) ANYOF_POSIXL_TEST_ANY_SET(p)
 
@@ -532,7 +543,7 @@ struct regnode_ssc {
                         == ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1)
 
 #define ANYOF_POSIXL_TEST_ALL_SET(p)                                   \
-        ((ANYOF_FLAGS(p) & ANYOF_POSIXL)                               \
+        ((ANYOF_FLAGS(p) & ANYOF_MATCHES_POSIXL)                               \
          && ((regnode_charclass_posixl*) (p))->classflags              \
                         == ((1U << ((ANYOF_POSIXL_MAX) - 1))) - 1)
 
@@ -546,19 +557,12 @@ struct regnode_ssc {
 #define ANYOF_BITMAP_BYTE(p, c)        (ANYOF_BITMAP(p)[(((U8)(c)) >> 3) & 31])
 #define ANYOF_BITMAP_SET(p, c) (ANYOF_BITMAP_BYTE(p, c) |=  ANYOF_BIT(c))
 #define ANYOF_BITMAP_CLEAR(p,c)        (ANYOF_BITMAP_BYTE(p, c) &= ~ANYOF_BIT(c))
-#define ANYOF_BITMAP_TEST(p, c)        (ANYOF_BITMAP_BYTE(p, c) &   ANYOF_BIT(c))
+#define ANYOF_BITMAP_TEST(p, c)        cBOOL(ANYOF_BITMAP_BYTE(p, c) &   ANYOF_BIT(c))
 
 #define ANYOF_BITMAP_SETALL(p)         \
        memset (ANYOF_BITMAP(p), 255, ANYOF_BITMAP_SIZE)
 #define ANYOF_BITMAP_CLEARALL(p)       \
        Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
-#if ANYOF_BITMAP_SIZE == 32
-/* Check that all 256 bits are all set. */
-#   define ANYOF_BITMAP_TESTALLSET(p)  /* Assumes sizeof(p) == 32 */     \
-       memEQ (ANYOF_BITMAP(p), "\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377\377", ANYOF_BITMAP_SIZE)
-#else
-#   error Need to fix this if raise bitmap size.  (As of this writing this macro is unused in the core)
-#endif
 
 #define ANYOF_SKIP             ((ANYOF_SIZE - 1)/sizeof(regnode))
 #define ANYOF_POSIXL_SKIP      ((ANYOF_POSIXL_SIZE - 1)/sizeof(regnode))
diff --git a/regexec.c b/regexec.c
index b6d163e..52ff312 100644
--- a/regexec.c
+++ b/regexec.c
@@ -7678,19 +7678,22 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
     if (c < NUM_ANYOF_CODE_POINTS) {
        if (ANYOF_BITMAP_TEST(n, c))
            match = TRUE;
-       else if (flags & ANYOF_NON_UTF8_NON_ASCII_ALL
-               && ! utf8_target
-               && ! isASCII(c))
+       else if ((flags & ANYOF_MATCHES_ALL_NON_UTF8_NON_ASCII)
+                 && ! utf8_target
+                 && ! isASCII(c))
        {
            match = TRUE;
        }
        else if (flags & ANYOF_LOCALE_FLAGS) {
-           if (flags & ANYOF_LOC_FOLD) {
-                if (ANYOF_BITMAP_TEST(n, PL_fold_locale[c])) {
-                    match = TRUE;
-                }
+           if ((flags & ANYOF_LOC_FOLD)
+                && c < 256
+               && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
+            {
+                match = TRUE;
             }
-           if (! match && ANYOF_POSIXL_TEST_ANY_SET(n)) {
+            else if (ANYOF_POSIXL_TEST_ANY_SET(n)
+                     && c < 256
+            ) {
 
                 /* The data structure is arranged so bits 0, 2, 4, ... are set
                  * if the class includes the Posix character class given by
@@ -7743,14 +7746,16 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
     /* If the bitmap didn't (or couldn't) match, and something outside the
      * bitmap could match, try that. */
     if (!match) {
-       if (c >= 256 && (flags & ANYOF_ABOVE_LATIN1_ALL)) {
-           match = TRUE;       /* Everything above 255 matches */
+       if (c >= NUM_ANYOF_CODE_POINTS
+            && (flags & ANYOF_MATCHES_ALL_ABOVE_BITMAP))
+        {
+           match = TRUE;       /* Everything above the bitmap matches */
        }
-       else if ((flags & ANYOF_NONBITMAP_NON_UTF8)
-                 || (utf8_target && (flags & ANYOF_UTF8))
+       else if ((flags & ANYOF_HAS_NONBITMAP_NON_UTF8_MATCHES)
+                 || (utf8_target && (flags & ANYOF_HAS_UTF8_NONBITMAP_MATCHES))
                   || ((flags & ANYOF_LOC_FOLD)
                        && IN_UTF8_CTYPE_LOCALE
-                       && ARG(n) != ANYOF_NONBITMAP_EMPTY))
+                       && ARG(n) != ANYOF_ONLY_HAS_BITMAP))
         {
             SV* only_utf8_locale = NULL;
            SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0,
diff --git a/sv.c b/sv.c
index 65aa456..78086b4 100644
--- a/sv.c
+++ b/sv.c
@@ -14444,6 +14444,7 @@ perl_clone_using(PerlInterpreter *proto_perl, UV flags,
     PL_Latin1          = sv_dup_inc(proto_perl->ILatin1, param);
     PL_UpperLatin1     = sv_dup_inc(proto_perl->IUpperLatin1, param);
     PL_AboveLatin1     = sv_dup_inc(proto_perl->IAboveLatin1, param);
+    PL_InBitmap         = sv_dup_inc(proto_perl->IInBitmap, param);
 
     PL_NonL1NonFinalFold = sv_dup_inc(proto_perl->INonL1NonFinalFold, param);
     PL_HasMultiCharFold = sv_dup_inc(proto_perl->IHasMultiCharFold, param);

--
Perl5 Master Repository

Reply via email to