In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/903c858a4dec7ba02a3b2af464fd61878f1bdf48?hp=e273c3f6b6604eff1cf509219ad71949b903654a>

- Log -----------------------------------------------------------------
commit 903c858a4dec7ba02a3b2af464fd61878f1bdf48
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 26 12:37:00 2015 -0700

    regcomp.c: Add comment.
    
    This should have been included in commit
    285b5ca0145796a915dec03e87e0176fd4681041

M       regcomp.c

commit c286c389b9042dbcef4d300c4faacf7efc8f6e75
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 26 12:35:32 2015 -0700

    regexec.c: Avoid a function call
    
    Not infrequently, a UTF-8 string will contain ASCII.  In this case, by
    adding a test for this we can avoid the function call that is needed for
    more complicated cases.

M       regexec.c

commit 428c1aff5a0e12f06706ffce0a395e4123d8965e
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 26 12:34:07 2015 -0700

    regcomp.h: Remove extraneous 'struct's
    
    Better to not have this clutter.

M       regcomp.h

commit 0effac6071c5a0244f527fa9e019938a107ab9ac
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 26 11:47:26 2015 -0700

    regcomp.h: Fix shift and mask
    
    The mask removed here was to make sure that right shifting didn't
    propagate the sign bit, but is unnecessary as the value shifted is
    unsigned.  And confining things to a U8 with that mask assumes that the
    bit vector being operated on has 256 elements max.  This isn't
    necessarily true these days, as one can change ANYOF_BITMAP_SIZE.
    In fact changing that number was failing until this commit.
    
    It also adds white space to make it easier to read.

M       regcomp.h

commit 4cbce0a6b377e059ea536b5671a097422d236ec2
Author: Karl Williamson <[email protected]>
Date:   Sat Dec 26 11:28:09 2015 -0700

    regcomp.h: Use more basic macro in #defines
    
    Instead of having this code repeated in several places, call
    the more base macro from the others.

M       regcomp.h

commit d1c40ef5b615823c7946ca520b1fedd807df469a
Author: Karl Williamson <[email protected]>
Date:   Thu Dec 24 22:42:08 2015 -0700

    regcomp.h: Free up bit in ANYOF FLAGS field
    
    I've long been confronted with trying to do things to create a spare bit
    to use.  I thought it easier now, while it's fresh in my mind, to free
    up one for future use, rather than re-learn things when it next becomes
    necessary.  It would have been a different story if the freed bit had
    required a performance penalty.
    
    This commit also updates the comments about how to create even more
    spare bits should it become necessary.

M       regcomp.c
M       regcomp.h
M       regexec.c

commit 037715a6d8890fc5a104494153096c071496030a
Author: Karl Williamson <[email protected]>
Date:   Wed Dec 23 12:43:30 2015 -0700

    regcomp.h: Shorten, clarify names of internal flags
    
    Some of the names are expanded slightly rather than shortened.

M       regcomp.c
M       regcomp.h
M       regexec.c

commit 7e327f7638ec744b796b0bcf2fa43cb94ed67d6c
Author: Karl Williamson <[email protected]>
Date:   Wed Dec 23 12:38:23 2015 -0700

    APItest.xs: Silence compiler warning on 32-bit machines
    
    One warning must remain, as otherwise things don't work.

M       ext/XS-APItest/APItest.pm
M       ext/XS-APItest/APItest.xs
-----------------------------------------------------------------------

Summary of changes:
 ext/XS-APItest/APItest.pm |   2 +-
 ext/XS-APItest/APItest.xs |   2 +-
 regcomp.c                 |  33 +++++++---
 regcomp.h                 | 154 ++++++++++++++++++++++------------------------
 regexec.c                 |  16 ++---
 5 files changed, 107 insertions(+), 100 deletions(-)

diff --git a/ext/XS-APItest/APItest.pm b/ext/XS-APItest/APItest.pm
index 0fe79e8..f1f6472 100644
--- a/ext/XS-APItest/APItest.pm
+++ b/ext/XS-APItest/APItest.pm
@@ -5,7 +5,7 @@ use strict;
 use warnings;
 use Carp;
 
-our $VERSION = '0.77';
+our $VERSION = '0.78';
 
 require XSLoader;
 
diff --git a/ext/XS-APItest/APItest.xs b/ext/XS-APItest/APItest.xs
index ebdef68..77a38c5 100644
--- a/ext/XS-APItest/APItest.xs
+++ b/ext/XS-APItest/APItest.xs
@@ -1521,7 +1521,7 @@ xsreturn_iv()
 void
 xsreturn_uv()
     PPCODE:
-        XSRETURN_UV( (U32)((1<<31) + 1) );
+        XSRETURN_UV( (U32)((1U<<31) + 1) );
 
 void
 xsreturn_nv()
diff --git a/regcomp.c b/regcomp.c
index ab7a5d3..2df6ad7 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -600,6 +600,9 @@ static const scan_data_t zero_scan_data =
  *
  * When the substitute is constructed, we save (tI -sI) as RExC_precomp_adj,
  * and we save tC as RExC_adjusted_start.
+ *
+ * During normal processing of the input pattern, everything points to that,
+ * with RExC_precomp_adj set to 0, and RExC_adjusted_start set to sI.
  */
 
 #define tI_sI           RExC_precomp_adj
@@ -1240,7 +1243,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
         }
 
         /* Get the code points valid only under UTF-8 locales */
-        if ((ANYOF_FLAGS(node) & ANYOF_LOC_FOLD)
+        if ((ANYOF_FLAGS(node) & ANYOFL_FOLD)
             && ary[2] && ary[2] != &PL_sv_undef)
         {
             only_utf8_locale_invlist = ary[2];
@@ -1287,7 +1290,7 @@ S_get_ANYOF_cp_list_for_ssc(pTHX_ const RExC_state_t *pRExC_state,
     if (ANYOF_FLAGS(node) & ANYOF_INVERT) {
         _invlist_invert(invlist);
     }
-    else if (new_node_has_latin1 && ANYOF_FLAGS(node) & ANYOF_LOC_FOLD) {
+    else if (new_node_has_latin1 && ANYOF_FLAGS(node) & ANYOFL_FOLD) {
 
         /* Under /li, any 0-255 could fold to any other 0-255, depending on the
          * locale.  We can skip this if there are no 0-255 at all. */
@@ -1366,6 +1369,10 @@ S_ssc_and(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
             &( ANYOF_COMMON_FLAGS
               |ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
               
|ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
+            if (ANYOFL_UTF8_LOCALE_REQD(ANYOF_FLAGS(and_with))) {
+                anded_flags &=
+                    ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
+            }
         }
     }
 
@@ -1522,6 +1529,10 @@ S_ssc_or(pTHX_ const RExC_state_t *pRExC_state, regnode_ssc *ssc,
             |= ANYOF_FLAGS(or_with)
              & ( ANYOF_SHARED_d_MATCHES_ALL_NON_UTF8_NON_ASCII_non_d_WARN_SUPER
                 
|ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP);
+            if (ANYOFL_UTF8_LOCALE_REQD(ANYOF_FLAGS(or_with))) {
+                ored_flags |=
+                    ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
+            }
         }
     }
 
@@ -14201,7 +14212,8 @@ redo_curchar:
         assert(OP(node) == ANYOF);
 
         OP(node) = ANYOFL;
-        ANYOF_FLAGS(node) |= ANYOF_LOC_REQ_UTF8;
+        ANYOF_FLAGS(node)
+                |= ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
     }
 
     if (save_fold) {
@@ -16098,14 +16110,15 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth,
      * locales, or the class matches at least one 0-255 range code point */
     if (LOC && FOLD) {
         if (only_utf8_locale_list) {
-            ANYOF_FLAGS(ret) |=  ANYOF_LOC_FOLD
-                                |ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES;
+            ANYOF_FLAGS(ret)
+                 |=  ANYOFL_FOLD
+                    |ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD;
         }
         else if (cp_list) { /* Look to see if a 0-255 code point is in list */
             UV start, end;
             invlist_iterinit(cp_list);
             if (invlist_iternext(cp_list, &start, &end) && start < 256) {
-                ANYOF_FLAGS(ret) |= ANYOF_LOC_FOLD;
+                ANYOF_FLAGS(ret) |= ANYOFL_FOLD;
             }
             invlist_iterfinish(cp_list);
         }
@@ -17473,14 +17486,14 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
 
 
        if (OP(o) == ANYOFL) {
-            if (flags & ANYOF_LOC_REQ_UTF8) {
+            if (ANYOFL_UTF8_LOCALE_REQD(flags)) {
                 sv_catpvs(sv, "{utf8-loc}");
             }
             else {
                 sv_catpvs(sv, "{loc}");
             }
         }
-       if (flags & ANYOF_LOC_FOLD)
+       if (flags & ANYOFL_FOLD)
            sv_catpvs(sv, "{i}");
        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
        if (flags & ANYOF_INVERT)
@@ -17507,7 +17520,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
            || (flags
                 & ( ANYOF_MATCHES_ALL_ABOVE_BITMAP
                    
|ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP
-                   |ANYOF_LOC_FOLD)))
+                   |ANYOFL_FOLD)))
         {
             if (do_sep) {
                 Perl_sv_catpvf(aTHX_ sv,"%s][%s",PL_colors[1],PL_colors[0]);
@@ -17589,7 +17602,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_
                     SvREFCNT_dec_NN(lv);
                 }
 
-                if ((flags & ANYOF_LOC_FOLD)
+                if ((flags & ANYOFL_FOLD)
                      && only_utf8_locale
                      && only_utf8_locale != &PL_sv_undef)
                 {
diff --git a/regcomp.h b/regcomp.h
index a8955f3..630e2e9 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -403,19 +403,18 @@ struct regnode_ssc {
  *  2)  A subset of item 1) is if all possible code points outside the bitmap
  *      match.  This is a common occurrence when the class is complemented,
  *      like /[^ij]/.  Therefore a bit is reserved to indicate this,
- *      ANYOF_MATCHES_ALL_ABOVE_BITMAP.  If it became necessary, this flag could
- *      be replaced by using the normal swash mechanism, but with a performance
- *      penalty.
+ *      rather than having an expensive swash created,
+ *      ANYOF_MATCHES_ALL_ABOVE_BITMAP.
  *  3)  Under /d rules, it can happen that code points that are in the upper
  *      latin1 range (\x80-\xFF or their equivalents on EBCDIC platforms) match
  *      only if the runtime target string being matched against is UTF-8.  For
  *      example /[\w[:punct:]]/d.  This happens only for posix classes (with a
- *      couple of exceptions, like \d), and all such ones also have
- *      above-bitmap matches.  Thus, 3) implies 1) as well.  Note that /d rules
- *      are no longer encouraged; 'use 5.14' or higher deselects them.  But a
- *      flag is required so that they can be properly handled.  But it can be a
- *      shared flag: see 5) below.
- *  4)  Also under /d rules, something like /[\Wfoo] will match everything in
+ *      couple of exceptions, like \d where it doesn't happen), and all such
+ *      ones also have above-bitmap matches.  Thus, 3) implies 1) as well.
+ *      Note that /d rules are no longer encouraged; 'use 5.14' or higher
+ *      deselects them.  But a flag is required so that they can be properly
+ *      handled.  But it can be a shared flag: see 5) below.
+ *  4)  Also under /d rules, something like /[\Wfoo]/ will match everything in
  *      the \x80-\xFF range, unless the string being matched against is UTF-8.
  *      A swash could be created for this case, but this is relatively common,
  *      and it turns out that it's all or nothing:  if any one of these code
@@ -438,66 +437,46 @@ struct regnode_ssc {
  *      UTF-8 one.  These are quite rare, so it would be good to avoid the
  *      expense of looking for them.  But /l matching is slow anyway, and we've
  *      traditionally not worried too much about its performance.  And this
- *      condition requires the ANYOF_LOC_FOLD flag to be set, so testing for
+ *      condition requires the ANYOFL_FOLD flag to be set, so testing for
  *      that flag would be sufficient to rule out most cases of this.  So it is
- *      unclear if this should have a flag or not.  But, one is currently
- *      allocated for this purpose, ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES (and the
- *      text below indicates how to share it, should another bit be needed).
+ *      unclear if this should have a flag or not.  But, this flag can be
+ *      shared with another, so it doesn't occupy extra space.
  *
- * At the moment, there are no spare bits, but this could be changed by various
- * tricks.
+ * At the moment, there is one spare bit, but this could be increased by
+ * various tricks.
  *
- * Note that item ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES is not independent of the
- * ANYOF_LOC_FOLD flag below.  Also, the ANYOF_LOC_REQ_UTF8 flag is set only if
- * both these aren't.  We can therefore share ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES
- * with ANYOF_LOC_REQ_UTF8, so what the shared flag means depends on the
- * ANYOF_LOC_FOLD flag.
+ * If just one more bit is needed, at this writing it seems to khw that the
+ * best choice would be to make ANYOF_MATCHES_ALL_ABOVE_BITMAP not a flag, but
+ * something like
  *
- * Beyond that, note that the information may be conveyed by creating new
- * regnode types.  This is not the best solution, as shown later in this
- * paragraph, but it is something that is feasible.  We could have a regnode
- * for ANYOF_INVERT, for example.  A complication of this is that the regexec.c
- * REGINCLASS macro assumes that it can just use the bitmap if no flags are
- * set.  This would have to be changed to add extra tests for the node type, or
- * a special flag reserved that means unspecified special handling, and then the
- * node-type would be used internally to sort that out.  So we could gain a bit
- * by having an ANYOF_SPECIAL flag, and a node type for INVERT, and another for
- * POSIXL, and still another for INVERT_POSIXL.  This example illustrates one
- * problem with this, a combinatorial explosion of node types.  The one node
- * type khw can think of that doesn't have this explosion issue is
- * ANYOF_LOC_REQ_UTF8.  This flag is a natural candidate for being a separate
- * node type because it is a specialization of the current ANYOFL, and because
- * no other ANYOFL-only flags are set when it is; also most of its uses are
- * actually outside the reginclass() function, so this could be done with no
- * performance penalty.  But since it can be shared, as noted above, it doesn't
- * take up space anyway.  Another issue when turning a flag into a node type, is
- * that a SSC may use that flag -- not just a regular ANYOF[DL]?.  In the case
- * of ANYOF_LOC_REQ_UTF8, the only likely problem is accurately settting the
- * SSC node-type to the new one, which would likely involve S_ssc_or and
- * S_ssc_and, and not how the SSC currently gets set to ANYOFL.
+ *      #define ANYOF_MATCHES_ALL_ABOVE_BITMAP      ((U32) -2)
  *
- * Another possibility is to instead rename the ANYOF_POSIXL flag to be
- * ANYOFL_LARGE, to mean that the ANYOF node has an extra 32 bits beyond what a
- * regular one does.  That's what it effectively means now, with the extra
- * space all for the POSIX class flags.  But those classes actually only occupy
- * 30 bits, so 2 of the locale flags could be moved to that extra space.  The
- * downside of this is that ANYOFL nodes with whichever of the flags get moved
- * would have to have the extra space always allocated.
+ * and access it through the ARG like ANYOF_ONLY_HAS_BITMAP is.  This flag is
+ * used by all ANYOF node types, and it could be used to avoid calling the
+ * handler function, as the macro REGINCLASS in regexec.c does now for other
+ * cases.
+ *
+ * Another possibility is to instead (or additionally) rename the ANYOF_POSIXL
+ * flag to be ANYOFL_LARGE, to mean that the ANYOF node has an extra 32 bits
+ * beyond what a regular one does.  That's what it effectively means now, with
+ * the extra space all for the POSIX class flags.  But those classes actually
+ * only occupy 30 bits, so the ANYOFL_FOLD and
+ * ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD flags could be moved
+ * to that extra space.  The 30 bits in the extra word would indicate if a
+ * posix class should be looked up or not.  The downside of this is that ANYOFL
+ * nodes with folding would always have to have the extra space allocated, even
+ * if they didn't use the 30 posix bits.  There isn't an SSC problem as all
+ * SSCs are this large anyway.
  *
  * One could completely remove ANYOFL_LARGE and make all ANYOFL nodes large.
- * The 30 bits in the extra word would indicate if a posix class should be
- * looked up or not.  There isn't an SSC problem as all SSCs are this large
- * anyway, and the SSC could be set to this node type.   REGINCLASS would have
- * to be modified so that if the node type were this, it would call
- * reginclass(), as the flag bit that indicates to do this now would be gone.
- * If 2 locale flags are moved to the larger structure, this would free up a
- * total of 4 bits.  If this were done, we could create an ANYOF_INVERT
- * node-type without a combinatorial explosion, getting us to 5 bits.  And,
- * keep in mind that ANYOF_MATCHES_ALL_ABOVE_BITMAP is solely for performance,
- * so could be removed.  The other performance-related flags are shareable with
- * flags that are required.
+ * REGINCLASS would have to be modified so that if the node type were this, it
+ * would call reginclass(), as the flag bit that indicates to do this now would
+ * be gone.
+ *
+ * All told, 5 bits could be available for other uses if all of the above were
+ * done.
  *
- * Several flags are not used in synthetic start class (SSC) nodes, so could be
+ * Some flags are not used in synthetic start class (SSC) nodes, so could be
  * shared should new flags be needed for SSCs, like SSC_MATCHES_EMPTY_STRING
  * now. */
 
@@ -519,17 +498,32 @@ struct regnode_ssc {
  * time.  However under locale, the actual folding varies depending on
  * what the locale is at the time of execution, so it has to be deferred until
  * then.  Only set under /l; never in an SSC  */
-#define ANYOF_LOC_FOLD                          0x04
-
-/* If set, ANYOF_LOC_FOLD is also set, and there are potential matches that
- * will be valid only if the locale is a UTF-8 one. */
-#define ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES        0x08
-
-/* If set, means to warn if runtime locale isn't a UTF-8 one.  Only under /l.
- * If set, none of INVERT, LOC_FOLD, POSIXL,
- * ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP can
- * be set.  Can be in an SSC */
-#define ANYOF_LOC_REQ_UTF8                      0x10
+#define ANYOFL_FOLD                             0x04
+
+/* Shared bit set only with ANYOFL and SSC nodes:
+ *    If ANYOFL_FOLD is set, this means there are potential matches valid
+ *       only if the locale is a UTF-8 one.
+ *    If ANYOFL_FOLD is NOT set, this means to warn if the runtime locale
+ *       isn't a UTF-8 one (and the generated node assumes a UTF-8 locale).
+ *       None of INVERT, POSIXL,
+ *       ANYOF_SHARED_d_UPPER_LATIN1_UTF8_STRING_MATCHES_non_d_RUNTIME_USER_PROP
+ *       can be set.  */
+#define ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD        0x08
+
+/* Convenience macros for teasing apart the meanings when reading the above bit
+ * */
+#define ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE(flags)                        \
+    ((flags & ( ANYOFL_FOLD /* Both bits are set */                         \
+               |ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD))   \
+             == ( ANYOFL_FOLD                                               \
+                 |ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD))
+
+#define  ANYOFL_UTF8_LOCALE_REQD(flags)                                     \
+    ((flags & ( ANYOFL_FOLD /* Only REQD bit is set */                      \
+               |ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD))   \
+             == ANYOFL_SHARED_UTF8_LOCALE_fold_HAS_MATCHES_nonfold_REQD)
+
+/* Spare:                                       0x10 */
 
 /* If set, the node matches every code point NUM_ANYOF_CODE_POINTS and above.
  * Can be in an SSC */
@@ -561,12 +555,12 @@ struct regnode_ssc {
 
 #define ANYOF_FLAGS_ALL                (0xff)
 
-#define ANYOF_LOCALE_FLAGS (ANYOF_LOC_FOLD | ANYOF_MATCHES_POSIXL)
+#define ANYOF_LOCALE_FLAGS (ANYOFL_FOLD | ANYOF_MATCHES_POSIXL)
 
 /* These are the flags that apply to both regular ANYOF nodes and synthetic
  * start class nodes during construction of the SSC.  During finalization of
  * the SSC, other of the flags may get added to it */
-#define ANYOF_COMMON_FLAGS      ANYOF_LOC_REQ_UTF8
+#define ANYOF_COMMON_FLAGS      0
 
 /* Character classes for node->classflags of ANYOF */
 /* Should be synchronized with a table in regprop() */
@@ -686,9 +680,9 @@ struct regnode_ssc {
 
 #define ANYOF_POSIXL_AND(source, dest) STMT_START { (dest)->classflags &= (source)->classflags ; } STMT_END
 
-#define ANYOF_BITMAP_ZERO(ret) Zero(((struct regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
-#define ANYOF_BITMAP(p)                (((struct regnode_charclass*)(p))->bitmap)
-#define ANYOF_BITMAP_BYTE(p, c)        (ANYOF_BITMAP(p)[(((U8)(c)) >> 3) & 31])
+#define ANYOF_BITMAP_ZERO(ret) Zero(((regnode_charclass*)(ret))->bitmap, ANYOF_BITMAP_SIZE, char)
+#define ANYOF_BITMAP(p)                ((regnode_charclass*)(p))->bitmap
+#define ANYOF_BITMAP_BYTE(p, c)        BITMAP_BYTE(ANYOF_BITMAP(p), c)
 #define ANYOF_BITMAP_SET(p, c) (ANYOF_BITMAP_BYTE(p, c) |=  ANYOF_BIT(c))
 #define ANYOF_BITMAP_CLEAR(p,c)        (ANYOF_BITMAP_BYTE(p, c) &= ~ANYOF_BIT(c))
 #define ANYOF_BITMAP_TEST(p, c)        cBOOL(ANYOF_BITMAP_BYTE(p, c) &   ANYOF_BIT(c))
@@ -698,7 +692,7 @@ struct regnode_ssc {
 #define ANYOF_BITMAP_CLEARALL(p)       \
        Zero (ANYOF_BITMAP(p), ANYOF_BITMAP_SIZE)
 
-#define ANYOF_SKIP             EXTRA_SIZE(struct regnode_charclass)
+#define ANYOF_SKIP             EXTRA_SIZE(regnode_charclass)
 #define ANYOF_POSIXL_SKIP      EXTRA_SIZE(regnode_charclass_posixl)
 
 /*
@@ -914,7 +908,7 @@ typedef struct _reg_ac_data reg_ac_data;
    three different sets... */
 
 #define TRIE_BITMAP(p)         (((reg_trie_data *)(p))->bitmap)
-#define TRIE_BITMAP_BYTE(p, c) (TRIE_BITMAP(p)[(((U8)(c)) >> 3) & 31])
+#define TRIE_BITMAP_BYTE(p, c) BITMAP_BYTE(TRIE_BITMAP(p), c)
 #define TRIE_BITMAP_SET(p, c)  (TRIE_BITMAP_BYTE(p, c) |=  ANYOF_BIT((U8)c))
 #define TRIE_BITMAP_CLEAR(p,c) (TRIE_BITMAP_BYTE(p, c) &= ~ANYOF_BIT((U8)c))
 #define TRIE_BITMAP_TEST(p, c) (TRIE_BITMAP_BYTE(p, c) &   ANYOF_BIT((U8)c))
@@ -923,7 +917,7 @@ typedef struct _reg_ac_data reg_ac_data;
 #define IS_TRIE_AC(op) ((op)>=AHOCORASICK)
 
 
-#define BITMAP_BYTE(p, c)      (((U8*)p)[(((U8)(c)) >> 3) & 31])
+#define BITMAP_BYTE(p, c)      (( (U8*) p) [ ( ( (UV) (c)) >> 3) ] )
 #define BITMAP_TEST(p, c)      (BITMAP_BYTE(p, c) &   ANYOF_BIT((U8)c))
 
 /* these defines assume uniquecharcount is the correct variable, and state may be evaluated twice */
diff --git a/regexec.c b/regexec.c
index afe87a5..0e95676 100644
--- a/regexec.c
+++ b/regexec.c
@@ -1826,7 +1826,7 @@ S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
     case ANYOFL:
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
 
-        if ((FLAGS(c) & ANYOF_LOC_REQ_UTF8) && ! IN_UTF8_CTYPE_LOCALE) {
+        if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(c)) && ! IN_UTF8_CTYPE_LOCALE) {
             Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
         }
 
@@ -5766,7 +5766,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
        case ANYOFL:  /*  /[abc]/l      */
             _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
 
-            if ((FLAGS(scan) & ANYOF_LOC_REQ_UTF8) && ! IN_UTF8_CTYPE_LOCALE)
+            if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(scan)) && ! IN_UTF8_CTYPE_LOCALE)
             {
               Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), 
utf8_locale_required);
             }
@@ -5775,7 +5775,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
        case ANYOF:  /*   /[abc]/       */
             if (NEXTCHR_IS_EOS)
                 sayNO;
-           if (utf8_target) {
+           if (utf8_target && ! UTF8_IS_INVARIANT(locinput)) {
                if (!reginclass(rex, scan, (U8*)locinput, (U8*)reginfo->strend,
                                                                    
utf8_target))
                    sayNO;
@@ -8301,7 +8301,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p,
     case ANYOFL:
         _CHECK_AND_WARN_PROBLEMATIC_LOCALE;
 
-        if ((FLAGS(p) & ANYOF_LOC_REQ_UTF8) && ! IN_UTF8_CTYPE_LOCALE) {
+        if (ANYOFL_UTF8_LOCALE_REQD(FLAGS(p)) && ! IN_UTF8_CTYPE_LOCALE) {
             Perl_ck_warner(aTHX_ packWARN(WARN_LOCALE), utf8_locale_required);
         }
         /* FALLTHROUGH */
@@ -8648,7 +8648,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
                 * UTF8_ALLOW_FFFF */
        if (c_len == (STRLEN)-1)
            Perl_croak(aTHX_ "Malformed UTF-8 character (fatal)");
-        if (c > 255 && OP(n) == ANYOFL && ! (flags & ANYOF_LOC_REQ_UTF8)) {
+        if (c > 255 && OP(n) == ANYOFL && ! ANYOFL_UTF8_LOCALE_REQD(flags)) {
             _CHECK_AND_OUTPUT_WIDE_LOCALE_CP_MSG(c);
         }
     }
@@ -8666,7 +8666,7 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
            match = TRUE;
        }
        else if (flags & ANYOF_LOCALE_FLAGS) {
-           if ((flags & ANYOF_LOC_FOLD)
+           if ((flags & ANYOFL_FOLD)
                 && c < 256
                && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
             {
@@ -8751,8 +8751,8 @@ S_reginclass(pTHX_ regexp * const prog, const regnode * const n, const U8* const
                                                                  && c < 256
 #                               endif
                                 )))
-                     || ((   flags & ANYOF_ONLY_UTF8_LOC_FOLD_MATCHES)
-                          && IN_UTF8_CTYPE_LOCALE)))
+                     || (   ANYOFL_SOME_FOLDS_ONLY_IN_UTF8_LOCALE(flags)
+                         && IN_UTF8_CTYPE_LOCALE)))
         {
             SV* only_utf8_locale = NULL;
            SV * const sw = _get_regclass_nonbitmap_data(prog, n, TRUE, 0,

--
Perl5 Master Repository

Reply via email to