In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/040aea3a0d449d98f0f858032aa9eba11c90d19d?hp=f53580fec42f3b12264ee27b756dec257c0bb77a>

- Log -----------------------------------------------------------------
commit 040aea3a0d449d98f0f858032aa9eba11c90d19d
Author: Karl Williamson <[email protected]>
Date:   Thu Dec 16 08:44:59 2010 -0700

    regexec.c: white-space only
    
    Commit 9e2c615305806d76433db342e5659ffeccc3746a didn't adjust the white
    space for the changes that it introduced.  This patch does that.

M       regexec.c

commit 78969a9808d226f8ac2f0c992aa6a67140a56ea6
Author: Karl Williamson <[email protected]>
Date:   Wed Dec 15 19:22:37 2010 -0700

    regexec.c: Remove unreached code
    
    The new name of ANYOF_LOC_NONBITMAP_FOLD makes it clear that the only
    way folding can be applicable here is if it is under locale.

M       regexec.c

commit 390656605358790e356331349a2f922ae36ae5df
Author: Karl Williamson <[email protected]>
Date:   Wed Dec 15 18:34:59 2010 -0700

    Change name of regex internal macro to reflect its new meaning
    
    ANYOF_FOLD is now set under only two conditions.  Otherwise the
    bitmap of characters 0-255 is fully calculated with the folds, and the
    flag is not set.  One condition is under locale, where the folds aren't
    known at compile time; the other is for things accessible only through a
    swash.
    
    By changing the name to its new meaning, certain optimizations become more
    obvious.

M       regcomp.c
M       regcomp.h
M       regexec.c
M       utf8.h
-----------------------------------------------------------------------

Summary of changes:
 regcomp.c |   34 +++++++++++-----------
 regcomp.h |   15 ++++++++--
 regexec.c |   93 +++++++++++++++++++++++++++++--------------------------------
 utf8.h    |    2 +-
 4 files changed, 74 insertions(+), 70 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 122c560..d52bf13 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -710,7 +710,7 @@ S_cl_anything(const RExC_state_t *pRExC_state, struct 
regnode_charclass_class *c
     cl->flags = ANYOF_EOS|ANYOF_UNICODE_ALL;
     if (LOC)
        cl->flags |= ANYOF_LOCALE;
-    cl->flags |= ANYOF_FOLD;
+    cl->flags |= ANYOF_LOC_NONBITMAP_FOLD;
 }
 
 /* Can match anything (initialization) */
@@ -767,8 +767,8 @@ S_cl_and(struct regnode_charclass_class *cl,
     if (!(ANYOF_CLASS_TEST_ANY_SET(and_with))
        && !(ANYOF_CLASS_TEST_ANY_SET(cl))
        && (and_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
-       && !(and_with->flags & ANYOF_FOLD)
-       && !(cl->flags & ANYOF_FOLD)) {
+       && !(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+       && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) {
        int i;
 
        if (and_with->flags & ANYOF_INVERT)
@@ -781,8 +781,8 @@ S_cl_and(struct regnode_charclass_class *cl,
     if (!(and_with->flags & ANYOF_EOS))
        cl->flags &= ~ANYOF_EOS;
 
-    if (!(and_with->flags & ANYOF_FOLD))
-       cl->flags &= ~ANYOF_FOLD;
+    if (!(and_with->flags & ANYOF_LOC_NONBITMAP_FOLD))
+       cl->flags &= ~ANYOF_LOC_NONBITMAP_FOLD;
 
     if (cl->flags & ANYOF_UNICODE_ALL && and_with->flags & ANYOF_NONBITMAP &&
        !(and_with->flags & ANYOF_INVERT)) {
@@ -818,8 +818,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct 
regnode_charclass_class *cl, con
         *   (OK1(i) | OK1(i')) | (!OK1(i) & !OK1(i'))
         */
        if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
-            && !(or_with->flags & ANYOF_FOLD)
-            && !(cl->flags & ANYOF_FOLD) ) {
+            && !(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+            && !(cl->flags & ANYOF_LOC_NONBITMAP_FOLD) ) {
            int i;
 
            for (i = 0; i < ANYOF_BITMAP_SIZE; i++)
@@ -831,8 +831,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct 
regnode_charclass_class *cl, con
     } else {
        /* (B1 | CL1) | (B2 | CL2) = (B1 | B2) | (CL1 | CL2)) */
        if ( (or_with->flags & ANYOF_LOCALE) == (cl->flags & ANYOF_LOCALE)
-            && (!(or_with->flags & ANYOF_FOLD)
-                || (cl->flags & ANYOF_FOLD)) ) {
+            && (!(or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+                || (cl->flags & ANYOF_LOC_NONBITMAP_FOLD)) ) {
            int i;
 
            /* OR char bitmap and class bitmap separately */
@@ -851,8 +851,8 @@ S_cl_or(const RExC_state_t *pRExC_state, struct 
regnode_charclass_class *cl, con
     if (or_with->flags & ANYOF_EOS)
        cl->flags |= ANYOF_EOS;
 
-    if (or_with->flags & ANYOF_FOLD)
-       cl->flags |= ANYOF_FOLD;
+    if (or_with->flags & ANYOF_LOC_NONBITMAP_FOLD)
+       cl->flags |= ANYOF_LOC_NONBITMAP_FOLD;
 
     /* If both nodes match something outside the bitmap, but what they match
      * outside is not the same pointer, and hence not easily compared, give up
@@ -3085,7 +3085,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode 
**scanp,
                if (uc >= 0x100 ||
                    (!(data->start_class->flags & (ANYOF_CLASS | ANYOF_LOCALE))
                    && !ANYOF_BITMAP_TEST(data->start_class, uc)
-                   && (!(data->start_class->flags & ANYOF_FOLD)
+                   && (!(data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD)
                         || !ANYOF_BITMAP_TEST(data->start_class, PL_fold_latin1[uc])))
                     )
                    compat = 0;
@@ -3140,7 +3140,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode 
**scanp,
                if (compat) {
                    ANYOF_BITMAP_SET(data->start_class, uc);
                    data->start_class->flags &= ~ANYOF_EOS;
-                   data->start_class->flags |= ANYOF_FOLD;
+                   data->start_class->flags |= ANYOF_LOC_NONBITMAP_FOLD;
                    if (OP(scan) == EXACTFL) {
                        data->start_class->flags |= ANYOF_LOCALE;
                    }
@@ -3155,7 +3155,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode 
**scanp,
                }
            }
            else if (flags & SCF_DO_STCLASS_OR) {
-               if (data->start_class->flags & ANYOF_FOLD) {
+               if (data->start_class->flags & ANYOF_LOC_NONBITMAP_FOLD) {
                    /* false positive possible if the class is case-folded.
                       Assume that the locale settings are the same... */
                    if (uc < 0x100) {
@@ -8913,7 +8913,7 @@ parseit:
      * which we have to wait to see what folding is in effect at runtime, and
      * for things not in the bitmap */
     if (FOLD && (LOC || ANYOF_FLAGS(ret) & ANYOF_NONBITMAP)) {
-        ANYOF_FLAGS(ret) |= ANYOF_FOLD;
+        ANYOF_FLAGS(ret) |= ANYOF_LOC_NONBITMAP_FOLD;
     }
 
     /* Optimize inverted simple patterns (e.g. [^a-z]).  Note that this doesn't
@@ -8974,7 +8974,7 @@ parseit:
            /* A locale node with one point can be folded; all the other cases
             * with folding will have two points, since we calculate them above
             */
-           if (ANYOF_FLAGS(ret) & ANYOF_FOLD) {
+           if (ANYOF_FLAGS(ret) & ANYOF_LOC_NONBITMAP_FOLD) {
                 op = EXACTFL;
            }
            else {
@@ -9745,7 +9745,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const 
regnode *o)
 
        if (flags & ANYOF_LOCALE)
            sv_catpvs(sv, "{loc}");
-       if (flags & ANYOF_FOLD)
+       if (flags & ANYOF_LOC_NONBITMAP_FOLD)
            sv_catpvs(sv, "{i}");
        Perl_sv_catpvf(aTHX_ sv, "[%s", PL_colors[0]);
        if (flags & ANYOF_INVERT)
diff --git a/regcomp.h b/regcomp.h
index 00fd945..3e87aa9 100644
--- a/regcomp.h
+++ b/regcomp.h
@@ -314,9 +314,18 @@ struct regnode_charclass_class {
 
 /* Flags for node->flags of ANYOF */
 
-#define ANYOF_LOCALE           0x01
-#define ANYOF_FOLD             0x02
-#define ANYOF_INVERT           0x04
+#define ANYOF_LOCALE            0x01
+
+/* The fold is calculated and stored in the bitmap where possible at compile
+ * time.  However there are two cases where it isn't possible.  These share
+ * this bit:  1) under locale, where the actual folding varies depending on
+ * what the locale is at the time of execution; and 2) where the folding is
+ * specified in a swash, not the bitmap, such as characters which aren't
+ * specified in the bitmap, or properties that aren't looked at at compile time
+ */
+#define ANYOF_LOC_NONBITMAP_FOLD 0x02
+
+#define ANYOF_INVERT            0x04
 
 /* CLASS is never set unless LOCALE is too: has runtime \d, \w, [:posix:], ... */
 #define ANYOF_CLASS     0x08
diff --git a/regexec.c b/regexec.c
index 7778992..512be67 100644
--- a/regexec.c
+++ b/regexec.c
@@ -6339,56 +6339,50 @@ S_reginclass(pTHX_ const regexp * const prog, register 
const regnode * const n,
     if (c < 256) {
        if (ANYOF_BITMAP_TEST(n, c))
            match = TRUE;
-       else if (flags & ANYOF_FOLD) {
-           U8 f;
 
-           if (flags & ANYOF_LOCALE) {
-               PL_reg_flags |= RF_tainted;
-               f = PL_fold_locale[c];
-           }
-           else
-               f = PL_fold[c];
-           if (f != c && ANYOF_BITMAP_TEST(n, f))
-               match = TRUE;
-       }
-       
-       if (!match && ANYOF_CLASS_TEST_ANY_SET(n)) {
-           PL_reg_flags |= RF_tainted;     /* CLASS implies LOCALE */
-           if (
-               (ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
-               (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
-               (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
-               (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
-               (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
-               (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
-               (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
-               ) /* How's that for a conditional? */
+       else if (flags & ANYOF_LOCALE) {
+           PL_reg_flags |= RF_tainted;
+
+           if ((flags & ANYOF_LOC_NONBITMAP_FOLD)
+                && ANYOF_BITMAP_TEST(n, PL_fold_locale[c]))
            {
                match = TRUE;
            }
+           else if (ANYOF_CLASS_TEST_ANY_SET(n) &&
+                    ((ANYOF_CLASS_TEST(n, ANYOF_ALNUM)   &&  isALNUM_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NALNUM)  && !isALNUM_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_SPACE)   &&  isSPACE_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NSPACE)  && !isSPACE_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_DIGIT)   &&  isDIGIT_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NDIGIT)  && !isDIGIT_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_ALNUMC)  &&  isALNUMC_LC(c)) ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NALNUMC) && !isALNUMC_LC(c)) ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_ALPHA)   &&  isALPHA_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NALPHA)  && !isALPHA_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_ASCII)   &&  isASCII(c))     ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NASCII)  && !isASCII(c))     ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_CNTRL)   &&  isCNTRL_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NCNTRL)  && !isCNTRL_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_GRAPH)   &&  isGRAPH_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NGRAPH)  && !isGRAPH_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_LOWER)   &&  isLOWER_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NLOWER)  && !isLOWER_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_PRINT)   &&  isPRINT_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NPRINT)  && !isPRINT_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_PUNCT)   &&  isPUNCT_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NPUNCT)  && !isPUNCT_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_UPPER)   &&  isUPPER_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NUPPER)  && !isUPPER_LC(c))  ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_XDIGIT)  &&  isXDIGIT(c))    ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NXDIGIT) && !isXDIGIT(c))    ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_PSXSPC)  &&  isPSXSPC(c))    ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NPSXSPC) && !isPSXSPC(c))    ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_BLANK)   &&  isBLANK(c))     ||
+                     (ANYOF_CLASS_TEST(n, ANYOF_NBLANK)  && !isBLANK(c))
+                    ) /* How's that for a conditional? */
+           ) {
+               match = TRUE;
+           }
        }
     }
 
@@ -6397,8 +6391,9 @@ S_reginclass(pTHX_ const regexp * const prog, register 
const regnode * const n,
     if (!match) {
        if (utf8_target && (flags & ANYOF_UNICODE_ALL)) {
            if (c >= 256
-               || ((flags & ANYOF_FOLD) /* Latin1 1 that has a non-Latin1 fold
-                                           should match */
+               || ((flags & ANYOF_LOC_NONBITMAP_FOLD) /* Latin1 1 that has a
+                                                         non-Latin1 fold
+                                                         should match */
                     && _HAS_NONLATIN1_FOLD_CLOSURE_ONLY_FOR_USE_BY_REGCOMP_DOT_C_AND_REGEXEC_DOT_C(c)))
            {
                match = TRUE;
@@ -6420,7 +6415,7 @@ S_reginclass(pTHX_ const regexp * const prog, register 
const regnode * const n,
                }
                if (swash_fetch(sw, utf8_p, 1))
                    match = TRUE;
-               else if (flags & ANYOF_FOLD) {
+               else if (flags & ANYOF_LOC_NONBITMAP_FOLD) {
                    if (!match && lenp && av) {
                        I32 i;
                        for (i = 0; i <= av_len(av); i++) {
diff --git a/utf8.h b/utf8.h
index 405b8b4..a162114 100644
--- a/utf8.h
+++ b/utf8.h
@@ -291,7 +291,7 @@ Perl's extended UTF-8 means we can have start bytes up to 
FF.
 #define ANYOF_FOLD_SHARP_S(node, input, end)   \
        (ANYOF_BITMAP_TEST(node, LATIN_SMALL_LETTER_SHARP_S) && \
         (ANYOF_FLAGS(node) & ANYOF_NONBITMAP) && \
-        (ANYOF_FLAGS(node) & ANYOF_FOLD) && \
+        (ANYOF_FLAGS(node) & ANYOF_LOC_NONBITMAP_FOLD) && \
         ((end) > (input) + 1) && \
         toLOWER((input)[0]) == 's' && \
         toLOWER((input)[1]) == 's')

--
Perl5 Master Repository

Reply via email to