In perl.git, the branch blead has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/a5540cf9741163e5c13e99582ebe3a6ba4f3d3fa?hp=889458f14071a618cdc0fb519092327c4d5f5f61>

- Log -----------------------------------------------------------------
commit a5540cf9741163e5c13e99582ebe3a6ba4f3d3fa
Author: Karl Williamson <[email protected]>
Date:   Mon Oct 17 15:08:08 2016 -0600

    PATCH: [perl #129322] S_invlist_clear(SV *): Assertion `invlist' failed
    
    This was the result of an inconsistency in the inversion list union and
    intersection routines, where under some conditions the function returned
    a new inversion list, and under other conditions it just changed one of
    the input ones.  The caller knew about one of those and compensated, but
    that compensation was erroneous given other conditions.  This violated
    encapsulation.  The fix is make the called functions always consistent.

M       regcomp.c
M       t/re/regex_sets.t

commit 8204e83c388410f177953bac8b1f3a0dd3d66602
Author: Karl Williamson <[email protected]>
Date:   Sat Oct 15 10:30:08 2016 -0600

    regcomp.c: Fix erroneous comment; clarify others

M       regcomp.c

commit a03e9135315c9b42294b83389d345c6e3953a3f7
Author: Karl Williamson <[email protected]>
Date:   Wed Aug 31 17:52:19 2016 -0600

    Add a regex_sets debugging function
    
    This is enabled by a C flag, as commented.  It is designed to be found
    only by someone reading the code and wanting something temporary to help
    in debugging.

M       embed.fnc
M       embed.h
M       proto.h
M       regcomp.c
-----------------------------------------------------------------------

Summary of changes:
 embed.fnc         |   6 ++
 embed.h           |   5 +
 proto.h           |   7 ++
 regcomp.c         | 280 +++++++++++++++++++++++++++---------------------------
 t/re/regex_sets.t |   5 +
 5 files changed, 162 insertions(+), 141 deletions(-)

diff --git a/embed.fnc b/embed.fnc
index 1c9f584..c58f49a 100644
--- a/embed.fnc
+++ b/embed.fnc
@@ -2257,6 +2257,12 @@ Es       |regnode*|handle_regex_sets|NN RExC_state_t 
*pRExC_state \
                                |NULLOK SV ** return_invlist            \
                                |NN I32 *flagp|U32 depth                \
                                |NN char * const oregcomp_parse
+#if defined(DEBUGGING) && defined(ENABLE_REGEX_SETS_DEBUGGING)
+Es     |void   |dump_regex_sets_structures                                 \
+                               |NN RExC_state_t *pRExC_state               \
+                               |NN AV * stack                              \
+                               |const IV fence|NN AV * fence_stack
+#endif
 Es     |void|parse_lparen_question_flags|NN RExC_state_t *pRExC_state
 Es     |regnode*|reg_node      |NN RExC_state_t *pRExC_state|U8 op
 Es     |regnode*|regpiece      |NN RExC_state_t *pRExC_state \
diff --git a/embed.h b/embed.h
index 1ce8173..2c25e9e 100644
--- a/embed.h
+++ b/embed.h
@@ -1004,6 +1004,11 @@
 #endif
 #    endif
 #  endif
+#  if defined(DEBUGGING) && defined(ENABLE_REGEX_SETS_DEBUGGING)
+#    if defined(PERL_IN_REGCOMP_C)
+#define dump_regex_sets_structures(a,b,c,d)    
S_dump_regex_sets_structures(aTHX_ a,b,c,d)
+#    endif
+#  endif
 #  if defined(PERL_ANY_COW)
 #define sv_setsv_cow(a,b)      Perl_sv_setsv_cow(aTHX_ a,b)
 #  endif
diff --git a/proto.h b/proto.h
index efe02e5..0ea0243 100644
--- a/proto.h
+++ b/proto.h
@@ -4056,6 +4056,13 @@ STATIC void      S_print_collxfrm_input_and_return(pTHX_ 
const char * const s, const
        assert(s); assert(e)
 #  endif
 #endif
+#if defined(DEBUGGING) && defined(ENABLE_REGEX_SETS_DEBUGGING)
+#  if defined(PERL_IN_REGCOMP_C)
+STATIC void    S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state, 
AV * stack, const IV fence, AV * fence_stack);
+#define PERL_ARGS_ASSERT_DUMP_REGEX_SETS_STRUCTURES    \
+       assert(pRExC_state); assert(stack); assert(fence_stack)
+#  endif
+#endif
 #if defined(DEBUG_LEAKING_SCALARS_FORK_DUMP)
 PERL_CALLCONV void     Perl_dump_sv_child(pTHX_ SV *sv);
 #define PERL_ARGS_ASSERT_DUMP_SV_CHILD \
diff --git a/regcomp.c b/regcomp.c
index 79c6d5f..832c678 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -8432,9 +8432,9 @@ S_invlist_set_len(pTHX_ SV* const invlist, const UV len, 
const bool offset)
 STATIC void
 S_invlist_replace_list_destroys_src(pTHX_ SV * dest, SV * src)
 {
-    /* Replaces the inversion list in 'src' with the one in 'dest'.  It steals
-     * the list from 'src', so 'src' is made to have a NULL list.  This is
-     * similar to what SvSetMagicSV() would do, if it were implemented on
+    /* Replaces the inversion list in 'dest' with the one from 'src'.  It
+     * steals the list from 'src', so 'src' is made to have a NULL list.  This
+     * is similar to what SvSetMagicSV() would do, if it were implemented on
      * inversion lists, though this routine avoids a copy */
 
     const UV src_len          = _invlist_len(src);
@@ -8941,13 +8941,13 @@ void
 Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
                                          const bool complement_b, SV** output)
 {
-    /* Take the union of two inversion lists and point <output> to it.  *output
-     * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
-     * the reference count to that list will be decremented if not already a
-     * temporary (mortal); otherwise just its contents will be modified to be
-     * the union.  The first list, <a>, may be NULL, in which case a copy of
-     * the second list is returned.  If <complement_b> is TRUE, the union is
-     * taken of the complement (inversion) of <b> instead of b itself.
+    /* Take the union of two inversion lists and point '*output' to it.  On
+     * input, '*output' MUST POINT TO NULL OR TO AN SV* INVERSION LIST 
(possibly
+     * even 'a' or 'b').  If to an inversion list, the contents of the original
+     * list will be replaced by the union.  The first list, 'a', may be
+     * NULL, in which case a copy of the second list is placed in '*output'.
+     * If 'complement_b' is TRUE, the union is taken of the complement
+     * (inversion) of 'b' instead of b itself.
      *
      * The basis for this comes from "Unicode Demystified" Chapter 13 by
      * Richard Gillam, published by Addison-Wesley, and explained at some
@@ -8981,59 +8981,59 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* 
const a, SV* const b,
 
     PERL_ARGS_ASSERT__INVLIST_UNION_MAYBE_COMPLEMENT_2ND;
     assert(a != b);
+    assert(*output == NULL || SvTYPE(*output) == SVt_INVLIST);
 
     len_b = _invlist_len(b);
     if (len_b == 0) {
 
-        /* Here, 'b' is empty.  If the output is the complement of 'b', the
-         * union is all possible code points, and we need not even look at 'a'.
-         * It's easiest to create a new inversion list that matches everything.
-         * */
+        /* Here, 'b' is empty, hence it's complement is all possible code
+         * points.  So if the union includes the complement of 'b', it includes
+         * everything, and we need not even look at 'a'.  It's easiest to
+         * create a new inversion list that matches everything.  */
         if (complement_b) {
             SV* everything = _add_range_to_invlist(NULL, 0, UV_MAX);
 
-            /* If the output didn't exist, just point it at the new list */
-            if (*output == NULL) {
+            if (*output == NULL) { /* If the output didn't exist, just point it
+                                      at the new list */
                 *output = everything;
-                return;
+            }
+            else { /* Otherwise, replace its contents with the new list */
+                invlist_replace_list_destroys_src(*output, everything);
+                SvREFCNT_dec_NN(everything);
             }
 
-            /* Otherwise, replace its contents with the new list */
-            invlist_replace_list_destroys_src(*output, everything);
-            SvREFCNT_dec_NN(everything);
             return;
         }
 
-        /* Here, we don't want the complement of 'b', and since it is empty,
+        /* Here, we don't want the complement of 'b', and since 'b' is empty,
          * the union will come entirely from 'a'.  If 'a' is NULL or empty, the
          * output will be empty */
 
-        if (a == NULL) {
-            *output = _new_invlist(0);
+        if (a == NULL || _invlist_len(a) == 0) {
+            if (*output == NULL) {
+                *output = _new_invlist(0);
+            }
+            else {
+                invlist_clear(*output);
+            }
             return;
         }
 
-        if (_invlist_len(a) == 0) {
-            invlist_clear(*output);
+        /* Here, 'a' is not empty, but 'b' is, so 'a' entirely determines the
+         * union.  We can just return a copy of 'a' if '*output' doesn't point
+         * to an existing list */
+        if (*output == NULL) {
+            *output = invlist_clone(a);
             return;
         }
 
-        /* Here, 'a' is not empty, and entirely determines the union.  If the
-         * output is not to overwrite 'b', we can just return 'a'. */
-        if (*output != b) {
-
-            /* If the output is to overwrite 'a', we have a no-op, as it's
-             * already in 'a' */
-            if (*output == a) {
-                return;
-            }
-
-            /* But otherwise we have to copy 'a' to the output */
-            *output = invlist_clone(a);
+        /* If the output is to overwrite 'a', we have a no-op, as it's
+         * already in 'a' */
+        if (*output == a) {
             return;
         }
 
-        /* Here, 'b' is to be overwritten by the output, which will be 'a' */
+        /* Here, '*output' is to be overwritten by 'a' */
         u = invlist_clone(a);
         invlist_replace_list_destroys_src(*output, u);
         SvREFCNT_dec_NN(u);
@@ -9041,41 +9041,24 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* 
const a, SV* const b,
         return;
     }
 
+    /* Here 'b' is not empty.  See about 'a' */
+
     if (a == NULL || ((len_a = _invlist_len(a)) == 0)) {
 
         /* Here, 'a' is empty (and b is not).  That means the union will come
-         * entirely from 'b'.  If the output is not to overwrite 'a', we can
-         * just return what's in 'b'.  */
-        if (*output != a) {
-
-            /* If the output is to overwrite 'b', it's already in 'b', but
-             * otherwise we have to copy 'b' to the output */
-            if (*output != b) {
-                *output = invlist_clone(b);
-            }
-
-            /* And if the output is to be the inversion of 'b', do that */
-            if (complement_b) {
-                _invlist_invert(*output);
-            }
+         * entirely from 'b'.  If '*output' is NULL, we can directly return a
+         * clone of 'b'.  Otherwise, we replace the contents of '*output' with
+         * the clone */
 
-            return;
+        SV ** dest = (*output == NULL) ? output : &u;
+        *dest = invlist_clone(b);
+        if (complement_b) {
+            _invlist_invert(*dest);
         }
 
-        /* Here, 'a', which is empty or even NULL, is to be overwritten by the
-         * output, which will either be 'b' or the complement of 'b' */
-
-        if (a == NULL) {
-            *output = invlist_clone(b);
-        }
-        else {
-            u = invlist_clone(b);
+        if (dest == &u) {
             invlist_replace_list_destroys_src(*output, u);
             SvREFCNT_dec_NN(u);
-       }
-
-        if (complement_b) {
-            _invlist_invert(*output);
         }
 
        return;
@@ -9112,8 +9095,8 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* const 
a, SV* const b,
     array_u = _invlist_array_init(u, (    len_a > 0 && array_a[0] == 0)
                                       || (len_b > 0 && array_b[0] == 0));
 
-    /* Go through each input list item by item, stopping when exhausted one of
-     * them */
+    /* Go through each input list item by item, stopping when have exhausted
+     * one of them */
     while (i_a < len_a && i_b < len_b) {
        UV cp;      /* The element to potentially add to the union's array */
        bool cp_in_set;   /* is it in the the input list's set or not */
@@ -9216,30 +9199,17 @@ Perl__invlist_union_maybe_complement_2nd(pTHX_ SV* 
const a, SV* const b,
        array_u = invlist_array(u);
     }
 
-    /* If the output is not to overwrite either of the inputs, just return the
-     * calculated union */
-    if (a != *output && b != *output) {
+    if (*output == NULL) {  /* Simply return the new inversion list */
         *output = u;
     }
     else {
-        /*  Here, the output is to be the same as one of the input scalars,
-         *  hence replacing it.  The simple thing to do is to free the input
-         *  scalar, making it instead be the output one.  But experience has
-         *  shown [perl #127392] that if the input is a mortal, we can get a
-         *  huge build-up of these during regex compilation before they get
-         *  freed.  So for that case, replace just the input's interior with
-         *  the union's, and then free the union */
-
-        assert(! invlist_is_iterating(*output));
-
-        if (! SvTEMP(*output)) {
-            SvREFCNT_dec_NN(*output);
-            *output = u;
-        }
-        else {
-            invlist_replace_list_destroys_src(*output, u);
-            SvREFCNT_dec_NN(u);
-        }
+        /* Otherwise, overwrite the inversion list that was in '*output'.  We
+         * could instead free '*output', and then set it to 'u', but experience
+         * has shown [perl #127392] that if the input is a mortal, we can get a
+         * huge build-up of these during regex compilation before they get
+         * freed. */
+        invlist_replace_list_destroys_src(*output, u);
+        SvREFCNT_dec_NN(u);
     }
 
     return;
@@ -9249,14 +9219,13 @@ void
 Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* const a, SV* const b,
                                                const bool complement_b, SV** i)
 {
-    /* Take the intersection of two inversion lists and point <i> to it.  *i
-     * SHOULD BE DEFINED upon input, and if it points to one of the two lists,
-     * the reference count to that list will be decremented if not already a
-     * temporary (mortal); otherwise just its contents will be modified to be
-     * the intersection.  The first list, <a>, may be NULL, in which case an
-     * empty list is returned.  If <complement_b> is TRUE, the result will be
-     * the intersection of <a> and the complement (or inversion) of <b> instead
-     * of <b> directly.
+    /* Take the intersection of two inversion lists and point '*i' to it.  On
+     * input, '*i' MUST POINT TO NULL OR TO AN SV* INVERSION LIST (possibly
+     * even 'a' or 'b').  If to an inversion list, the contents of the original
+     * list will be replaced by the intersection.  The first list, 'a', may be
+     * NULL, in which case '*i' will be an empty list.  If 'complement_b' is
+     * TRUE, the result will be the intersection of 'a' and the complement (or
+     * inversion) of 'b' instead of 'b' directly.
      *
      * The basis for this comes from "Unicode Demystified" Chapter 13 by
      * Richard Gillam, published by Addison-Wesley, and explained at some
@@ -9290,6 +9259,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* 
const a, SV* const b,
 
     PERL_ARGS_ASSERT__INVLIST_INTERSECTION_MAYBE_COMPLEMENT_2ND;
     assert(a != b);
+    assert(*i == NULL || SvTYPE(*i) == SVt_INVLIST);
 
     /* Special case if either one is empty */
     len_a = (a == NULL) ? 0 : _invlist_len(a);
@@ -9305,13 +9275,11 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ 
SV* const a, SV* const b,
                 return;
             }
 
-            /* If not overwriting either input, just make a copy of 'a' */
-            if (*i != b) {
+            if (*i == NULL) {
                 *i = invlist_clone(a);
                 return;
             }
 
-            /* Here we are overwriting 'b' with 'a's contents */
             r = invlist_clone(a);
             invlist_replace_list_destroys_src(*i, r);
             SvREFCNT_dec_NN(r);
@@ -9360,7 +9328,7 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ SV* 
const a, SV* const b,
     array_r = _invlist_array_init(r,    len_a > 0 && array_a[0] == 0
                                      && len_b > 0 && array_b[0] == 0);
 
-    /* Go through each list item by item, stopping when exhausted one of
+    /* Go through each list item by item, stopping when have exhausted one of
      * them */
     while (i_a < len_a && i_b < len_b) {
        UV cp;      /* The element to potentially add to the intersection's
@@ -9465,47 +9433,21 @@ Perl__invlist_intersection_maybe_complement_2nd(pTHX_ 
SV* const a, SV* const b,
        array_r = invlist_array(r);
     }
 
-    /* Finish outputting any remaining */
-    if (count >= 2) { /* At most one will have a non-zero copy count */
-       IV copy_count;
-       if ((copy_count = len_a - i_a) > 0) {
-           Copy(array_a + i_a, array_r + i_r, copy_count, UV);
-       }
-       else if ((copy_count = len_b - i_b) > 0) {
-           Copy(array_b + i_b, array_r + i_r, copy_count, UV);
-       }
-    }
-
-    /* If the output is not to overwrite either of the inputs, just return the
-     * calculated intersection */
-    if (a != *i && b != *i) {
+    if (*i == NULL) { /* Simply return the calculated intersection */
         *i = r;
     }
-    else {
-        /*  Here, the output is to be the same as one of the input scalars,
-         *  hence replacing it.  The simple thing to do is to free the input
-         *  scalar, making it instead be the output one.  But experience has
-         *  shown [perl #127392] that if the input is a mortal, we can get a
-         *  huge build-up of these during regex compilation before they get
-         *  freed.  So for that case, replace just the input's interior with
-         *  the output's, and then free the output.  A short-cut in this case
-         *  is if the output is empty, we can just set the input to be empty */
-
-        assert(! invlist_is_iterating(*i));
-
-        if (! SvTEMP(*i)) {
-            SvREFCNT_dec_NN(*i);
-            *i = r;
+    else { /* Otherwise, replace the existing inversion list in '*i'.  We could
+              instead free '*i', and then set it to 'r', but experience has
+              shown [perl #127392] that if the input is a mortal, we can get a
+              huge build-up of these during regex compilation before they get
+              freed. */
+        if (len_r) {
+            invlist_replace_list_destroys_src(*i, r);
         }
         else {
-            if (len_r) {
-                invlist_replace_list_destroys_src(*i, r);
-            }
-            else {
-                invlist_clear(*i);
-            }
-            SvREFCNT_dec_NN(r);
+            invlist_clear(*i);
         }
+        SvREFCNT_dec_NN(r);
     }
 
     return;
@@ -14984,6 +14926,12 @@ S_handle_regex_sets(pTHX_ RExC_state_t *pRExC_state, 
SV** return_invlist,
 
 redo_curchar:
 
+#ifdef ENABLE_REGEX_SETS_DEBUGGING
+                    /* Enable with -Accflags=-DENABLE_REGEX_SETS_DEBUGGING */
+        DEBUG_U(dump_regex_sets_structures(pRExC_state,
+                                           stack, fence, fence_stack));
+#endif
+
         top_index = av_tindex_nomg(stack);
 
         switch (curchar) {
@@ -15310,17 +15258,12 @@ redo_curchar:
                     {
                         SV* i = NULL;
                         SV* u = NULL;
-                        SV* element;
 
                         _invlist_union(lhs, rhs, &u);
                         _invlist_intersection(lhs, rhs, &i);
-                        /* _invlist_subtract will overwrite rhs
-                            without freeing what it already contains */
-                        element = rhs;
                         _invlist_subtract(u, i, &rhs);
                         SvREFCNT_dec_NN(i);
                         SvREFCNT_dec_NN(u);
-                        SvREFCNT_dec_NN(element);
                         break;
                     }
                 }
@@ -15517,6 +15460,61 @@ redo_curchar:
     Set_Node_Length(node, RExC_parse - oregcomp_parse + 1); /* MJD */
     return node;
 }
+
+#ifdef ENABLE_REGEX_SETS_DEBUGGING
+
+STATIC void
+S_dump_regex_sets_structures(pTHX_ RExC_state_t *pRExC_state,
+                             AV * stack, const IV fence, AV * fence_stack)
+{   /* Dumps the stacks in handle_regex_sets() */
+
+    const SSize_t stack_top = av_tindex_nomg(stack);
+    const SSize_t fence_stack_top = av_tindex_nomg(fence_stack);
+    SSize_t i;
+
+    PERL_ARGS_ASSERT_DUMP_REGEX_SETS_STRUCTURES;
+
+    PerlIO_printf(Perl_debug_log, "\nParse position is:%s\n", RExC_parse);
+
+    if (stack_top < 0) {
+        PerlIO_printf(Perl_debug_log, "Nothing on stack\n");
+    }
+    else {
+        PerlIO_printf(Perl_debug_log, "Stack: (fence=%d)\n", (int) fence);
+        for (i = stack_top; i >= 0; i--) {
+            SV ** element_ptr = av_fetch(stack, i, FALSE);
+            if (! element_ptr) {
+            }
+
+            if (IS_OPERATOR(*element_ptr)) {
+                PerlIO_printf(Perl_debug_log, "[%d]: %c\n",
+                                            (int) i, (int) SvIV(*element_ptr));
+            }
+            else {
+                PerlIO_printf(Perl_debug_log, "[%d] ", (int) i);
+                sv_dump(*element_ptr);
+            }
+        }
+    }
+
+    if (fence_stack_top < 0) {
+        PerlIO_printf(Perl_debug_log, "Nothing on fence_stack\n");
+    }
+    else {
+        PerlIO_printf(Perl_debug_log, "Fence_stack: \n");
+        for (i = fence_stack_top; i >= 0; i--) {
+            SV ** element_ptr = av_fetch(fence_stack, i, FALSE);
+            if (! element_ptr) {
+            }
+
+            PerlIO_printf(Perl_debug_log, "[%d]: %d\n",
+                                            (int) i, (int) SvIV(*element_ptr));
+        }
+    }
+}
+
+#endif
+
 #undef IS_OPERATOR
 #undef IS_OPERAND
 
diff --git a/t/re/regex_sets.t b/t/re/regex_sets.t
index 810e301..6a79f9d 100644
--- a/t/re/regex_sets.t
+++ b/t/re/regex_sets.t
@@ -199,6 +199,11 @@ for my $char ("Ù ", "Ù¥", "Ù©") {
     unlike("g", qr/$pat/, "'g' doesn't match /$pat/");
 }
 
+{   # [perl #129322 ]  This crashed perl, so keep after the ones that don't
+    my $pat = '(?[[!]&[0]^[!]&[0]+[a]])';
+    like("a", qr/$pat/, "/$pat/ compiles and matches 'a'");
+}
+
 done_testing();
 
 1;

--
Perl5 Master Repository

Reply via email to