In perl.git, the branch smoke-me/remove-regcomp-setjmp has been updated

<http://perl5.git.perl.org/perl.git/commitdiff/2c4e40455f15d9b1cdc122814d345bdacdd5fad8?hp=1f605877dc7b1d1b806d6f0e30dc1d5adbfe6ef3>

- Log -----------------------------------------------------------------
commit 2c4e40455f15d9b1cdc122814d345bdacdd5fad8
Author: Nicholas Clark <[email protected]>
Date:   Thu Jan 17 11:47:13 2013 +0100

    Document when and why S_reg{,branch,piece,atom,class}() return NULL.
    
    As documented in pod/perlreguts.pod, the call graph for regex parsing
    involves several levels of functions in regcomp.c, sometimes recursing more
    than once.
    
    The top level compiling function, S_reg(), calls S_regbranch() to parse each
    single branch of an alternation. In turn, that calls S_regpiece() to parse
    a simple pattern followed by quantifier, which calls S_regatom() to parse
    that simple pattern. S_regatom() can call S_regclass() to handle classes,
    but can also recurse into S_reg() to handle subpatterns and some other
    constructions. Some other routines call call S_reg(), sometimes using an
    alternative pattern that they generate dynamically to represent their input.
    
    These routines all return a pointer to a regnode structure, and take a
    pointer to an integer that holds flags, which is also used to return
    information.
    
    Historically, it has not been clear when and why they return NULL, and
    whether the return value can be ignored. In particular, "Jumbo regexp patch"
    (commit c277df42229d99fe, from Nov 1997), added code with two calls from
    S_reg() to S_regbranch(), one of which checks the return value and generates
    a LONGJMP node if it returns NULL, the other of which is called in void
    context, and so both ignores any return value, or the possibility that it is
    NULL.
    
    After some analysis I have untangled the possible return values from these
    5 functions (and related functions which call S_reg()).
    
    Starting from the top:
    S_reg() will return NULL and set the flags to TRYAGAIN at the end of pragma-
    line constructions that it handles. Otherwise, historically it would return
    NULL if S_regbranch() returned NULL. In turn, S_regbranch() would return
    NULL if S_regpiece() returned NULL without setting TRYAGAIN. If S_regpiece()
    returns TRYAGAIN, S_regbranch() loops, and ultimately will not return NULL.
    
    S_regpiece() returns NULL with TRYAGAIN if S_regatom() returns NULL with
    TRYAGAIN, but (historically) if S_regatom() returns NULL without setting
    the flags to TRYAGAIN, S_regpiece() would to. Where S_regatom() calls
    S_reg() it has similar behaviour when passing back return values, although
    often it is able to loop instead on getting a TRYAGAIN.
    
    Which gets us back to S_reg(), which can only *generate* NULL in conjunction
    with TRYAGAIN. NULL without TRYAGAIN could only be returned if a routine it
    called generated it. All other functions that these call that return regnode
    structures cannot return NULL. Hence
    
    1) in the loop of functions called, there is no source for a return value of
       NULL without the TRYAGAIN flag being set
    2) a return value of NULL with TRYAGAIN set from an inner function does not
       propagate out past S_regbranch()
    
    Hence the only return values that most functions can generate are non-NULL,
    or NULL with TRYAGAIN set, and as S_regbranch() catches these, it cannot
    return NULL. The longest sequence of functions that can return NULL (with
    TRYAGAIN set) is S_reg() -> S_regatom() -> S_regpiece() -> S_regbranch().
    Rapidly returning right round the loop back to S_reg() is not possible.
    
    Hence code added by commit c277df42229d99fe to handle a NULL return from
    S_regbranch(), along with some other code is dead.
    
    I have replaced all unreachable code with FAIL()s that panic.
-----------------------------------------------------------------------

Summary of changes:
 regcomp.c |   31 ++++++++++++++++++++++++-------
 1 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/regcomp.c b/regcomp.c
index 22a3191..2c0cd97 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -8321,6 +8321,9 @@ S__invlistEQ(pTHX_ SV* const a, SV* const b, bool 
complement_b)
 #define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
 #endif
 
+/* Returns NULL, setting *flagp to TRYAGAIN at the end of (?) that only sets
+   flags. Otherwise would only return NULL if regbranch() returns NULL, which
+   cannot happen.  */
 STATIC regnode *
 S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
     /* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
@@ -8868,7 +8871,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
                     REGTAIL(pRExC_state, ret, reganode(pRExC_state, IFTHEN, 
0));
                     br = regbranch(pRExC_state, &flags, 1,depth+1);
                    if (br == NULL)
-                       br = reganode(pRExC_state, LONGJMP, 0);
+                        FAIL("panic: regbranch returned NULL");
                    else
                         REGTAIL(pRExC_state, br, reganode(pRExC_state, 
LONGJMP, 0));
                    c = *nextchar(pRExC_state);
@@ -8878,7 +8881,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
                        if (is_define) 
                            vFAIL("(?(DEFINE)....) does not allow branches");
                        lastbr = reganode(pRExC_state, IFTHEN, 0); /* Fake one 
for optimizer. */
-                        regbranch(pRExC_state, &flags, 1,depth+1);
+                        if (!regbranch(pRExC_state, &flags, 1,depth+1))
+                            FAIL("panic: regbranch returned NULL");
                         REGTAIL(pRExC_state, ret, lastbr);
                        if (flags&HASWIDTH)
                            *flagp |= HASWIDTH;
@@ -9128,7 +9132,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
     /*     branch_len = (paren != 0); */
 
     if (br == NULL)
-       return(NULL);
+        FAIL("panic: regbranch returned NULL");
     if (*RExC_parse == '|') {
        if (!SIZE_ONLY && RExC_extralen) {
            reginsert(pRExC_state, BRANCHJ, br, depth+1);
@@ -9168,7 +9172,7 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
         br = regbranch(pRExC_state, &flags, 0, depth+1);
 
        if (br == NULL)
-           return(NULL);
+            FAIL("panic: regbranch returned NULL");
         REGTAIL(pRExC_state, lastbr, br);               /* BRANCH -> BRANCH. */
        lastbr = br;
        *flagp |= flags & (SPSTART | HASWIDTH | POSTPONED);
@@ -9325,6 +9329,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 
*flagp,U32 depth)
  - regbranch - one alternative of an | operator
  *
  * Implements the concatenation operator.
+ *
+ * would only return NULL if regpiece() returns NULL, which cannot happen.
  */
 STATIC regnode *
 S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, I32 first, U32 depth)
@@ -9364,7 +9370,7 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
I32 first, U32 depth)
        if (latest == NULL) {
            if (flags & TRYAGAIN)
                continue;
-           return(NULL);
+            FAIL("panic: regpiece returned NULL");
        }
        else if (ret == NULL)
            ret = latest;
@@ -9398,6 +9404,9 @@ S_regbranch(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
I32 first, U32 depth)
  * both the endmarker for their branch list and the body of the last branch.
  * It might seem that this node could be dispensed with entirely, but the
  * endmarker role is not redundant.
+ *
+ * Returns NULL, setting *flagp to TRYAGAIN if regatom() returns NULL with
+ * TRYAGAIN.
  */
 STATIC regnode *
 S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, U32 depth)
@@ -9428,6 +9437,8 @@ S_regpiece(pTHX_ RExC_state_t *pRExC_state, I32 *flagp, 
U32 depth)
     if (ret == NULL) {
        if (flags & TRYAGAIN)
            *flagp |= TRYAGAIN;
+        else
+            FAIL("panic: regatom returned NULL without setting TRYAGAIN");
        return(NULL);
     }
 
@@ -10048,6 +10059,9 @@ S_alloc_maybe_populate_EXACT(pTHX_ RExC_state_t 
*pRExC_state, regnode *node, I32
    escape sequences, with the one for handling literal escapes requiring
    a dummy entry for all of the special escapes that are actually handled
    by the other.
+
+   Returns NULL, setting *flagp to TRYAGAIN if reg() returns NULL with
+   TRYAGAIN.  Otherwise does not return NULL.
 */
 
 STATIC regnode *
@@ -10131,7 +10145,7 @@ tryagain:
                    }
                    goto tryagain;
                }
-               return(NULL);
+                FAIL("panic: reg returned NULL without setting TRYAGAIN");
        }
        *flagp |= flags&(HASWIDTH|SPSTART|SIMPLE|POSTPONED);
        break;
@@ -11766,7 +11780,10 @@ S_regclass(pTHX_ RExC_state_t *pRExC_state, I32 
*flagp, U32 depth,
      * corresponding bit set if that character is in the list.  For characters
      * above 255, a range list or swash is used.  There are extra bits for \w,
      * etc. in locale ANYOFs, as what these match is not determinable at
-     * compile time */
+     * compile time
+     *
+     * Never returns NULL.
+     */
 
     dVAR;
     UV prevvalue = OOB_UNICODE, save_prevvalue = OOB_UNICODE;

--
Perl5 Master Repository

Reply via email to