Change 29948 by [EMAIL PROTECTED] on 2007/01/24 15:13:14

        Integrate:
        [ 27753]
        Subject:  [PATCH] True consting goodness in regexes
        From:  [EMAIL PROTECTED] (Andy Lester)
        Date:  Mon, 10 Apr 2006 02:15:58 -0500
        Message-Id:  <[EMAIL PROTECTED]>
        
        [ 27762]
        All S_dumpuntil()'s regnode pointer arguments can be const.

Affected files ...

... //depot/maint-5.8/perl/embed.fnc#182 integrate
... //depot/maint-5.8/perl/proto.h#171 edit
... //depot/maint-5.8/perl/regcomp.c#82 edit
... //depot/maint-5.8/perl/regexec.c#71 integrate

Differences ...

==== //depot/maint-5.8/perl/embed.fnc#182 (text) ====
Index: perl/embed.fnc
--- perl/embed.fnc#181~29947~   2007-01-24 05:54:09.000000000 -0800
+++ perl/embed.fnc      2007-01-24 07:13:14.000000000 -0800
@@ -641,7 +641,7 @@
 Ap     |void   |push_scope
 Amb    |OP*    |ref            |NULLOK OP* o|I32 type
 p      |OP*    |refkids        |NULLOK OP* o|I32 type
-Ap     |void   |regdump        |NN regexp* r
+Ap     |void   |regdump        |NN const regexp* r
 Ap     |SV*    |regclass_swash |NN struct regnode *n|bool doinit|NULLOK SV **listsvp|NULLOK SV **altsvp
 Ap     |I32    |pregexec       |NN regexp* prog|NN char* stringarg \
                                |NN char* strend|NN char* strbeg|I32 minend \
@@ -1235,23 +1235,24 @@
 Es     |regnode*|reg_node      |NN struct RExC_state_t *state|U8 op
 Es     |regnode*|regpiece      |NN struct RExC_state_t *state|NN I32 *flagp
 Es     |void   |reginsert      |NN struct RExC_state_t *state|U8 op|NN regnode *opnd
-Es     |void   |regoptail      |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val
-Es     |void   |regtail        |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val
+Es     |void   |regoptail      |NN const struct RExC_state_t *state|NN regnode *p|NN const regnode *val
+Es     |void   |regtail        |NN const struct RExC_state_t *state|NN regnode *p|NN const regnode *val
 EsRn   |char*  |regwhite       |NN char *p|NN const char *e
 Es     |char*  |nextchar       |NN struct RExC_state_t *state
 #  ifdef DEBUGGING
-Es     |regnode*|dumpuntil     |NN regnode *start|NN regnode *node \
-                               |NULLOK regnode *last|NN SV* sv|I32 l
+Es     |const regnode*|dumpuntil|NN const regnode *start \
+                               |NN const regnode *node \
+                               |NULLOK const regnode *last|NN SV* sv|I32 l
 Es     |void   |put_byte       |NN SV* sv|int c
 #  endif
-Es     |void   |scan_commit    |NN struct RExC_state_t* state|NN struct scan_data_t *data
-Esn    |void   |cl_anything    |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl
+Es     |void   |scan_commit    |NN const struct RExC_state_t* state|NN struct scan_data_t *data
+Esn    |void   |cl_anything    |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
 EsRn   |int    |cl_is_anything |NN const struct regnode_charclass_class *cl
-Esn    |void   |cl_init        |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl
-Esn    |void   |cl_init_zero   |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl
+Esn    |void   |cl_init        |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
+Esn    |void   |cl_init_zero   |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
 Esn    |void   |cl_and         |NN struct regnode_charclass_class *cl \
                                |NN const struct regnode_charclass_class *and_with
-Esn    |void   |cl_or          |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl \
+Esn    |void   |cl_or          |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl \
                                |NN const struct regnode_charclass_class *or_with
 Es     |I32    |study_chunk    |NN struct RExC_state_t* state|NN regnode **scanp \
                                |NN I32 *deltap|NN regnode *last|NULLOK struct scan_data_t *data \
@@ -1273,7 +1274,7 @@
 Es     |void   |cache_re       |NN regexp *prog
 ERsn   |U8*    |reghop3        |NN U8 *pos|I32 off|NN U8 *lim
 ERsn   |U8*    |reghopmaybe3   |NN U8 *pos|I32 off|NN U8 *lim
-ERs    |char*  |find_byclass   |NN regexp * prog|NN regnode *c|NN char *s|NN const char *strend|I32 norun
+ERs    |char*  |find_byclass   |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|I32 norun
 Es     |void   |to_utf8_substr |NN regexp * prog
 Es     |void   |to_byte_substr |NN regexp * prog
 #endif

==== //depot/maint-5.8/perl/proto.h#171 (text+w) ====
Index: perl/proto.h
--- perl/proto.h#170~29947~     2007-01-24 05:54:09.000000000 -0800
+++ perl/proto.h        2007-01-24 07:13:14.000000000 -0800
@@ -1042,7 +1042,7 @@
 PERL_CALLCONV void     Perl_push_scope(pTHX);
 /* PERL_CALLCONV OP*   ref(pTHX_ OP* o, I32 type); */
 PERL_CALLCONV OP*      Perl_refkids(pTHX_ OP* o, I32 type);
-PERL_CALLCONV void     Perl_regdump(pTHX_ regexp* r);
+PERL_CALLCONV void     Perl_regdump(pTHX_ const regexp* r);
 PERL_CALLCONV SV*      Perl_regclass_swash(pTHX_ struct regnode *n, bool doinit, SV **listsvp, SV **altsvp);
 PERL_CALLCONV I32      Perl_pregexec(pTHX_ regexp* prog, char* stringarg, char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave);
 PERL_CALLCONV void     Perl_pregfree(pTHX_ struct regexp* r);
@@ -1812,25 +1812,25 @@
 STATIC regnode*        S_reg_node(pTHX_ struct RExC_state_t *state, U8 op);
 STATIC regnode*        S_regpiece(pTHX_ struct RExC_state_t *state, I32 *flagp);
 STATIC void    S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode *opnd);
-STATIC void    S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val);
-STATIC void    S_regtail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val);
+STATIC void    S_regoptail(pTHX_ const struct RExC_state_t *state, regnode *p, const regnode *val);
+STATIC void    S_regtail(pTHX_ const struct RExC_state_t *state, regnode *p, const regnode *val);
 STATIC char*   S_regwhite(char *p, const char *e)
                        __attribute__warn_unused_result__;
 
 STATIC char*   S_nextchar(pTHX_ struct RExC_state_t *state);
 #  ifdef DEBUGGING
-STATIC regnode*        S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l);
+STATIC const regnode*  S_dumpuntil(pTHX_ const regnode *start, const regnode *node, const regnode *last, SV* sv, I32 l);
 STATIC void    S_put_byte(pTHX_ SV* sv, int c);
 #  endif
-STATIC void    S_scan_commit(pTHX_ struct RExC_state_t* state, struct scan_data_t *data);
-STATIC void    S_cl_anything(struct RExC_state_t* state, struct regnode_charclass_class *cl);
+STATIC void    S_scan_commit(pTHX_ const struct RExC_state_t* state, struct scan_data_t *data);
+STATIC void    S_cl_anything(const struct RExC_state_t* state, struct regnode_charclass_class *cl);
 STATIC int     S_cl_is_anything(const struct regnode_charclass_class *cl)
                        __attribute__warn_unused_result__;
 
-STATIC void    S_cl_init(struct RExC_state_t* state, struct regnode_charclass_class *cl);
-STATIC void    S_cl_init_zero(struct RExC_state_t* state, struct regnode_charclass_class *cl);
+STATIC void    S_cl_init(const struct RExC_state_t* state, struct regnode_charclass_class *cl);
+STATIC void    S_cl_init_zero(const struct RExC_state_t* state, struct regnode_charclass_class *cl);
 STATIC void    S_cl_and(struct regnode_charclass_class *cl, const struct regnode_charclass_class *and_with);
-STATIC void    S_cl_or(struct RExC_state_t* state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with);
+STATIC void    S_cl_or(const struct RExC_state_t* state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with);
 STATIC I32     S_study_chunk(pTHX_ struct RExC_state_t* state, regnode **scanp, I32 *deltap, regnode *last, struct scan_data_t *data, U32 flags);
 STATIC I32     S_add_data(struct RExC_state_t* state, I32 n, const char *s)
                        __attribute__warn_unused_result__;
@@ -1864,7 +1864,7 @@
 STATIC U8*     S_reghopmaybe3(U8 *pos, I32 off, U8 *lim)
                        __attribute__warn_unused_result__;
 
-STATIC char*   S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
+STATIC char*   S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
                        __attribute__warn_unused_result__;
 
 STATIC void    S_to_utf8_substr(pTHX_ regexp * prog);
@@ -1874,9 +1874,7 @@
 #if defined(PERL_IN_DUMP_C) || defined(PERL_DECL_PROT)
 STATIC CV*     S_deb_curcv(pTHX_ I32 ix);
 STATIC void    S_debprof(pTHX_ const OP *o);
-STATIC SV*     S_pm_description(pTHX_ const PMOP *pm)
-                       __attribute__nonnull__(pTHX_1);
-
+STATIC SV*     S_pm_description(pTHX_ const PMOP *pm);
 #endif
 
 #if defined(PERL_IN_SCOPE_C) || defined(PERL_DECL_PROT)

==== //depot/maint-5.8/perl/regcomp.c#82 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#81~29925~    2007-01-22 14:10:59.000000000 -0800
+++ perl/regcomp.c      2007-01-24 07:13:14.000000000 -0800
@@ -444,7 +444,7 @@
    floating substrings if needed. */
 
 STATIC void
-S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
+S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data)
 {
     const STRLEN l = CHR_SVLEN(data->last_found);
     const STRLEN old_l = CHR_SVLEN(*data->longest);
@@ -476,10 +476,11 @@
     SvCUR_set(data->last_found, 0);
     {
        SV * const sv = data->last_found;
-       MAGIC * const mg =
-           SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
-       if (mg)
-           mg->mg_len = 0;
+       if (SvUTF8(sv) && SvMAGICAL(sv)) {
+           MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
+           if (mg)
+               mg->mg_len = 0;
+       }
     }
     data->last_end = -1;
     data->flags &= ~SF_BEFORE_EOL;
@@ -487,7 +488,7 @@
 
 /* Can match anything (initialization) */
 STATIC void
-S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     ANYOF_CLASS_ZERO(cl);
     ANYOF_BITMAP_SETALL(cl);
@@ -514,7 +515,7 @@
 
 /* Can match anything (initialization) */
 STATIC void
-S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     Zero(cl, 1, struct regnode_charclass_class);
     cl->type = ANYOF;
@@ -522,7 +523,7 @@
 }
 
 STATIC void
-S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
 {
     Zero(cl, 1, struct regnode_charclass_class);
     cl->type = ANYOF;
@@ -571,7 +572,7 @@
 /* 'OR' a given class with another one.  Can create false positives */
 /* We assume that cl is not inverted */
 STATIC void
-S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
+S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const struct regnode_charclass_class *or_with)
 {
     if (or_with->flags & ANYOF_INVERT) {
        /* We do not use
@@ -3368,7 +3369,7 @@
        /* I smell either [: or [= or [. -- POSIX has been here, right? */
        POSIXCC(UCHARAT(RExC_parse))) {
        const char c = UCHARAT(RExC_parse);
-       char* s = RExC_parse++;
+       char* const s = RExC_parse++;
        
        while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
            RExC_parse++;
@@ -3670,12 +3671,8 @@
                              n--;
                         }
                    }
-                   if (value == 'p')
-                        Perl_sv_catpvf(aTHX_ listsv,
-                                       "+utf8::%.*s\n", (int)n, RExC_parse);
-                   else
-                        Perl_sv_catpvf(aTHX_ listsv,
-                                       "!utf8::%.*s\n", (int)n, RExC_parse);
+                   Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
+                       (value=='p' ? '+' : '!'), (int)n, RExC_parse);
                }
                RExC_parse = e + 1;
                ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
@@ -3744,14 +3741,12 @@
            if (range) {
                if (!SIZE_ONLY) {
                    if (ckWARN(WARN_REGEXP)) {
-                       int w =
+                       const int w =
                            RExC_parse >= rangebegin ?
                            RExC_parse - rangebegin : 0;
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
-                              w,
-                              w,
-                              rangebegin);
+                              w, w, rangebegin);
                    }
                    if (prevvalue < 256) {
                        ANYOF_BITMAP_SET(ret, prevvalue);
@@ -4160,9 +4155,7 @@
                            RExC_parse - rangebegin : 0;
                        vWARN4(RExC_parse,
                               "False [] range \"%*.*s\"",
-                              w,
-                              w,
-                              rangebegin);
+                              w, w, rangebegin);
                    }
                    if (!SIZE_ONLY)
                        ANYOF_BITMAP_SET(ret, '-');
@@ -4306,7 +4299,7 @@
     }
 
     if (!SIZE_ONLY) {
-       AV *av = newAV();
+       AV * const av = newAV();
        SV *rv;
 
        /* The 0th element stores the character class description
@@ -4330,7 +4323,7 @@
 STATIC char*
 S_nextchar(pTHX_ RExC_state_t *pRExC_state)
 {
-    char* retval = RExC_parse++;
+    char* const retval = RExC_parse++;
 
     for (;;) {
        if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
@@ -4501,8 +4494,9 @@
 /*
 - regtail - set the next-pointer at the end of a node chain of p to val.
 */
+/* TODO: All three parms should be const */
 STATIC void
-S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regtail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
 {
     register regnode *scan;
 
@@ -4529,8 +4523,9 @@
 /*
 - regoptail - regtail on operand of first argument; nop if operandless
 */
+/* TODO: All three parms should be const */
 STATIC void
-S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regoptail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode *val)
 {
     /* "Operandless" and "op != BRANCH" are synonymous in practice. */
     if (p == NULL || SIZE_ONLY)
@@ -4570,7 +4565,7 @@
  - regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
  */
 void
-Perl_regdump(pTHX_ regexp *r)
+Perl_regdump(pTHX_ const regexp *r)
 {
 #ifdef DEBUGGING
     SV * const sv = sv_newmortal();
@@ -5137,11 +5132,12 @@
 }
 
 
-STATIC regnode *
-S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
+STATIC const regnode *
+S_dumpuntil(pTHX_ const regnode *start, const regnode *node,
+           const regnode *last, SV* sv, I32 l)
 {
     register U8 op = EXACT;    /* Arbitrary non-END op. */
-    register regnode *next;
+    register const regnode *next;
 
     while (op != END && (!last || node < last)) {
        /* While that wasn't END last time... */
@@ -5150,11 +5146,11 @@
        op = OP(node);
        if (op == CLOSE)
            l--;        
-       next = regnext(node);
+       next = regnext((regnode *)node);
        /* Where, what. */
        if (OP(node) == OPTIMIZED)
            goto after_print;
-       regprop(sv, node);
+       regprop(sv, (regnode *) node);
        PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
                      (int)(2*l + 1), "", SvPVX_const(sv));
        if (next == NULL)               /* Next ptr. */
@@ -5164,9 +5160,9 @@
        (void)PerlIO_putc(Perl_debug_log, '\n');
       after_print:
        if (PL_regkind[(U8)op] == BRANCHJ) {
-           register regnode *nnode = (OP(next) == LONGJMP
-                                      ? regnext(next)
-                                      : next);
+           register const regnode *nnode = (OP(next) == LONGJMP
+                                            ? regnext((regnode *)next)
+                                            : next);
            if (last && nnode > last)
                nnode = last;
            node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);

==== //depot/maint-5.8/perl/regexec.c#71 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#70~29946~    2007-01-24 05:23:35.000000000 -0800
+++ perl/regexec.c      2007-01-24 07:13:14.000000000 -0800
@@ -215,7 +215,6 @@
 S_regcppop(pTHX)
 {
     I32 i;
-    U32 paren = 0;
     char *input;
 
     /* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
@@ -231,7 +230,7 @@
     for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
         i > 0; i -= REGCP_PAREN_ELEMS) {
        I32 tmps;
-       paren = (U32)SSPOPINT;
+       U32 paren = (U32)SSPOPINT;
        PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
        PL_regstartp[paren] = SSPOPINT;
        tmps = SSPOPINT;
@@ -264,10 +263,10 @@
      * building DynaLoader will fail:
      * "Error: '*' not in typemap in DynaLoader.xs, line 164"
      * --jhi */
-    for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
-       if ((I32)paren > PL_regsize)
-           PL_regstartp[paren] = -1;
-       PL_regendp[paren] = -1;
+    for (i = *PL_reglastparen + 1; i <= PL_regnpar; i++) {
+       if (i > PL_regsize)
+           PL_regstartp[i] = -1;
+       PL_regendp[i] = -1;
     }
 #endif
     return input;
@@ -948,7 +947,7 @@
 
 /* We know what class REx starts with.  Try to find this position... */
 STATIC char *
-S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend, I32 norun)
+S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char *strend, I32 norun)
 {
        const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
        char *m;
@@ -2071,7 +2070,6 @@
 STATIC I32                     /* 0 failure, 1 success */
 S_regtry(pTHX_ regexp *prog, char *startpos)
 {
-    register I32 i;
     register I32 *sp;
     register I32 *ep;
     CHECKPOINT lastcp;
@@ -2183,6 +2181,7 @@
     sp = prog->startp;
     ep = prog->endp;
     if (prog->nparens) {
+       register I32 i;
        for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
            *++sp = -1;
            *++ep = -1;
@@ -4532,8 +4531,8 @@
 S_to_utf8_substr(pTHX_ register regexp *prog)
 {
     if (prog->float_substr && !prog->float_utf8) {
-       SV* sv;
-       prog->float_utf8 = sv = newSVsv(prog->float_substr);
+       SV* const sv = newSVsv(prog->float_substr);
+       prog->float_utf8 = sv;
        sv_utf8_upgrade(sv);
        if (SvTAIL(prog->float_substr))
            SvTAIL_on(sv);
@@ -4541,8 +4540,8 @@
            prog->check_utf8 = sv;
     }
     if (prog->anchored_substr && !prog->anchored_utf8) {
-       SV* sv;
-       prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
+       SV* const sv = newSVsv(prog->anchored_substr);
+       prog->anchored_utf8 = sv;
        sv_utf8_upgrade(sv);
        if (SvTAIL(prog->anchored_substr))
            SvTAIL_on(sv);
@@ -4555,8 +4554,8 @@
 S_to_byte_substr(pTHX_ register regexp *prog)
 {
     if (prog->float_utf8 && !prog->float_substr) {
-       SV* sv;
-       prog->float_substr = sv = newSVsv(prog->float_utf8);
+       SV* sv = newSVsv(prog->float_utf8);
+       prog->float_substr = sv;
        if (sv_utf8_downgrade(sv, TRUE)) {
            if (SvTAIL(prog->float_utf8))
                SvTAIL_on(sv);
@@ -4568,8 +4567,8 @@
            prog->check_substr = sv;
     }
     if (prog->anchored_utf8 && !prog->anchored_substr) {
-       SV* sv;
-       prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
+       SV* sv = newSVsv(prog->anchored_utf8);
+       prog->anchored_substr = sv;
        if (sv_utf8_downgrade(sv, TRUE)) {
            if (SvTAIL(prog->anchored_utf8))
                SvTAIL_on(sv);
End of Patch.

Reply via email to