Change 29948 by [EMAIL PROTECTED] on 2007/01/24 15:13:14
Integrate:
[ 27753]
Subject: [PATCH] True consting goodness in regexes
From: [EMAIL PROTECTED] (Andy Lester)
Date: Mon, 10 Apr 2006 02:15:58 -0500
Message-Id: <[EMAIL PROTECTED]>
[ 27762]
All S_dumpuntil()'s regnode pointer arguments can be const.
Affected files ...
... //depot/maint-5.8/perl/embed.fnc#182 integrate
... //depot/maint-5.8/perl/proto.h#171 edit
... //depot/maint-5.8/perl/regcomp.c#82 edit
... //depot/maint-5.8/perl/regexec.c#71 integrate
Differences ...
==== //depot/maint-5.8/perl/embed.fnc#182 (text) ====
Index: perl/embed.fnc
--- perl/embed.fnc#181~29947~ 2007-01-24 05:54:09.000000000 -0800
+++ perl/embed.fnc 2007-01-24 07:13:14.000000000 -0800
@@ -641,7 +641,7 @@
Ap |void |push_scope
Amb |OP* |ref |NULLOK OP* o|I32 type
p |OP* |refkids |NULLOK OP* o|I32 type
-Ap |void |regdump |NN regexp* r
+Ap |void |regdump |NN const regexp* r
Ap |SV* |regclass_swash |NN struct regnode *n|bool doinit|NULLOK SV
**listsvp|NULLOK SV **altsvp
Ap |I32 |pregexec |NN regexp* prog|NN char* stringarg \
|NN char* strend|NN char* strbeg|I32 minend \
@@ -1235,23 +1235,24 @@
Es |regnode*|reg_node |NN struct RExC_state_t *state|U8 op
Es |regnode*|regpiece |NN struct RExC_state_t *state|NN I32 *flagp
Es |void |reginsert |NN struct RExC_state_t *state|U8 op|NN regnode
*opnd
-Es |void |regoptail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val
-Es |void |regtail |NN struct RExC_state_t *state|NN regnode *p|NN regnode *val
+Es |void |regoptail |NN const struct RExC_state_t *state|NN regnode *p|NN const regnode *val
+Es |void |regtail |NN const struct RExC_state_t *state|NN regnode *p|NN const regnode *val
EsRn |char* |regwhite |NN char *p|NN const char *e
Es |char* |nextchar |NN struct RExC_state_t *state
# ifdef DEBUGGING
-Es |regnode*|dumpuntil |NN regnode *start|NN regnode *node \
- |NULLOK regnode *last|NN SV* sv|I32 l
+Es |const regnode*|dumpuntil|NN const regnode *start \
+ |NN const regnode *node \
+ |NULLOK const regnode *last|NN SV* sv|I32 l
Es |void |put_byte |NN SV* sv|int c
# endif
-Es |void |scan_commit |NN struct RExC_state_t* state|NN struct scan_data_t *data
-Esn |void |cl_anything |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl
+Es |void |scan_commit |NN const struct RExC_state_t* state|NN struct scan_data_t *data
+Esn |void |cl_anything |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
EsRn |int |cl_is_anything |NN const struct regnode_charclass_class *cl
-Esn |void |cl_init |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl
-Esn |void |cl_init_zero |NN struct RExC_state_t* state|NN struct regnode_charclass_class *cl
+Esn |void |cl_init |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
+Esn |void |cl_init_zero |NN const struct RExC_state_t* state|NN struct regnode_charclass_class *cl
Esn |void |cl_and |NN struct regnode_charclass_class *cl \
|NN const struct regnode_charclass_class
*and_with
-Esn |void |cl_or |NN struct RExC_state_t* state|NN struct
regnode_charclass_class *cl \
+Esn |void |cl_or |NN const struct RExC_state_t* state|NN struct
regnode_charclass_class *cl \
|NN const struct regnode_charclass_class
*or_with
Es |I32 |study_chunk |NN struct RExC_state_t* state|NN regnode
**scanp \
|NN I32 *deltap|NN regnode *last|NULLOK struct
scan_data_t *data \
@@ -1273,7 +1274,7 @@
Es |void |cache_re |NN regexp *prog
ERsn |U8* |reghop3 |NN U8 *pos|I32 off|NN U8 *lim
ERsn |U8* |reghopmaybe3 |NN U8 *pos|I32 off|NN U8 *lim
-ERs |char* |find_byclass |NN regexp * prog|NN regnode *c|NN char *s|NN const char *strend|I32 norun
+ERs |char* |find_byclass |NN regexp * prog|NN const regnode *c|NN char *s|NN const char *strend|I32 norun
Es |void |to_utf8_substr |NN regexp * prog
Es |void |to_byte_substr |NN regexp * prog
#endif
==== //depot/maint-5.8/perl/proto.h#171 (text+w) ====
Index: perl/proto.h
--- perl/proto.h#170~29947~ 2007-01-24 05:54:09.000000000 -0800
+++ perl/proto.h 2007-01-24 07:13:14.000000000 -0800
@@ -1042,7 +1042,7 @@
PERL_CALLCONV void Perl_push_scope(pTHX);
/* PERL_CALLCONV OP* ref(pTHX_ OP* o, I32 type); */
PERL_CALLCONV OP* Perl_refkids(pTHX_ OP* o, I32 type);
-PERL_CALLCONV void Perl_regdump(pTHX_ regexp* r);
+PERL_CALLCONV void Perl_regdump(pTHX_ const regexp* r);
PERL_CALLCONV SV* Perl_regclass_swash(pTHX_ struct regnode *n, bool
doinit, SV **listsvp, SV **altsvp);
PERL_CALLCONV I32 Perl_pregexec(pTHX_ regexp* prog, char* stringarg,
char* strend, char* strbeg, I32 minend, SV* screamer, U32 nosave);
PERL_CALLCONV void Perl_pregfree(pTHX_ struct regexp* r);
@@ -1812,25 +1812,25 @@
STATIC regnode* S_reg_node(pTHX_ struct RExC_state_t *state, U8 op);
STATIC regnode* S_regpiece(pTHX_ struct RExC_state_t *state, I32
*flagp);
STATIC void S_reginsert(pTHX_ struct RExC_state_t *state, U8 op, regnode
*opnd);
-STATIC void S_regoptail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val);
-STATIC void S_regtail(pTHX_ struct RExC_state_t *state, regnode *p, regnode *val);
+STATIC void S_regoptail(pTHX_ const struct RExC_state_t *state, regnode *p, const regnode *val);
+STATIC void S_regtail(pTHX_ const struct RExC_state_t *state, regnode *p, const regnode *val);
STATIC char* S_regwhite(char *p, const char *e)
__attribute__warn_unused_result__;
STATIC char* S_nextchar(pTHX_ struct RExC_state_t *state);
# ifdef DEBUGGING
-STATIC regnode* S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l);
+STATIC const regnode* S_dumpuntil(pTHX_ const regnode *start, const regnode *node, const regnode *last, SV* sv, I32 l);
STATIC void S_put_byte(pTHX_ SV* sv, int c);
# endif
-STATIC void S_scan_commit(pTHX_ struct RExC_state_t* state, struct
scan_data_t *data);
-STATIC void S_cl_anything(struct RExC_state_t* state, struct
regnode_charclass_class *cl);
+STATIC void S_scan_commit(pTHX_ const struct RExC_state_t* state, struct
scan_data_t *data);
+STATIC void S_cl_anything(const struct RExC_state_t* state, struct
regnode_charclass_class *cl);
STATIC int S_cl_is_anything(const struct regnode_charclass_class *cl)
__attribute__warn_unused_result__;
-STATIC void S_cl_init(struct RExC_state_t* state, struct
regnode_charclass_class *cl);
-STATIC void S_cl_init_zero(struct RExC_state_t* state, struct
regnode_charclass_class *cl);
+STATIC void S_cl_init(const struct RExC_state_t* state, struct
regnode_charclass_class *cl);
+STATIC void S_cl_init_zero(const struct RExC_state_t* state, struct
regnode_charclass_class *cl);
STATIC void S_cl_and(struct regnode_charclass_class *cl, const struct
regnode_charclass_class *and_with);
-STATIC void S_cl_or(struct RExC_state_t* state, struct
regnode_charclass_class *cl, const struct regnode_charclass_class *or_with);
+STATIC void S_cl_or(const struct RExC_state_t* state, struct
regnode_charclass_class *cl, const struct regnode_charclass_class *or_with);
STATIC I32 S_study_chunk(pTHX_ struct RExC_state_t* state, regnode
**scanp, I32 *deltap, regnode *last, struct scan_data_t *data, U32 flags);
STATIC I32 S_add_data(struct RExC_state_t* state, I32 n, const char *s)
__attribute__warn_unused_result__;
@@ -1864,7 +1864,7 @@
STATIC U8* S_reghopmaybe3(U8 *pos, I32 off, U8 *lim)
__attribute__warn_unused_result__;
-STATIC char* S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const
char *strend, I32 norun)
+STATIC char* S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s,
const char *strend, I32 norun)
__attribute__warn_unused_result__;
STATIC void S_to_utf8_substr(pTHX_ regexp * prog);
@@ -1874,9 +1874,7 @@
#if defined(PERL_IN_DUMP_C) || defined(PERL_DECL_PROT)
STATIC CV* S_deb_curcv(pTHX_ I32 ix);
STATIC void S_debprof(pTHX_ const OP *o);
-STATIC SV* S_pm_description(pTHX_ const PMOP *pm)
- __attribute__nonnull__(pTHX_1);
-
+STATIC SV* S_pm_description(pTHX_ const PMOP *pm);
#endif
#if defined(PERL_IN_SCOPE_C) || defined(PERL_DECL_PROT)
==== //depot/maint-5.8/perl/regcomp.c#82 (text) ====
Index: perl/regcomp.c
--- perl/regcomp.c#81~29925~ 2007-01-22 14:10:59.000000000 -0800
+++ perl/regcomp.c 2007-01-24 07:13:14.000000000 -0800
@@ -444,7 +444,7 @@
floating substrings if needed. */
STATIC void
-S_scan_commit(pTHX_ RExC_state_t *pRExC_state, scan_data_t *data)
+S_scan_commit(pTHX_ const RExC_state_t *pRExC_state, scan_data_t *data)
{
const STRLEN l = CHR_SVLEN(data->last_found);
const STRLEN old_l = CHR_SVLEN(*data->longest);
@@ -476,10 +476,11 @@
SvCUR_set(data->last_found, 0);
{
SV * const sv = data->last_found;
- MAGIC * const mg =
- SvUTF8(sv) && SvMAGICAL(sv) ? mg_find(sv, PERL_MAGIC_utf8) : NULL;
- if (mg)
- mg->mg_len = 0;
+ if (SvUTF8(sv) && SvMAGICAL(sv)) {
+ MAGIC * const mg = mg_find(sv, PERL_MAGIC_utf8);
+ if (mg)
+ mg->mg_len = 0;
+ }
}
data->last_end = -1;
data->flags &= ~SF_BEFORE_EOL;
@@ -487,7 +488,7 @@
/* Can match anything (initialization) */
STATIC void
-S_cl_anything(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_anything(const RExC_state_t *pRExC_state, struct regnode_charclass_class
*cl)
{
ANYOF_CLASS_ZERO(cl);
ANYOF_BITMAP_SETALL(cl);
@@ -514,7 +515,7 @@
/* Can match anything (initialization) */
STATIC void
-S_cl_init(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
{
Zero(cl, 1, struct regnode_charclass_class);
cl->type = ANYOF;
@@ -522,7 +523,7 @@
}
STATIC void
-S_cl_init_zero(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl)
+S_cl_init_zero(const RExC_state_t *pRExC_state, struct regnode_charclass_class
*cl)
{
Zero(cl, 1, struct regnode_charclass_class);
cl->type = ANYOF;
@@ -571,7 +572,7 @@
/* 'OR' a given class with another one. Can create false positives */
/* We assume that cl is not inverted */
STATIC void
-S_cl_or(RExC_state_t *pRExC_state, struct regnode_charclass_class *cl, const
struct regnode_charclass_class *or_with)
+S_cl_or(const RExC_state_t *pRExC_state, struct regnode_charclass_class *cl,
const struct regnode_charclass_class *or_with)
{
if (or_with->flags & ANYOF_INVERT) {
/* We do not use
@@ -3368,7 +3369,7 @@
/* I smell either [: or [= or [. -- POSIX has been here, right? */
POSIXCC(UCHARAT(RExC_parse))) {
const char c = UCHARAT(RExC_parse);
- char* s = RExC_parse++;
+ char* const s = RExC_parse++;
while (RExC_parse < RExC_end && UCHARAT(RExC_parse) != c)
RExC_parse++;
@@ -3670,12 +3671,8 @@
n--;
}
}
- if (value == 'p')
- Perl_sv_catpvf(aTHX_ listsv,
- "+utf8::%.*s\n", (int)n, RExC_parse);
- else
- Perl_sv_catpvf(aTHX_ listsv,
- "!utf8::%.*s\n", (int)n, RExC_parse);
+ Perl_sv_catpvf(aTHX_ listsv, "%cutf8::%.*s\n",
+ (value=='p' ? '+' : '!'), (int)n, RExC_parse);
}
RExC_parse = e + 1;
ANYOF_FLAGS(ret) |= ANYOF_UNICODE;
@@ -3744,14 +3741,12 @@
if (range) {
if (!SIZE_ONLY) {
if (ckWARN(WARN_REGEXP)) {
- int w =
+ const int w =
RExC_parse >= rangebegin ?
RExC_parse - rangebegin : 0;
vWARN4(RExC_parse,
"False [] range \"%*.*s\"",
- w,
- w,
- rangebegin);
+ w, w, rangebegin);
}
if (prevvalue < 256) {
ANYOF_BITMAP_SET(ret, prevvalue);
@@ -4160,9 +4155,7 @@
RExC_parse - rangebegin : 0;
vWARN4(RExC_parse,
"False [] range \"%*.*s\"",
- w,
- w,
- rangebegin);
+ w, w, rangebegin);
}
if (!SIZE_ONLY)
ANYOF_BITMAP_SET(ret, '-');
@@ -4306,7 +4299,7 @@
}
if (!SIZE_ONLY) {
- AV *av = newAV();
+ AV * const av = newAV();
SV *rv;
/* The 0th element stores the character class description
@@ -4330,7 +4323,7 @@
STATIC char*
S_nextchar(pTHX_ RExC_state_t *pRExC_state)
{
- char* retval = RExC_parse++;
+ char* const retval = RExC_parse++;
for (;;) {
if (*RExC_parse == '(' && RExC_parse[1] == '?' &&
@@ -4501,8 +4494,9 @@
/*
- regtail - set the next-pointer at the end of a node chain of p to val.
*/
+/* TODO: All three parms should be const */
STATIC void
-S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regtail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode
*val)
{
register regnode *scan;
@@ -4529,8 +4523,9 @@
/*
- regoptail - regtail on operand of first argument; nop if operandless
*/
+/* TODO: All three parms should be const */
STATIC void
-S_regoptail(pTHX_ RExC_state_t *pRExC_state, regnode *p, regnode *val)
+S_regoptail(pTHX_ const RExC_state_t *pRExC_state, regnode *p, const regnode
*val)
{
/* "Operandless" and "op != BRANCH" are synonymous in practice. */
if (p == NULL || SIZE_ONLY)
@@ -4570,7 +4565,7 @@
- regdump - dump a regexp onto Perl_debug_log in vaguely comprehensible form
*/
void
-Perl_regdump(pTHX_ regexp *r)
+Perl_regdump(pTHX_ const regexp *r)
{
#ifdef DEBUGGING
SV * const sv = sv_newmortal();
@@ -5137,11 +5132,12 @@
}
-STATIC regnode *
-S_dumpuntil(pTHX_ regnode *start, regnode *node, regnode *last, SV* sv, I32 l)
+STATIC const regnode *
+S_dumpuntil(pTHX_ const regnode *start, const regnode *node,
+ const regnode *last, SV* sv, I32 l)
{
register U8 op = EXACT; /* Arbitrary non-END op. */
- register regnode *next;
+ register const regnode *next;
while (op != END && (!last || node < last)) {
/* While that wasn't END last time... */
@@ -5150,11 +5146,11 @@
op = OP(node);
if (op == CLOSE)
l--;
- next = regnext(node);
+ next = regnext((regnode *)node);
/* Where, what. */
if (OP(node) == OPTIMIZED)
goto after_print;
- regprop(sv, node);
+ regprop(sv, (regnode *) node);
PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start),
(int)(2*l + 1), "", SvPVX_const(sv));
if (next == NULL) /* Next ptr. */
@@ -5164,9 +5160,9 @@
(void)PerlIO_putc(Perl_debug_log, '\n');
after_print:
if (PL_regkind[(U8)op] == BRANCHJ) {
- register regnode *nnode = (OP(next) == LONGJMP
- ? regnext(next)
- : next);
+ register const regnode *nnode = (OP(next) == LONGJMP
+ ? regnext((regnode *)next)
+ : next);
if (last && nnode > last)
nnode = last;
node = dumpuntil(start, NEXTOPER(NEXTOPER(node)), nnode, sv, l + 1);
==== //depot/maint-5.8/perl/regexec.c#71 (text) ====
Index: perl/regexec.c
--- perl/regexec.c#70~29946~ 2007-01-24 05:23:35.000000000 -0800
+++ perl/regexec.c 2007-01-24 07:13:14.000000000 -0800
@@ -215,7 +215,6 @@
S_regcppop(pTHX)
{
I32 i;
- U32 paren = 0;
char *input;
/* Pop REGCP_OTHER_ELEMS before the parentheses loop starts. */
@@ -231,7 +230,7 @@
for (i -= (REGCP_OTHER_ELEMS - REGCP_FRAME_ELEMS);
i > 0; i -= REGCP_PAREN_ELEMS) {
I32 tmps;
- paren = (U32)SSPOPINT;
+ U32 paren = (U32)SSPOPINT;
PL_reg_start_tmp[paren] = (char *) SSPOPPTR;
PL_regstartp[paren] = SSPOPINT;
tmps = SSPOPINT;
@@ -264,10 +263,10 @@
* building DynaLoader will fail:
* "Error: '*' not in typemap in DynaLoader.xs, line 164"
* --jhi */
- for (paren = *PL_reglastparen + 1; (I32)paren <= PL_regnpar; paren++) {
- if ((I32)paren > PL_regsize)
- PL_regstartp[paren] = -1;
- PL_regendp[paren] = -1;
+ for (i = *PL_reglastparen + 1; i <= PL_regnpar; i++) {
+ if (i > PL_regsize)
+ PL_regstartp[i] = -1;
+ PL_regendp[i] = -1;
}
#endif
return input;
@@ -948,7 +947,7 @@
/* We know what class REx starts with. Try to find this position... */
STATIC char *
-S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, const char *strend,
I32 norun)
+S_find_byclass(pTHX_ regexp * prog, const regnode *c, char *s, const char
*strend, I32 norun)
{
const I32 doevery = (prog->reganch & ROPT_SKIP) == 0;
char *m;
@@ -2071,7 +2070,6 @@
STATIC I32 /* 0 failure, 1 success */
S_regtry(pTHX_ regexp *prog, char *startpos)
{
- register I32 i;
register I32 *sp;
register I32 *ep;
CHECKPOINT lastcp;
@@ -2183,6 +2181,7 @@
sp = prog->startp;
ep = prog->endp;
if (prog->nparens) {
+ register I32 i;
for (i = prog->nparens; i > (I32)*PL_reglastparen; i--) {
*++sp = -1;
*++ep = -1;
@@ -4532,8 +4531,8 @@
S_to_utf8_substr(pTHX_ register regexp *prog)
{
if (prog->float_substr && !prog->float_utf8) {
- SV* sv;
- prog->float_utf8 = sv = newSVsv(prog->float_substr);
+ SV* const sv = newSVsv(prog->float_substr);
+ prog->float_utf8 = sv;
sv_utf8_upgrade(sv);
if (SvTAIL(prog->float_substr))
SvTAIL_on(sv);
@@ -4541,8 +4540,8 @@
prog->check_utf8 = sv;
}
if (prog->anchored_substr && !prog->anchored_utf8) {
- SV* sv;
- prog->anchored_utf8 = sv = newSVsv(prog->anchored_substr);
+ SV* const sv = newSVsv(prog->anchored_substr);
+ prog->anchored_utf8 = sv;
sv_utf8_upgrade(sv);
if (SvTAIL(prog->anchored_substr))
SvTAIL_on(sv);
@@ -4555,8 +4554,8 @@
S_to_byte_substr(pTHX_ register regexp *prog)
{
if (prog->float_utf8 && !prog->float_substr) {
- SV* sv;
- prog->float_substr = sv = newSVsv(prog->float_utf8);
+ SV* sv = newSVsv(prog->float_utf8);
+ prog->float_substr = sv;
if (sv_utf8_downgrade(sv, TRUE)) {
if (SvTAIL(prog->float_utf8))
SvTAIL_on(sv);
@@ -4568,8 +4567,8 @@
prog->check_substr = sv;
}
if (prog->anchored_utf8 && !prog->anchored_substr) {
- SV* sv;
- prog->anchored_substr = sv = newSVsv(prog->anchored_utf8);
+ SV* sv = newSVsv(prog->anchored_utf8);
+ prog->anchored_substr = sv;
if (sv_utf8_downgrade(sv, TRUE)) {
if (SvTAIL(prog->anchored_utf8))
SvTAIL_on(sv);
End of Patch.