On Mon, Jan 8, 2024 at 10:49 AM Ruediger Pluem <rpl...@apache.org> wrote: > > On 1/5/24 3:08 PM, Yann Ylavic wrote: > > > > process_regexp.diff > > > > Index: modules/metadata/mod_headers.c > > =================================================================== > > --- modules/metadata/mod_headers.c (revision 1914804) > > +++ modules/metadata/mod_headers.c (working copy) > > @@ -622,41 +622,70 @@ static char* process_tags(header_entry *hdr, reque > > } > > return str ? str : ""; > > } > > -static const char *process_regexp(header_entry *hdr, const char *value, > > - request_rec *r) > > + > > +static const char *process_regexp_rec(header_entry *hdr, const char *value, > > + request_rec *r, ap_regmatch_t > > pmatch[], > > + int flags) > > { > > - ap_regmatch_t pmatch[AP_MAX_REG_MATCH]; > > - const char *subs; > > - const char *remainder; > > char *ret; > > - int diffsz; > > - if (ap_regexec(hdr->regex, value, AP_MAX_REG_MATCH, pmatch, 0)) { > > + const char *subs, *remainder; > > + apr_size_t val_len, subs_len, rem_len; > > + apr_size_t match_start, match_end; > > + > > + val_len = strlen(value); > > + if (ap_regexec_len(hdr->regex, value, val_len, > > + AP_MAX_REG_MATCH, pmatch, > > + flags)) { > > /* no match, nothing to do */ > > return value; > > } > > + > > /* Process tags in the input string rather than the resulting > > * substitution to avoid surprises > > */ > > - subs = ap_pregsub(r->pool, process_tags(hdr, r), value, > > AP_MAX_REG_MATCH, pmatch); > > + subs = ap_pregsub(r->pool, process_tags(hdr, r), value, > > + AP_MAX_REG_MATCH, pmatch); > > if (subs == NULL) > > return NULL; > > - diffsz = strlen(subs) - (pmatch[0].rm_eo - pmatch[0].rm_so); > > + subs_len = strlen(subs); > > + > > + ap_assert(pmatch[0].rm_so >= 0 && pmatch[0].rm_so <= pmatch[0].rm_eo); > > + match_start = pmatch[0].rm_so; > > + match_end = pmatch[0].rm_eo; > > if (hdr->action == hdr_edit) { > > - remainder = value + pmatch[0].rm_eo; > > + remainder = value + match_end; > > } > > else { /* recurse to edit multiple matches if applicable */ > > - remainder = process_regexp(hdr, value + pmatch[0].rm_eo, r); > > - if (remainder == NULL) > > - return NULL; > > - diffsz += strlen(remainder) - strlen(value + pmatch[0].rm_eo); > > + if (match_end == 0 && val_len) { > > + /* advance on empty match to avoid infinite recursion */ > > + match_start = match_end = 1; > > Not debating that > > Header edit* headername ^ prefix > > should be be > > Header edit headername ^ prefix > > but wouldn't that do the wrong thing and add the prefix *after* the first > character of the header value?
Yes correct, this patch won't set the skipped char at the right place finally. New v2 attached. > > > + } > > + if (match_end < val_len) { > > + remainder = process_regexp_rec(hdr, value + match_end, > > Shouldn't we increase match_end just here by 1 if match_end == 0 && val_len ? See v2, match_end is not used below anyway. > > > + r, pmatch, AP_REG_NOTBOL); As noted in v2 we have an issue here by "losing" the beginning of the value on recursion: /* XXX: recursing by using AP_REG_NOTBOL (because we are not at ^ * anymore) and then "losing" the beginning of the string is not * always correct. Say we match "(?<=a)ba" against "ababa", on * recursion ap_regexec_len() will not know that the second "b" is * preceded by "a" thus not match. We'd need a new ap_regexec_ex() * that can take match_end as an offset to fix this.. */ Not sure how far we should go with this patch.. Regards; Yann.
Index: modules/metadata/mod_headers.c =================================================================== --- modules/metadata/mod_headers.c (revision 1914804) +++ modules/metadata/mod_headers.c (working copy) @@ -622,41 +622,90 @@ static char* process_tags(header_entry *hdr, reque } return str ? str : ""; } -static const char *process_regexp(header_entry *hdr, const char *value, - request_rec *r) + +static const char *process_regexp_core(header_entry *hdr, + const char *val, apr_size_t val_len, + request_rec *r, ap_regmatch_t *pmatch, + int flags) { - ap_regmatch_t pmatch[AP_MAX_REG_MATCH]; - const char *subs; - const char *remainder; char *ret; - int diffsz; - if (ap_regexec(hdr->regex, value, AP_MAX_REG_MATCH, pmatch, 0)) { + const char *sub, *rem; + apr_size_t sub_len, rem_sz; + apr_size_t match_start, match_end; + char skip = 0; + + if (ap_regexec_len(hdr->regex, val, val_len, + AP_MAX_REG_MATCH, pmatch, flags)) { /* no match, nothing to do */ - return value; + return val; } + /* Process tags in the input string rather than the resulting * substitution to avoid surprises */ - subs = ap_pregsub(r->pool, process_tags(hdr, r), value, AP_MAX_REG_MATCH, pmatch); - if (subs == NULL) + sub = ap_pregsub(r->pool, process_tags(hdr, r), val, + AP_MAX_REG_MATCH, pmatch); + if (sub == NULL) return NULL; - diffsz = strlen(subs) - (pmatch[0].rm_eo - pmatch[0].rm_so); + sub_len = strlen(sub); + + ap_assert(pmatch[0].rm_so >= 0 && pmatch[0].rm_so <= pmatch[0].rm_eo); + match_start = pmatch[0].rm_so; + match_end = pmatch[0].rm_eo; if (hdr->action == hdr_edit) { - remainder = value + pmatch[0].rm_eo; + rem = val + match_end; } else { /* recurse to edit multiple matches if applicable */ - remainder = process_regexp(hdr, value + pmatch[0].rm_eo, r); - if (remainder == NULL) - return NULL; - diffsz += strlen(remainder) - strlen(value + pmatch[0].rm_eo); + /* On empty match always advance by one char to avoid an infinite + * recursion, but save the skipped char which should go after the + * substitution still. + */ + if (val_len && !match_end) { + match_end = 1; + skip = *val; + } + /* If skipping the char reaches the end of string we still want to + * run the last recursion at $ since it might match and substitute + * something. + */ + if (match_end < val_len || skip) { + /* XXX: recursing by using AP_REG_NOTBOL (because we are not at ^ + * anymore) and then "losing" the beginning of the string is not + * always correct. Say we match "(?<=a)ba" against "ababa", on + * recursion ap_regexec_len() will not know that the second "b" is + * preceded by "a" thus not match. We'd need a new ap_regexec_ex() + * that can take match_end as an offset to fix this.. + */ + rem = process_regexp_core(hdr, + val + match_end, val_len - match_end, + r, pmatch, AP_REG_NOTBOL); + if (rem == NULL) + return NULL; + } + else { + rem = ""; + } } - ret = apr_palloc(r->pool, strlen(value) + 1 + diffsz); - memcpy(ret, value, pmatch[0].rm_so); - strcpy(ret + pmatch[0].rm_so, subs); - strcat(ret, remainder); + rem_sz = strlen(rem) + 1; /* includes \0 */ + + ret = apr_palloc(r->pool, match_start + sub_len + (skip ? 1 : 0) + rem_sz); + memcpy(ret, val, match_start); + memcpy(ret + match_start, sub, sub_len); + if (skip) { + ret[match_start + sub_len] = skip; + sub_len++; + } + memcpy(ret + match_start + sub_len, rem, rem_sz); return ret; } +static const char *process_regexp(header_entry *hdr, const char *value, + request_rec *r) +{ + ap_regmatch_t pmatch[AP_MAX_REG_MATCH]; + return process_regexp_core(hdr, value, strlen(value), r, pmatch, 0); +} + static int echo_header(void *v, const char *key, const char *val) { echo_do *ed = (echo_do *)v;