On 09/26/2011 10:09 PM, [email protected] wrote:
> Author: sf
> Date: Mon Sep 26 20:09:06 2011
> New Revision: 1176019
>
> URL: http://svn.apache.org/viewvc?rev=1176019&view=rev
> Log:
> Make mod_substitute more efficient:
> - Use varbuf resizable buffer instead of constantly allocating pool
> memory and copying data around. This changes the memory requirement from
> quadratic in ((number of substitutions in line) * (length of line)) to
> linear in (length of line).
> - Instead of copying buckets just to append a \0, use new ap_regexec_len()
> function
>
> PR: 50559
>
> Modified:
> httpd/httpd/trunk/CHANGES
> httpd/httpd/trunk/modules/filters/mod_substitute.c
>
> Modified: httpd/httpd/trunk/modules/filters/mod_substitute.c
> URL:
> http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_substitute.c?rev=1176019&r1=1176018&r2=1176019&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/filters/mod_substitute.c (original)
> +++ httpd/httpd/trunk/modules/filters/mod_substitute.c Mon Sep 26 20:09:06
> 2011
/*
> @@ -189,82 +174,70 @@ static void do_pattmatch(ap_filter_t *f,
> bytes -= len;
> buff += len;
> }
> - if (script->flatten && s1 && !force_quick) {
> - /*
> - * we've finished looking at the bucket, so remove
> the
> - * old one and add in our new one
> - */
> - s2 = apr_pstrmemdup(tmp_pool, buff, bytes);
> - s1 = apr_pstrcat(tmp_pool, s1, s2, NULL);
> - tmp_b = apr_bucket_transient_create(s1, strlen(s1),
> - f->r->connection->bucket_alloc);
> + if (have_match && script->flatten && !force_quick) {
> + char *copy = ap_varbuf_pdup(pool, &vb, NULL, 0,
> + buff, bytes, &len);
> + tmp_b = apr_bucket_pool_create(copy, len, pool,
> +
> f->r->connection->bucket_alloc);
> APR_BUCKET_INSERT_BEFORE(b, tmp_b);
> apr_bucket_delete(b);
> b = tmp_b;
> }
> -
> }
> else if (script->regexp) {
> - /*
> - * we need a null terminated string here :(. To hopefully
> - * save time and memory, we don't alloc for each run
> - * through, but only if we need to have a larger chunk
> - * to save the string to. So we keep track of how much
> - * we've allocated and only re-alloc when we need it.
> - * NOTE: this screams for a macro.
> - */
> - if (!scratch || (bytes > (fbytes + 1))) {
Not that it matters on trunk now any longer, but on 2.2.x:
Don't we have an off by two here?
If scratch was already allocated it is fbytes large.
If bytes is e.g. equal to fbytes or bytes == fbytes +1
the above won't be true, but we will
copy fbytes + 1 (2) data (trailing \0) to the buffer.
Shouldn't the check be
bytes + 1 > fbytes
instead?
> - fbytes = bytes + 1;
> - scratch = apr_palloc(tpool, fbytes);
> - }
> - /* reset pointer to the scratch space */
> - p = scratch;
> - memcpy(p, buff, bytes);
> - p[bytes] = '\0';
> - while (!ap_regexec(script->regexp, p,
> + int left = bytes;
> + const char *pos = buff;
> + while (!ap_regexec_len(script->regexp, pos, left,
> AP_MAX_REG_MATCH, regm, 0)) {
> - /* first, grab the replacement string */
> - repl = ap_pregsub(tmp_pool, script->replacement, p,
> - AP_MAX_REG_MATCH, regm);
> + have_match = 1;
> if (script->flatten && !force_quick) {
> - SEDSCAT(s1, s2, tmp_pool, p, regm[0].rm_so,
> repl);
> + /* copy bytes before the match */
> + if (regm[0].rm_so > 0)
> + ap_varbuf_strmemcat(&vb, pos, regm[0].rm_so);
> + /* add replacement string */
> + ap_varbuf_regsub(&vb, script->replacement, pos,
> + AP_MAX_REG_MATCH, regm);
> }
> else {
> + repl = ap_pregsub(pool, script->replacement, pos,
> + AP_MAX_REG_MATCH, regm);
> len = (apr_size_t) (regm[0].rm_eo -
> regm[0].rm_so);
> SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
> tmp_b = apr_bucket_transient_create(repl,
> - strlen(repl),
> + strlen(repl),
> f->r->connection->bucket_alloc);
> APR_BUCKET_INSERT_BEFORE(b, tmp_b);
> }
> /*
> - * reset to past what we just did. buff now maps to b
> + * reset to past what we just did. pos now maps to b
> * again
> */
> - p += regm[0].rm_eo;
> + pos += regm[0].rm_eo;
> + left -= regm[0].rm_eo;
> }
> - if (script->flatten && s1 && !force_quick) {
> - s1 = apr_pstrcat(tmp_pool, s1, p, NULL);
> - tmp_b = apr_bucket_transient_create(s1, strlen(s1),
> - f->r->connection->bucket_alloc);
> + if (have_match && script->flatten && !force_quick) {
> + char *copy;
> + /* Copy result plus the part after the last match
> into
> + * a bucket.
> + */
> + copy = ap_varbuf_pdup(pool, &vb, NULL, 0, pos, left,
> + &len);
> + tmp_b = apr_bucket_pool_create(copy, len, pool,
> + f->r->connection->bucket_alloc);
> APR_BUCKET_INSERT_BEFORE(b, tmp_b);
> apr_bucket_delete(b);
> b = tmp_b;
> }
> -
> }
> else {
> - /* huh? */
> + ap_assert(0);
> continue;
> }
> }
> }
> script++;
> }
> -
> - apr_pool_destroy(tpool);
> -
> - return;
> + ap_varbuf_free(&vb);
> }
>
> static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb)
>
>
>
>
Regards
Rüdiger