On 09/26/2011 10:09 PM, s...@apache.org wrote: > Author: sf > Date: Mon Sep 26 20:09:06 2011 > New Revision: 1176019 > > URL: http://svn.apache.org/viewvc?rev=1176019&view=rev > Log: > Make mod_substitute more efficient: > - Use varbuf resizable buffer instead of constantly allocating pool > memory and copying data around. This changes the memory requirement from > quadratic in ((number of substitutions in line) * (length of line)) to > linear in (length of line). > - Instead of copying buckets just to append a \0, use new ap_regexec_len() > function > > PR: 50559 > > Modified: > httpd/httpd/trunk/CHANGES > httpd/httpd/trunk/modules/filters/mod_substitute.c > > Modified: httpd/httpd/trunk/modules/filters/mod_substitute.c > URL: > http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_substitute.c?rev=1176019&r1=1176018&r2=1176019&view=diff > ============================================================================== > --- httpd/httpd/trunk/modules/filters/mod_substitute.c (original) > +++ httpd/httpd/trunk/modules/filters/mod_substitute.c Mon Sep 26 20:09:06 > 2011 /* > @@ -189,82 +174,70 @@ static void do_pattmatch(ap_filter_t *f, > bytes -= len; > buff += len; > } > - if (script->flatten && s1 && !force_quick) { > - /* > - * we've finished looking at the bucket, so remove > the > - * old one and add in our new one > - */ > - s2 = apr_pstrmemdup(tmp_pool, buff, bytes); > - s1 = apr_pstrcat(tmp_pool, s1, s2, NULL); > - tmp_b = apr_bucket_transient_create(s1, strlen(s1), > - f->r->connection->bucket_alloc); > + if (have_match && script->flatten && !force_quick) { > + char *copy = ap_varbuf_pdup(pool, &vb, NULL, 0, > + buff, bytes, &len); > + tmp_b = apr_bucket_pool_create(copy, len, pool, > + > f->r->connection->bucket_alloc); > APR_BUCKET_INSERT_BEFORE(b, tmp_b); > apr_bucket_delete(b); > b = tmp_b; > } > - > } > else if (script->regexp) { > - /* > - * we need a null terminated string here :(. To hopefully > - * save time and memory, we don't alloc for each run > - * through, but only if we need to have a larger chunk > - * to save the string to. So we keep track of how much > - * we've allocated and only re-alloc when we need it. > - * NOTE: this screams for a macro. > - */ > - if (!scratch || (bytes > (fbytes + 1))) {
Not that it matters on trunk now any longer, but on 2.2.x: Don't we have an off by two here? If scratch was already allocated it is fbytes large. If bytes is e.g. equal to fbytes or bytes == fbytes +1 the above won't be true, but we will copy fbytes + 1 (2) data (trailing \0) to the buffer. Shouldn't the check be bytes + 1 > fbytes instead? > - fbytes = bytes + 1; > - scratch = apr_palloc(tpool, fbytes); > - } > - /* reset pointer to the scratch space */ > - p = scratch; > - memcpy(p, buff, bytes); > - p[bytes] = '\0'; > - while (!ap_regexec(script->regexp, p, > + int left = bytes; > + const char *pos = buff; > + while (!ap_regexec_len(script->regexp, pos, left, > AP_MAX_REG_MATCH, regm, 0)) { > - /* first, grab the replacement string */ > - repl = ap_pregsub(tmp_pool, script->replacement, p, > - AP_MAX_REG_MATCH, regm); > + have_match = 1; > if (script->flatten && !force_quick) { > - SEDSCAT(s1, s2, tmp_pool, p, regm[0].rm_so, > repl); > + /* copy bytes before the match */ > + if (regm[0].rm_so > 0) > + ap_varbuf_strmemcat(&vb, pos, regm[0].rm_so); > + /* add replacement string */ > + ap_varbuf_regsub(&vb, script->replacement, pos, > + AP_MAX_REG_MATCH, regm); > } > else { > + repl = ap_pregsub(pool, script->replacement, pos, > + AP_MAX_REG_MATCH, regm); > len = (apr_size_t) (regm[0].rm_eo - > regm[0].rm_so); > SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len); > tmp_b = apr_bucket_transient_create(repl, > - strlen(repl), > + strlen(repl), > f->r->connection->bucket_alloc); > APR_BUCKET_INSERT_BEFORE(b, tmp_b); > } > /* > - * reset to past what we just did. buff now maps to b > + * reset to past what we just did. pos now maps to b > * again > */ > - p += regm[0].rm_eo; > + pos += regm[0].rm_eo; > + left -= regm[0].rm_eo; > } > - if (script->flatten && s1 && !force_quick) { > - s1 = apr_pstrcat(tmp_pool, s1, p, NULL); > - tmp_b = apr_bucket_transient_create(s1, strlen(s1), > - f->r->connection->bucket_alloc); > + if (have_match && script->flatten && !force_quick) { > + char *copy; > + /* Copy result plus the part after the last match > into > + * a bucket. > + */ > + copy = ap_varbuf_pdup(pool, &vb, NULL, 0, pos, left, > + &len); > + tmp_b = apr_bucket_pool_create(copy, len, pool, > + f->r->connection->bucket_alloc); > APR_BUCKET_INSERT_BEFORE(b, tmp_b); > apr_bucket_delete(b); > b = tmp_b; > } > - > } > else { > - /* huh? */ > + ap_assert(0); > continue; > } > } > } > script++; > } > - > - apr_pool_destroy(tpool); > - > - return; > + ap_varbuf_free(&vb); > } > > static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb) > > > > Regards Rüdiger