On 09/26/2011 10:09 PM, s...@apache.org wrote:
> Author: sf
> Date: Mon Sep 26 20:09:06 2011
> New Revision: 1176019
> 
> URL: http://svn.apache.org/viewvc?rev=1176019&view=rev
> Log:
> Make mod_substitute more efficient:
> - Use varbuf resizable buffer instead of constantly allocating pool
>   memory and copying data around. This changes the memory requirement from
>   quadratic in ((number of substitutions in line) * (length of line)) to
>   linear in (length of line).
> - Instead of copying buckets just to append a \0, use new ap_regexec_len()
>   function
> 
> PR: 50559
> 
> Modified:
>     httpd/httpd/trunk/CHANGES
>     httpd/httpd/trunk/modules/filters/mod_substitute.c
> 
> Modified: httpd/httpd/trunk/modules/filters/mod_substitute.c
> URL: 
> http://svn.apache.org/viewvc/httpd/httpd/trunk/modules/filters/mod_substitute.c?rev=1176019&r1=1176018&r2=1176019&view=diff
> ==============================================================================
> --- httpd/httpd/trunk/modules/filters/mod_substitute.c (original)
> +++ httpd/httpd/trunk/modules/filters/mod_substitute.c Mon Sep 26 20:09:06 
> 2011
                            /*
> @@ -189,82 +174,70 @@ static void do_pattmatch(ap_filter_t *f,
>                          bytes -= len;
>                          buff += len;
>                      }
> -                    if (script->flatten && s1 && !force_quick) {
> -                        /*
> -                         * we've finished looking at the bucket, so remove 
> the
> -                         * old one and add in our new one
> -                         */
> -                        s2 = apr_pstrmemdup(tmp_pool, buff, bytes);
> -                        s1 = apr_pstrcat(tmp_pool, s1, s2, NULL);
> -                        tmp_b = apr_bucket_transient_create(s1, strlen(s1),
> -                                            f->r->connection->bucket_alloc);
> +                    if (have_match && script->flatten && !force_quick) {
> +                        char *copy = ap_varbuf_pdup(pool, &vb, NULL, 0,
> +                                                    buff, bytes, &len);
> +                        tmp_b = apr_bucket_pool_create(copy, len, pool,
> +                                                       
> f->r->connection->bucket_alloc);
>                          APR_BUCKET_INSERT_BEFORE(b, tmp_b);
>                          apr_bucket_delete(b);
>                          b = tmp_b;
>                      }
> -
>                  }
>                  else if (script->regexp) {
> -                    /*
> -                     * we need a null terminated string here :(. To hopefully
> -                     * save time and memory, we don't alloc for each run
> -                     * through, but only if we need to have a larger chunk
> -                     * to save the string to. So we keep track of how much
> -                     * we've allocated and only re-alloc when we need it.
> -                     * NOTE: this screams for a macro.
> -                     */
> -                    if (!scratch || (bytes > (fbytes + 1))) {

Not that it matters on trunk now any longer, but on 2.2.x:

Don't we have an off by two here?
If scratch was already allocated it is fbytes large.
If bytes is e.g. equal to fbytes or bytes == fbytes +1
the above won't be true, but we will
copy fbytes + 1 (2) data (trailing \0) to the buffer.
Shouldn't the check be

bytes + 1 > fbytes

instead?

> -                        fbytes = bytes + 1;
> -                        scratch = apr_palloc(tpool, fbytes);
> -                    }
> -                    /* reset pointer to the scratch space */
> -                    p = scratch;
> -                    memcpy(p, buff, bytes);
> -                    p[bytes] = '\0';
> -                    while (!ap_regexec(script->regexp, p,
> +                    int left = bytes;
> +                    const char *pos = buff;
> +                    while (!ap_regexec_len(script->regexp, pos, left,
>                                         AP_MAX_REG_MATCH, regm, 0)) {
> -                        /* first, grab the replacement string */
> -                        repl = ap_pregsub(tmp_pool, script->replacement, p,
> -                                          AP_MAX_REG_MATCH, regm);
> +                        have_match = 1;
>                          if (script->flatten && !force_quick) {
> -                            SEDSCAT(s1, s2, tmp_pool, p, regm[0].rm_so, 
> repl);
> +                            /* copy bytes before the match */
> +                            if (regm[0].rm_so > 0)
> +                                ap_varbuf_strmemcat(&vb, pos, regm[0].rm_so);
> +                            /* add replacement string */
> +                            ap_varbuf_regsub(&vb, script->replacement, pos,
> +                                             AP_MAX_REG_MATCH, regm);
>                          }
>                          else {
> +                            repl = ap_pregsub(pool, script->replacement, pos,
> +                                              AP_MAX_REG_MATCH, regm);
>                              len = (apr_size_t) (regm[0].rm_eo - 
> regm[0].rm_so);
>                              SEDRMPATBCKT(b, regm[0].rm_so, tmp_b, len);
>                              tmp_b = apr_bucket_transient_create(repl,
> -                                                                strlen(repl),
> +                                             strlen(repl),
>                                               f->r->connection->bucket_alloc);
>                              APR_BUCKET_INSERT_BEFORE(b, tmp_b);
>                          }
>                          /*
> -                         * reset to past what we just did. buff now maps to b
> +                         * reset to past what we just did. pos now maps to b
>                           * again
>                           */
> -                        p += regm[0].rm_eo;
> +                        pos += regm[0].rm_eo;
> +                        left -= regm[0].rm_eo;
>                      }
> -                    if (script->flatten && s1 && !force_quick) {
> -                        s1 = apr_pstrcat(tmp_pool, s1, p, NULL);
> -                        tmp_b = apr_bucket_transient_create(s1, strlen(s1),
> -                                            f->r->connection->bucket_alloc);
> +                    if (have_match && script->flatten && !force_quick) {
> +                        char *copy;
> +                        /* Copy result plus the part after the last match 
> into
> +                         * a bucket.
> +                         */
> +                        copy = ap_varbuf_pdup(pool, &vb, NULL, 0, pos, left,
> +                                              &len);
> +                        tmp_b = apr_bucket_pool_create(copy, len, pool,
> +                                           f->r->connection->bucket_alloc);
>                          APR_BUCKET_INSERT_BEFORE(b, tmp_b);
>                          apr_bucket_delete(b);
>                          b = tmp_b;
>                      }
> -
>                  }
>                  else {
> -                    /* huh? */
> +                    ap_assert(0);
>                      continue;
>                  }
>              }
>          }
>          script++;
>      }
> -
> -    apr_pool_destroy(tpool);
> -
> -    return;
> +    ap_varbuf_free(&vb);
>  }
>  
>  static apr_status_t substitute_filter(ap_filter_t *f, apr_bucket_brigade *bb)
> 
> 
> 
> 


Regards

Rüdiger

Reply via email to