On Tue, Oct 19, 2021 at 4:31 PM <minf...@apache.org> wrote:
>
> +APR_DECLARE(apr_status_t) apr_brigade_split_boundary(apr_bucket_brigade 
> *bbOut,
> +                                                     apr_bucket_brigade 
> *bbIn,
> +                                                     apr_read_type_e block,
> +                                                     const char *boundary,
> +                                                     apr_size_t boundary_len,
> +                                                     apr_off_t maxbytes)
> +{
[]
> +        /*
> +         * Fast path.
> +         *
> +         * If we have at least one boundary worth of data, do an optimised
> +         * substring search for the boundary, and split quickly if found.
> +         */
> +        if (len >= boundary_len) {
> +
> +            apr_size_t off;
> +            apr_size_t leftover;
> +
> +            pos = strnstr(str, boundary, len);

apr_strnstr() maybe, strnstr() is non-standard AFAICT and possibly not
available on all platforms (Windows)?

> +
> +            /* definitely found it, we leave */
> +            if (pos != NULL) {
> +
> +                off = pos - str;
> +
> +                /* everything up to the boundary */
> +                if (off) {
> +
> +                    apr_bucket_split(e, off);
> +                    APR_BUCKET_REMOVE(e);
> +                    APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> +                    e = APR_BRIGADE_FIRST(bbIn);
> +                }
> +
> +                /* cut out the boundary */
> +                apr_bucket_split(e, boundary_len);
> +                apr_bucket_delete(e);
> +
> +                return APR_SUCCESS;
> +            }
> +
> +            /* any partial matches at the end? */
> +            leftover = boundary_len - 1;
> +            off = (len - leftover);

Can't we fall through the below "else" slow path here? The code looks
quite similar.

> +
> +            while (leftover) {
> +                if (!strncmp(str + off, boundary, leftover)) {
> +
> +                    if (off) {
> +
> +                        apr_bucket_split(e, off);
> +                        APR_BUCKET_REMOVE(e);
> +                        APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> +                        e = APR_BRIGADE_FIRST(bbIn);
> +                    }
> +
> +                    outbytes += off;
> +                    inbytes -= off;
> +
> +                    goto skip;
> +                }
> +                off++;
> +                leftover--;
> +            }
> +
> +            APR_BUCKET_REMOVE(e);
> +            APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> +            outbytes += len;
> +
> +            continue;
> +
> +        }
> +
> +        /*
> +         * Slow path.
> +         *
> +         * We need to read ahead at least one boundary worth of data so
> +         * we can search across the bucket edges.
> +         */
> +        else {
> +
> +            apr_size_t off = 0;
> +
> +            /* find all definite non matches */
> +            while (len) {
> +                if (!strncmp(str + off, boundary, len)) {
> +
> +                    if (off) {
> +
> +                        apr_bucket_split(e, off);
> +                        APR_BUCKET_REMOVE(e);
> +                        APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> +                        e = APR_BRIGADE_FIRST(bbIn);
> +                    }
> +
> +                    inbytes -= off;
> +
> +                    goto skip;
> +                }
> +                off++;
> +                len--;
> +            }
> +
> +            APR_BUCKET_REMOVE(e);
> +            APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +            continue;

"outbytes" not bumped?

> +
> +        }
> +
> +        /*
> +         * If we reach skip, it means the bucket in e is:
> +         *
> +         * - shorter than the boundary
> +         * - matches the boundary up to the bucket length
> +         * - might match more buckets
> +         *
> +         * Read further buckets and check whether the boundary matches all
> +         * the way to the end. If so, we have a match. If no match, shave off
> +         * one byte and continue round to try again.
> +         */
> +skip:

Below you read ahead to try to match the end of the boundary, and if
not split one byte into bbOut before restarting the loop, which could
result in a quite fragmented bbOut and is also what you almost did
above already.

Couldn't we instead save the partially matching bucket(s) in a
temporary brigade (provided by the caller) and restart the loop with
the more effective [apr_]strnstr() match (at boundary+off)?

> +
> +        for (next = APR_BUCKET_NEXT(e);
> +                inbytes < boundary_len && next != APR_BRIGADE_SENTINEL(bbIn);
> +                next = APR_BUCKET_NEXT(next)) {
> +
> +            const char *str;
> +            apr_size_t off;
> +            apr_size_t len;
> +
> +            rv = apr_bucket_read(next, &str, &len, block);
> +
> +            if (rv != APR_SUCCESS) {
> +                return rv;
> +            }
> +
> +            off = boundary_len - inbytes;
> +
> +            if (len > off) {
> +
> +                /* not a match, bail out */
> +                if (strncmp(str, boundary + inbytes, off)) {
> +                    break;
> +                }
> +
> +                /* a match! remove the boundary and return */
> +                apr_bucket_split(next, off);
> +
> +                e = APR_BUCKET_NEXT(next);
> +
> +                for (prev = APR_BRIGADE_FIRST(bbIn);
> +                        prev != e;
> +                        prev = APR_BRIGADE_FIRST(bbIn)) {
> +
> +                    apr_bucket_delete(prev);
> +
> +                }
> +
> +                return APR_SUCCESS;
> +
> +            }
> +            if (len == off) {
> +
> +                /* not a match, bail out */
> +                if (strncmp(str, boundary + inbytes, off)) {
> +                    break;
> +                }
> +
> +                /* a match! remove the boundary and return */
> +                e = APR_BUCKET_NEXT(next);
> +
> +                for (prev = APR_BRIGADE_FIRST(bbIn);
> +                        prev != e;
> +                        prev = APR_BRIGADE_FIRST(bbIn)) {
> +
> +                    apr_bucket_delete(prev);
> +
> +                }
> +
> +                return APR_SUCCESS;
> +
> +            }
> +            else if (len) {
> +
> +                /* not a match, bail out */
> +                if (strncmp(str, boundary + inbytes, len)) {
> +                    break;
> +                }
> +
> +                /* still hope for a match */
> +                inbytes += len;
> +            }
> +
> +        }
> +
> +        /*
> +         * If we reach this point, the bucket e did not match the boundary
> +         * in the subsequent buckets.
> +         *
> +         * Bump one byte off, and loop round to search again.
> +         */
> +        apr_bucket_split(e, 1);
> +        APR_BUCKET_REMOVE(e);
> +        APR_BRIGADE_INSERT_TAIL(bbOut, e);
> +
> +        outbytes++;
> +
> +    }
> +
> +    return APR_INCOMPLETE;
> +}


Regards;
Yann.

Reply via email to