On Tue, Oct 19, 2021 at 4:31 PM <minf...@apache.org> wrote: > > +APR_DECLARE(apr_status_t) apr_brigade_split_boundary(apr_bucket_brigade > *bbOut, > + apr_bucket_brigade > *bbIn, > + apr_read_type_e block, > + const char *boundary, > + apr_size_t boundary_len, > + apr_off_t maxbytes) > +{ [] > + /* > + * Fast path. > + * > + * If we have at least one boundary worth of data, do an optimised > + * substring search for the boundary, and split quickly if found. > + */ > + if (len >= boundary_len) { > + > + apr_size_t off; > + apr_size_t leftover; > + > + pos = strnstr(str, boundary, len);
apr_strnstr() maybe, strnstr() is non-standard AFAICT and possibly not available on all platforms (Windows)? > + > + /* definitely found it, we leave */ > + if (pos != NULL) { > + > + off = pos - str; > + > + /* everything up to the boundary */ > + if (off) { > + > + apr_bucket_split(e, off); > + APR_BUCKET_REMOVE(e); > + APR_BRIGADE_INSERT_TAIL(bbOut, e); > + > + e = APR_BRIGADE_FIRST(bbIn); > + } > + > + /* cut out the boundary */ > + apr_bucket_split(e, boundary_len); > + apr_bucket_delete(e); > + > + return APR_SUCCESS; > + } > + > + /* any partial matches at the end? */ > + leftover = boundary_len - 1; > + off = (len - leftover); Can't we fall through the below "else" slow path here? The code looks quite similar. > + > + while (leftover) { > + if (!strncmp(str + off, boundary, leftover)) { > + > + if (off) { > + > + apr_bucket_split(e, off); > + APR_BUCKET_REMOVE(e); > + APR_BRIGADE_INSERT_TAIL(bbOut, e); > + > + e = APR_BRIGADE_FIRST(bbIn); > + } > + > + outbytes += off; > + inbytes -= off; > + > + goto skip; > + } > + off++; > + leftover--; > + } > + > + APR_BUCKET_REMOVE(e); > + APR_BRIGADE_INSERT_TAIL(bbOut, e); > + > + outbytes += len; > + > + continue; > + > + } > + > + /* > + * Slow path. > + * > + * We need to read ahead at least one boundary worth of data so > + * we can search across the bucket edges. > + */ > + else { > + > + apr_size_t off = 0; > + > + /* find all definite non matches */ > + while (len) { > + if (!strncmp(str + off, boundary, len)) { > + > + if (off) { > + > + apr_bucket_split(e, off); > + APR_BUCKET_REMOVE(e); > + APR_BRIGADE_INSERT_TAIL(bbOut, e); > + > + e = APR_BRIGADE_FIRST(bbIn); > + } > + > + inbytes -= off; > + > + goto skip; > + } > + off++; > + len--; > + } > + > + APR_BUCKET_REMOVE(e); > + APR_BRIGADE_INSERT_TAIL(bbOut, e); > + continue; "outbytes" not bumped? > + > + } > + > + /* > + * If we reach skip, it means the bucket in e is: > + * > + * - shorter than the boundary > + * - matches the boundary up to the bucket length > + * - might match more buckets > + * > + * Read further buckets and check whether the boundary matches all > + * the way to the end. If so, we have a match. If no match, shave off > + * one byte and continue round to try again. > + */ > +skip: Below you read ahead to try to match the end of the boundary, and if not split one byte into bbOut before restarting the loop, which could result in a quite fragmented bbOut and is also what you almost did above already. Couldn't we instead save the partially matching bucket(s) in a temporary brigade (provided by the caller) and restart the loop with the more effective [apr_]strnstr() match (at boundary+off)? > + > + for (next = APR_BUCKET_NEXT(e); > + inbytes < boundary_len && next != APR_BRIGADE_SENTINEL(bbIn); > + next = APR_BUCKET_NEXT(next)) { > + > + const char *str; > + apr_size_t off; > + apr_size_t len; > + > + rv = apr_bucket_read(next, &str, &len, block); > + > + if (rv != APR_SUCCESS) { > + return rv; > + } > + > + off = boundary_len - inbytes; > + > + if (len > off) { > + > + /* not a match, bail out */ > + if (strncmp(str, boundary + inbytes, off)) { > + break; > + } > + > + /* a match! remove the boundary and return */ > + apr_bucket_split(next, off); > + > + e = APR_BUCKET_NEXT(next); > + > + for (prev = APR_BRIGADE_FIRST(bbIn); > + prev != e; > + prev = APR_BRIGADE_FIRST(bbIn)) { > + > + apr_bucket_delete(prev); > + > + } > + > + return APR_SUCCESS; > + > + } > + if (len == off) { > + > + /* not a match, bail out */ > + if (strncmp(str, boundary + inbytes, off)) { > + break; > + } > + > + /* a match! remove the boundary and return */ > + e = APR_BUCKET_NEXT(next); > + > + for (prev = APR_BRIGADE_FIRST(bbIn); > + prev != e; > + prev = APR_BRIGADE_FIRST(bbIn)) { > + > + apr_bucket_delete(prev); > + > + } > + > + return APR_SUCCESS; > + > + } > + else if (len) { > + > + /* not a match, bail out */ > + if (strncmp(str, boundary + inbytes, len)) { > + break; > + } > + > + /* still hope for a match */ > + inbytes += len; > + } > + > + } > + > + /* > + * If we reach this point, the bucket e did not match the boundary > + * in the subsequent buckets. > + * > + * Bump one byte off, and loop round to search again. > + */ > + apr_bucket_split(e, 1); > + APR_BUCKET_REMOVE(e); > + APR_BRIGADE_INSERT_TAIL(bbOut, e); > + > + outbytes++; > + > + } > + > + return APR_INCOMPLETE; > +} Regards; Yann.