On Thu, Aug 12, 2004 at 02:06:39PM -0700, Justin Erenkrantz wrote:
> --On Thursday, August 12, 2004 3:52 PM -0400 Glenn Strauss 
> <[EMAIL PROTECTED]> wrote:
> 
> >I saw so much repeated code for parsing brigades, that I created a
> >"readahead" API: ap_brigade_ra().  It is passed similar arguments as
> >those to input filters, and additionally is passed a readahead struct
> >and a readahead limit.  This abstraction acts as a buffer and parses
> >out bytes and lines, reading from upstream filters when it needs to.
> 
> Okay, I have no idea what this ap_brigade_ra() API is or what its intention 
> is based upon your description.

Code reuse and consistent input filter behavior. :)

> You keep referring to the 'readahead API' 
> as the magic solution for all of the issues I raised.  So, I respectfully 
> ask that you please provide this API with examples to this list if you want 
> to continue the discussion.

I'll try.  Apologies for the length.
The following is an attempt at some documentation.

The code below has _not_ been tested, and in fact compilation has not
even been attempted.  It's been cut-n-pasted from various places in
my code.  Still I would be happy to fix any mistakes that are pointed out.

Thanks!
Glenn

(If there are any questions of license, the code that is mine below
 is released under the revised BSD license.  Use it in good health.)



/* Readahead state shared by the ap_brigade_ra*() routines.
 * ap_filter_ra_init() zeroes the structure and creates the readahead
 * brigade; ap_filter_ra_destroy() releases whichever brigades exist. */
typedef struct filter_readahead_ctx_t {

        /* data read from upstream is appended to this brigade
         * (always created by ap_filter_ra_init()) */
        apr_bucket_brigade *readahead;
        /* in-memory buckets only (see len_partial); NULL until first needed
         * NOTE(review): presumably holds a line still in progress -- confirm */
        apr_bucket_brigade *partial;
        /* data set aside from readahead during speculative (AP_MODE_PEEK)
         * reads (see len_lookahead); NULL until first needed */
        apr_bucket_brigade *lookahead;

        /* (internal use) bucket following last in-memory data bucket */
        /* (ap_filter_ra_init() points this at the readahead sentinel) */
        apr_bucket *b;

        /* length of data read ahead */
        /* (up to end of brigade or to first bucket that is not in-memory) */
        /* (includes length of data in both readahead and 'partial' brigades) */
        /* caller MUST update this if it modifies readahead brigade */
        apr_off_t len_readahead;

        /* length of data in 'partial' brigade (all buckets are in-memory) */
        /* caller MUST update this if it modifies 'partial' brigade */
        apr_off_t len_partial;

        /* (private) running count of data set aside from ra->readahead
         * when performing speculative reads (AP_MODE_PEEK) */
        apr_off_t len_lookahead;

        /* (private) running count of data from upstream (appended to readahead)
         * in excess of caller-specified max length to readahead when reading
         * from a non-in-memory bucket.  (On subsequent calls to read routines,
         * it is added to len_upstream to simulate reads from upstream) */
        apr_off_t len_pending;

        /* running count of data read from upstream (appended to readahead) */
        /* (includes data that was subsequently accounted in len_downstream)*/
        /* caller may reset this at any time */
        apr_off_t len_upstream;

        /* running count of data sent downstream (appended to caller bb) */
        /* caller may reset this at any time */
        /* (the line-reading helpers in this file zero it before each read and
         *  then use it as the length of the line just returned) */
        apr_off_t len_downstream;

} ap_filter_ra_t;


/* Put a readahead structure into its initial state: every counter is zero,
 * the 'partial' and 'lookahead' brigades are absent (NULL; they are created
 * when needed), and an empty readahead brigade is allocated from pool.
 */
AP_DECLARE(void)
ap_filter_ra_init(ap_filter_ra_t * const restrict ra,
                  apr_pool_t * const restrict pool,
                  apr_bucket_alloc_t * const restrict bucket_alloc)
{
    apr_bucket_brigade *readahead_bb;

    memset(ra, '\0', sizeof(*ra));

    readahead_bb = apr_brigade_create(pool, bucket_alloc);
    ra->readahead = readahead_bb;

    /* brigade is empty, so the bucket "following the last in-memory data
     * bucket" is the brigade sentinel */
    ra->b = APR_BRIGADE_SENTINEL(readahead_bb);
}

/* Destroy whichever of the three brigades have been created.
 * (not strictly necessary, but frees brigades sooner than connection cleanup)
 */
AP_DECLARE(void)
ap_filter_ra_destroy(ap_filter_ra_t * const restrict ra)
{
    apr_bucket_brigade * const brigades[] = {
        ra->readahead, ra->lookahead, ra->partial
    };
    unsigned int k;

    for (k = 0; k < sizeof(brigades) / sizeof(brigades[0]); k++) {
        if (brigades[k] != NULL) {
            apr_brigade_destroy(brigades[k]);
        }
    }
}



/**
 * Read bytes from upstream.
 * @param ra readahead structure to keep readahead state
 * @param f current filter (not f->next)
 *        (e.g. filter with which ra is associated)
 * @param bb brigade in which to return results
 * @param r_mode AP_MODE_BYTES (and not AP_MODE_LINE), plus optional flags
 *        (at most one of AP_MODE_PASS_SPECIALS or AP_MODE_PEEK, never both)
 *        (@see ap_input_mode_t)
 * @param r_type APR_BLOCK_READ or APR_NONBLOCK_READ
 *        (@see apr_read_type_e)
 * @param r_limit max number of bytes to return in bb
 * @param ra_limit max number of bytes to read ahead
 *        (includes data buffered in ra and data read from upstream)
 *        (if ra_limit <= 0, r_limit is used as ra_limit)
 * @return APR_SUCCESS and buckets appended to bb if successful
 *         APR_EINVAL if AP_MODE_PASS_SPECIALS is combined with AP_MODE_PEEK
 *         or other return values from
 *           ap_get_brigade(), apr_bucket_read(), or apr_bucket_split()
 * @remark metadata buckets are returned in bb in-line with data buckets
 */
AP_DECLARE(apr_status_t)
ap_brigade_ra_bytes(ap_filter_ra_t     * const restrict ra,
                    ap_filter_t        * const restrict f,
                    apr_bucket_brigade * const restrict bb,
                    const ap_input_mode_t r_mode,
                    const apr_read_type_e r_type,
                    const apr_off_t r_limit,
                    const apr_off_t ra_limit);

/**
 * Read complete line from upstream.
 * @param ra readahead structure to keep readahead state
 * @param f current filter (not f->next)
 *        (e.g. filter with which ra is associated)
 * @param bb brigade in which to return results
 * @param r_mode AP_MODE_LINE (and not AP_MODE_BYTES), plus optional flags
 *        (optionally AP_MODE_MIME_FOLDING and/or AP_MODE_PEEK)
 *        (@see ap_input_mode_t)
 * @param r_type APR_BLOCK_READ or APR_NONBLOCK_READ
 *        (@see apr_read_type_e)
 * @param r_limit max number of bytes to return in bb
 * @param ra_limit max number of bytes to read ahead
 *        (includes data buffered in ra and data read from upstream)
 *        (if ra_limit <= 0, r_limit is used as ra_limit)
 *        (if ra_limit < 0, AP_MODE_LINE is used when reading from upstream,
 *         else AP_MODE_BYTES is used to read from upstream and buffer in ra)
 * @return APR_SUCCESS if successful
 *           If line is completed, buckets containing line are appended to bb
 *           If metadata buckets are detected (but not EOS), consecutive
 *             metadata buckets are appended to bb instead of line data.
 *             A subsequent call to this routine is needed to get the line, or
 *             metadata buckets further on in the line.
 *           Otherwise, bb is unmodified.
 *         APR_ENOSPC if r_limit is reached before LF is detected
 *         APR_EOF and EOS bucket is appended to bb if EOS bucket is detected
 *           and a partial line is not in progress.
 *         APR_INCOMPLETE if EOS is detected and a partial line is in progress.
 *           bb is not modified in this case.
 *         or other return values from
 *           ap_get_brigade(), apr_bucket_read(), or apr_bucket_split()
 * @remark When AP_MODE_MIME_FOLDING is used, the line is not returned until
 *           the beginning of the next line (or EOS) is received, at which point
 *           we know that the following line is not a continuation line.  Then,
 *           the line is returned as-is, including continuations (not unfolded).
 *           r_limit applies to the entire line, including continuation lines.
 *
 */
AP_DECLARE(apr_status_t)
ap_brigade_ra_line(ap_filter_ra_t     * const restrict ra,
                   ap_filter_t        * const restrict f,
                   apr_bucket_brigade * const restrict bb,
                   const ap_input_mode_t r_mode,
                   const apr_read_type_e r_type,
                   const apr_off_t r_limit,
                   const apr_off_t ra_limit);

/**
 * Read bytes or line from upstream.
 * @param ra readahead structure to keep readahead state
 * @param f current filter (not f->next)
 *        (e.g. filter with which ra is associated)
 * @param bb brigade in which to return results
 * @param r_mode AP_MODE_BYTES or AP_MODE_LINE, plus optional flags
 *        (@see ap_input_mode_t)
 * @param r_type APR_BLOCK_READ or APR_NONBLOCK_READ
 *        (@see apr_read_type_e)
 * @param r_limit max number of bytes to return in bb
 * @param ra_limit max number of bytes to read ahead
 *        (includes data buffered in ra and data read from upstream)
 *        (if ra_limit <= 0, r_limit is used as ra_limit)
 * @return APR_SUCCESS and buckets appended to bb if successful
 *         or other return values from
 *           ap_get_brigade(), apr_bucket_read(), or apr_bucket_split()
 * @see ap_brigade_ra_bytes
 * @see ap_brigade_ra_line
 * @remark ap_brigade_ra_macro() evaluates r_mode twice; use it only when the
 *         arguments are simple expressions without side effects, otherwise
 *         call the ap_brigade_ra() function
 * @remark If byte/line mode is known, call ap_brigade_ra_{bytes,line} directly
 */
/* The selector is the macro's own r_mode argument (not an identifier that
 * happens to be in scope at the call site), and the whole expansion is
 * parenthesized so it can be embedded in a larger expression. */
#define ap_brigade_ra_macro(ra,f,bb,r_mode,r_type,r_limit,ra_limit)            \
 (((r_mode) & AP_MODE_BYTES)                                                   \
    ? ap_brigade_ra_bytes((ra),(f),(bb),(r_mode),(r_type),(r_limit),(ra_limit))\
    : ap_brigade_ra_line( (ra),(f),(bb),(r_mode),(r_type),(r_limit),(ra_limit)))

/* Function form of the bytes-or-line read: safe to call with argument
 * expressions of any complexity, since each is evaluated exactly once.
 * Selects ap_brigade_ra_bytes() when r_mode has AP_MODE_BYTES set and
 * ap_brigade_ra_line() otherwise, and returns that routine's status.
 */
AP_DECLARE(apr_status_t)
ap_brigade_ra(ap_filter_ra_t     * const restrict ra,
              ap_filter_t        * const restrict f,
              apr_bucket_brigade * const restrict bb,
              const ap_input_mode_t r_mode,
              const apr_read_type_e r_type,
              const apr_off_t r_limit,
              const apr_off_t ra_limit)
{
    /* Dispatch directly on the r_mode parameter so this function does not
     * depend on the identifier names used inside ap_brigade_ra_macro(). */
    if (r_mode & AP_MODE_BYTES) {
        return ap_brigade_ra_bytes(ra, f, bb, r_mode, r_type, r_limit, ra_limit);
    }
    return ap_brigade_ra_line(ra, f, bb, r_mode, r_type, r_limit, ra_limit);
}


/* ap_get_brigade() really could be a macro.
 * It probably is not a macro because the filter 'next' might be a complex
 * expression, and the original coder did not want to impose the typical macro
 * restriction (no side effects) on 'next' (used 3 times in the macro below).
 * (ap_append_eos_bucket() is not inlined because it is so infrequently called)
 */
/* Append a freshly created EOS bucket (from bb's own bucket allocator) to
 * the tail of bb.  Always returns APR_SUCCESS. */
AP_DECLARE(apr_status_t)  __attribute_noinline__
ap_append_eos_bucket(apr_bucket_brigade * const restrict bb)
{
    apr_bucket * const eos = apr_bucket_eos_create(bb->bucket_alloc);

    APR_BRIGADE_INSERT_TAIL(bb, eos);
    return APR_SUCCESS;
}

/* Macro form of ap_get_brigade(): call the input function of filter 'next',
 * or, when 'next' is NULL, append an EOS bucket to bb instead.
 * 'next' is evaluated up to three times and 'bb' appears in both arms, so
 * pass only expressions without side effects.  Every argument, including
 * bb in the EOS arm, is parenthesized at each use. */
#define ap_get_brigade_macro(next,bb,mode,block,readbytes)                     \
  ((next)                                                                      \
   ? (next)->frec->filter_func.in_func((next),(bb),(mode),(block),(readbytes)) \
   : ap_append_eos_bucket((bb)))

/* [replace ap_get_brigade in server/util_filter.c]
 * Pull data through input filter 'next' into bb.
 * When 'next' is NULL (end of the filter chain), an EOS bucket is appended
 * to bb and APR_SUCCESS is returned, instead of returning AP_NOBODY_READ.
 */
AP_DECLARE(apr_status_t) ap_get_brigade(ap_filter_t * const restrict next,
                                        apr_bucket_brigade * const restrict bb,
                                        const ap_input_mode_t mode,
                                        const apr_read_type_e block,
                                        const apr_off_t readbytes)
{
    if (!next) {
        return ap_append_eos_bucket(bb);
    }
    return next->frec->filter_func.in_func(next, bb, mode, block, readbytes);
}



/*
 *
 * Some usage examples of readahead API
 *
 *
 */



/* core input filter
 * [need to extend core_ctx_t to contain ap_filter_ra_t]
 */

/* Create the core input filter context for the connection behind f, store
 * it in net->in_ctx, and return it.  The context's readahead brigade is
 * seeded with a socket bucket for the client socket.
 */
static core_ctx_t *
core_input_filter_init(ap_filter_t * const restrict f)
{
    conn_rec * const restrict c = f->c;
    core_net_rec * const restrict net = f->ctx;

    /* zero-filled allocation: ap_filter_ra_init() only clears ctx->ra, so
     * any other core_ctx_t members would otherwise hold indeterminate data */
    core_ctx_t * const restrict
      ctx = apr_pcalloc(c->pool, sizeof(*ctx));

    apr_bucket * const restrict
      s = apr_bucket_socket_create(net->client_socket, c->bucket_alloc);

    ap_filter_ra_init(&ctx->ra, c->pool, c->bucket_alloc);

    /* seed readahead brigade with the client socket */
    APR_BRIGADE_INSERT_TAIL(ctx->ra.readahead, s);
    /* no in-memory data is buffered yet, so the socket bucket is the bucket
     * "following the last in-memory data bucket" */
    ctx->ra.b = s;

    return (net->in_ctx = ctx);
}


/* Core input filter: serve the request straight from the connection's
 * readahead state, creating that state on first use.  r_limit is readbytes
 * and ra_limit is 0 (so readbytes also bounds the readahead).
 */
static int core_input_filter(ap_filter_t * const restrict f,
                             apr_bucket_brigade * const restrict bb,
                             const ap_input_mode_t mode,
                             const apr_read_type_e block,
                             const apr_off_t readbytes)
{
    /* [could make this even simpler if core input filter ctx was initialized
     *  in core_pre_connection() in server/core.c when added to filter chain,
     *  rather than checking on each call.  After all, we always read request!
     *  (Move core_input_filter_init() contents into core_pre_connection())]
     */
    core_net_rec * const restrict net = f->ctx;
    core_ctx_t *ctx = net->in_ctx;

    if (!ctx) {
        ctx = core_input_filter_init(f);
    }

    return ap_brigade_ra_macro(&ctx->ra, f, bb, mode, block, readbytes, 0);
}
/* Note that every filter that uses ap_brigade_ra() will have consistent
 * behavior.  Since the core input filter is the default end of chain,
 * it only makes sense that it be this simple, just reading from its brigade
 * and returning the data/metadata.
 */





/* Remove trailing whitespace (blank, tab, CR, LF) from s in place.
 * A string consisting only of such characters becomes the empty string.
 * (The length is tracked as size_t, so there is no line-length restriction.)
 */
static void
rtrim(char * const restrict s)
{
    size_t len = strlen(s);

    while (len != 0) {
        const char c = s[len - 1];
        if (c != APR_ASCII_BLANK && c != APR_ASCII_TAB
            && c != APR_ASCII_CR && c != APR_ASCII_LF) {
            break; /* last char to keep */
        }
        len--;
    }
    s[len] = '\0';
}


/* Remove leading whitespace (blank, tab) and trailing whitespace (blank,
 * tab, CR, LF) from s in place.
 * The trailing scan is bounded by the position where the leading scan
 * stopped, so a string such as "\n" or " \r\n" (CR/LF directly after the
 * leading blanks) yields the empty string instead of indexing before s.
 * (Indices are size_t, so there is no line-length restriction.)
 */
static void
lrtrim(char * const restrict s)
{
    size_t start = 0;
    size_t end;

    while (s[start] == APR_ASCII_BLANK || s[start] == APR_ASCII_TAB) {
        start++; /* remove leading whitespace */
    }

    end = start + strlen(s + start);
    while (end > start) {
        const char c = s[end - 1];
        if (c != APR_ASCII_BLANK && c != APR_ASCII_TAB
            && c != APR_ASCII_CR && c != APR_ASCII_LF) {
            break;
        }
        end--; /* remove trailing whitespace */
    }

    if (start != 0 && end != start) {
        memmove(s, s + start, end - start); /* regions may overlap */
    }
    s[end - start] = '\0';
}


/* validate field label, remove trailing label whitespace, return field value
 * (If field label is valid, NIL overwrites char after label,
 *  else NULL is returned and line is unmodified)
 * A 'field-name' (RFC 2616 Section 4.2) is defined as a 'token' and a
 * 'token' (RFC 2616 Section 2.2) is defined as one or more chars EXCEPT
 * CTLs or separators, which are:  ()<>@,;:\"/[]?={}  SP (' ') HT ('\t')
 * (CTLs are 0x00-0x1F, and 0x7F; SP is 0x20, HT is 0x09)
 *
 * The label may be followed by blanks/tabs before the colon.  The returned
 * pointer addresses the char right after the colon (value is not trimmed).
 * Bytes are examined as unsigned char and anything >= 0x7F ends the label,
 * so the result does not depend on whether plain char is signed.
 * The scan stops at the first char that is not a token char, so label only
 * needs to contain such a char (e.g. LF or NIL) before the end of its buffer.
 */
char *
mime_label_validate(char * const restrict label)
{
    static const char separators[] = "()<>@,;:\\\"/[]?={}";
    size_t name_len = 0;
    size_t gap;

    for (;;) {
        const unsigned char c = (unsigned char) label[name_len];
        if (c <= 0x20 || c >= 0x7f || strchr(separators, c) != NULL) {
            break; /* CTL, SP, non-ASCII, or separator ends the token */
        }
        name_len++;
    }
    if (name_len == 0) {
        return NULL; /* empty label */
    }

    /* optional blanks/tabs between label and colon (0 if colon is next) */
    gap = strspn(label + name_len, " \t");
    if (label[name_len + gap] != ':') {
        return NULL; /* missing colon, or invalid char in label */
    }

    label[name_len] = '\0';
    return label + name_len + gap + 1;
}


/*(intended for use on the value portion of folded MIME headers)*/
/* Unfold and trim s in place:
 *  - leading blanks/tabs on the first line are removed
 *  - each line ending (CR, LF, CRLF) is removed, together with the
 *    blanks/tabs that precede it
 *  - a continuation line's leading run of blanks/tabs is collapsed to its
 *    first char
 *  - trailing blanks/tabs at the end of the string are removed, whether or
 *    not a CR precedes the terminating NIL
 * (Indices are size_t, so there is no line-length restriction.)
 */
static void
mime_value_unfold_chomp(char * const restrict s)
{
    size_t i = 0; /* read position */
    size_t j = 0; /* write position (never ahead of i) */

    while (s[i] == APR_ASCII_BLANK || s[i] == APR_ASCII_TAB) {
        i++; /* remove leading whitespace on first line */
    }

    while (s[i] != '\0') { /* unfold continuation lines; remove internal CRLF */
        if (s[i] != APR_ASCII_CR && s[i] != APR_ASCII_LF) {
            s[j++] = s[i++];
            continue;
        }

        while (j != 0 && (s[j-1]==APR_ASCII_BLANK || s[j-1]==APR_ASCII_TAB)) {
            j--; /* remove trailing whitespace on previous line */
        }
        if (s[i+1] == APR_ASCII_LF) {
            i++; /* treat the two-char line ending as one */
        }
        i++; /* step past the line ending */
        if (s[i] == APR_ASCII_BLANK || s[i] == APR_ASCII_TAB) {
            s[j++] = s[i++]; /*(keep first whitespace char of folded line)*/
            while (s[i] == APR_ASCII_BLANK || s[i] == APR_ASCII_TAB) {
                i++; /* remove leading whitespace on next line */
            }
        }
    }

    while (j != 0 && (s[j-1]==APR_ASCII_BLANK || s[j-1]==APR_ASCII_TAB)) {
        j--; /* remove trailing whitespace on last line */
    }
    s[j] = '\0';
}


/* Convenience routine which reads in mode AP_MODE_LINE | AP_MODE_MIME_FOLDING,
 * flattens each line, parses the field, and adds label and value to the given
 * table.
 *
 * limit_mime_fields is a pointer to an int that is decremented once per
 * field; when it is already 0 as a further field arrives, it is decremented
 * to -1 and APR_ENOSPC is returned.
 * pass NULL for limit_mime_fields for no limit.
 *
 * caller must check rv and must check for metadata (including EOS) upon return
 *   (this routine does not check if bb passed in to routine ends in EOS)
 * bb is cleared for each line and returns with final line read (or empty)
 * If empty, and APR_SUCCESS, line is not complete and would block.  Try again.
 * ra->len_downstream is used internally and is length of last line read (or 0)
 * upon return (and APR_SUCCESS).  If APR_SUCCESS and ra->len_downstream < 3,
 * then, since line ends in LF, if len is 1, headers finished, else read first
 * bucket and check first char CR.  If CR, then headers finished, else, bad
 * line (APR_EGENERAL)
 *
 * Other returns: APR_EGENERAL for a field with a missing colon or an invalid
 * or empty label; any error from ap_brigade_ra_line() or
 * apr_brigade_flatten().
 */
AP_DECLARE(apr_status_t)
ap_brigade_ra_mime_fields(ap_filter_ra_t * const restrict ra,
                          ap_filter_t * const restrict f,
                          apr_bucket_brigade * const restrict bb,
                          const apr_read_type_e block,
                          apr_table_t * const restrict mime_fields,
                          int * const restrict limit_mime_fields)
{
    char *label;
    char *value;
    apr_size_t len;
    apr_status_t rv;
    /* NOTE(review): f->r is dereferenced here and below; confirm that every
     * filter passed as f has a request attached (f->r != NULL) */
    const apr_off_t limit_mime_fieldsize =
      (apr_off_t) f->r->server->limit_req_fieldsize + 2; /*CRLF*/

    for (;;) {
        (void) apr_brigade_cleanup(bb);
        ra->len_downstream = 0;
        rv = ap_brigade_ra_line(ra, f, bb, AP_MODE_LINE | AP_MODE_MIME_FOLDING,
                                block, limit_mime_fieldsize, -1);
        if (rv != APR_SUCCESS
            || (len = (apr_size_t)ra->len_downstream) < 3) {
            /* (field must have at least one char, colon, LF (>= 3 chars)) */
            return rv;
        }

        /* decrement the caller's counter (not the pointer to it) */
        if (limit_mime_fields != NULL && (*limit_mime_fields)-- == 0) {
            return APR_ENOSPC; /* exceeded max num fields allowed */
        }

        label = apr_palloc(f->r->pool, len);
        /* apr_brigade_flatten() takes the buffer size by pointer and stores
         * the number of bytes actually copied */
        rv = apr_brigade_flatten(bb, label, &len);
        if (rv != APR_SUCCESS) {
            return rv;
        }
      #if APR_CHARSET_EBCDIC/*(needed?  shouldn't this be done as filter?)*/
        ap_xlate_proto_from_ascii(label, len);
      #endif

        /* (label is not NIL-terminated yet; validation stops at the LF) */
        if ((value = mime_label_validate(label)) == NULL) {
            /* invalid field: missing colon, or invalid or empty label */
            return APR_EGENERAL;
        }

        /* unfold MIME-folded value and trim outer whitespace */
        label[len-1] = '\0'; /*(overwrite LF with NIL)*/
        mime_value_unfold_chomp(value);

        /* tack header and value onto table list; caller should merge later */
        apr_table_addn(mime_fields, label, value);
    }
}


/* Read one line through the readahead API and return it as a NIL-terminated
 * string in *s.
 *
 * s     in/out: if *s is NULL a buffer is allocated from f->r->pool,
 *       otherwise the line is written into the caller's buffer
 * n     max line length (passed to ap_brigade_ra_line() as r_limit)
 * mode  AP_MODE_LINE, optionally with AP_MODE_MIME_FOLDING
 *
 * On APR_SUCCESS with ra->len_downstream != 0, *s holds the line: with
 * AP_MODE_MIME_FOLDING it is unfolded and trimmed, otherwise trailing
 * whitespace is removed.  On APR_SUCCESS with ra->len_downstream == 0, no
 * line was produced and *s is untouched.
 * Returns APR_EGENERAL if a MIME-folded line starts with blank or tab, or
 * any error from ap_brigade_ra_line() or apr_brigade_flatten().
 * bb is emptied on entry and again after the line has been copied out.
 */
AP_DECLARE(apr_status_t)
ap_ra_getline_flatten(ap_filter_ra_t * const restrict ra,
                      ap_filter_t * const restrict f,
                      apr_bucket_brigade * const restrict bb,
                      const ap_input_mode_t mode,
                      const apr_read_type_e block,
                      char ** restrict s, const apr_size_t n)
{
    apr_status_t rv;
    apr_size_t len;

    /* caller should have cleared bb; if not, discard bb contents */
    if (!APR_BRIGADE_EMPTY(bb)) {
        apr_brigade_cleanup(bb);  /*(discard (!!) bb contents)*/
    }

    /*(n is converted to apr_off_t when passing to ap_brigade_ra_line())*/
    AP_DEBUG_ASSERT(n <= INT_MAX);  /*(for what size line are you looking?!)*/

    /* get line */
    ra->len_downstream = 0;
    rv = ap_brigade_ra_line(ra, f, bb, mode, block, (apr_off_t) n, -1);
    if (rv != APR_SUCCESS || ra->len_downstream == 0) {
        return rv;
    }
    len = (apr_size_t) ra->len_downstream;

    /* allocate memory if requested */
    if (*s == NULL) {
        /* NOTE(review): assumes f has a request attached (f->r != NULL) */
        *s = apr_palloc(f->r->pool, len);
    }

    /* flatten into buffer (length is passed by pointer and updated with the
     * number of bytes copied) and overwrite LF to NIL terminate string */
    rv = apr_brigade_flatten(bb, *s, &len);
    (void) apr_brigade_cleanup(bb);
    if (rv != APR_SUCCESS) {
        return rv;
    }
    (*s)[len-1] = '\0';

    /* remove trailing whitespace */
    if (mode & AP_MODE_MIME_FOLDING) {
        if (**s == APR_ASCII_BLANK || **s == APR_ASCII_TAB) {
            /* error, mime folded line may not start with space or tab */
            return APR_EGENERAL;
        }
        mime_value_unfold_chomp(*s);
    }
    else {
        rtrim(*s);
    }

    return rv;
}




/*
 * (This code replaces ap_rgetline() and ap_rgetline_core(), which for
 *  compatibility, should be a macro which maps back to this ap_rgetline() func)
 * This replacement attempts compatibility with original behavior.
 * (I think that it is a design flaw that this pulls data from r->input_filters
 *  instead of from, say, the filter that is calling this routine)
 *
 * APR_EOF is returned if EOS is hit before anything else is read.
 *   (old ap_rgetline_core() returned an empty line and APR_SUCCESS, losing the
 *    EOS.  Subsequent calls to the old ap_rgetline_core() would eventually end
 *    up in the core_input_filter() which returns APR_EOF after the socket
 *    closes and is removed from the core brigade)
 * APR_INCOMPLETE is returned if EOS is hit and partial line was read.
 *   (old ap_rgetline_core() returned APR_EGENERAL)
 * An empty line is returned if the line read contained only CRLF or LF.
 * A line with a single space is returned if line was entirely whitespace.
 * This esoteric behavior (was) employed by ap_get_mime_headers(), which 
 * (used to) do its own mime line unfolding, to determine whether or not
 * the line is a continuation line of a folded line, and to differentiate
 * a line with whitespace from a blank line (CRLF) that indicates the end
 * of a section of MIME headers)
 *
 * Potential callers of this routine should prefer to create their own
 * ap_filter_ra_t and use ap_ra_getline_flatten() directly, so that
 * ra->len_downstream can be used instead of (apr_size_t *)read, metadata
 * can be handled appropriately, and so that MIME line unfolding can be
 * handled at a lower level, rather than with the bad hack documented above.
 * NB: unlike ap_rgetline(), direct callers of ap_ra_getline_flatten() on
 * EBCDIC machines must arrange for ASCII to EBCDIC translation if needed
 * (Q: why isn't an ASCII to EBCDIC filter pushed onto the filter
 *     chain as needed and then removed when its job is finished?)
 *
 * This code is very similar to ap_ra_getline_flatten() except that this must
 * detect difference between empty lines and those containing only whitespace
 */
ap_rgetline(char ** restrict s, const apr_size_t n,
            apr_size_t * const restrict read,
            request_rec * const restrict r,
            int fold, apr_bucket_brigade * const restrict bb)
{
    ap_filter_ra_t ra;
    apr_status_rv;
    const ap_input_mode_t mode = AP_MODE_LINE|(fold ? AP_MODE_MIME_FOLDING : 0);

    ap_filter_ra_init(&ra, r->pool, r->connection->bucket_alloc);

    /*(n is converted to apr_off_t when passing to ap_brigade_ra_line())*/
    AP_DEBUG_ASSERT(n <= INT_MAX);  /*(for what size line are you looking?!)*/

    do {
        ap_brigade_cleanup(bb);  /*discard (!!) contents (including metadata!)*/
        ra->len_downstream = 0;
        rv = ap_brigade_ra_line(&ra, r->input_filters, bb, mode,
                                APR_BLOCK_READ, (apr_off_t) n, -1);
    } while (rv == APR_SUCCESS && ra.len_downstream == 0);

    if (ra.len_downstream != 0) {  /* rv == APR_SUCCESS, too */

        /* allocate memory if requested */
        if (*s == NULL) {
            *s = apr_palloc(r->pool, ra.len_downstream);
        }

        /* flatten into buffer and overwrite LF to NIL terminate string */
        (void) apr_brigade_flatten(bb, *s, ra.len_downstream);
        (void) apr_brigade_cleanup(bb);
        (*s)[ra.len_downstream-1] = '\0';

        /* remove trailing whitespace */
        if (mode & AP_MODE_MIME_FOLDING) {
            if (**s != APR_ASCII_BLANK && **s != APR_ASCII_TAB) {
                mime_value_unfold_chomp(*s);
            }
            else { /* error, mime folded line may not start with space or tab */
                return APR_EGENERAL;
            }
        }
        else {
            const char c = **s; /* store first char */
            rtrim(*s);
            if (**s == '\0' && (c == APR_ASCII_BLANK || c == APR_ASCII_TAB)) {
                *s[0] = c;
                *s[1] = '\0';
            }
        }

        *read = strlen(*s);

      #if APR_CHARSET_EBCDIC  /*(needed?  shouldn't this be done as a filter?)*/
        ap_xlate_proto_from_ascii(*s, *read);
      #endif

    }
    else {
        *read = 0;
    }

    ap_filter_ra_destroy(&ra);

    return rv;
}


/* ap_brigade_ra* version of ap_get_mime_headers_core()
 * Reads MIME header fields from r->input_filters into r->headers_in until
 * the blank line that ends the headers, then merges repeated fields.
 * return is void, but if there is an error, r->status is changed
 *   to HTTP_BAD_REQUEST, the connection is marked AP_CONN_CLOSE, and an
 *   "error-notes" entry is set in r->notes
 * (ap_get_mime_headers_core() originally set the offending line in the
 *  error-note, but this routine does not.  It would make more sense to
 *  take the first say, 64 chars of the line instead of a whole lotta garbage)
 * (Caller should set r->status to HTTP_REQUEST_TIMEOUT (or something other
 *  than HTTP_BAD_REQUEST) before entering this routine, and should check if
 *  r->status has changed to HTTP_BAD_REQUEST upon return from this routine)
 * bbtmp is scratch space; it is emptied before returning.
 */
AP_DECLARE(void)
ap_get_mime_headers_core(request_rec * const restrict r,
                         apr_bucket_brigade * const restrict bbtmp)
{
    ap_filter_ra_t ra;
    apr_status_t rv;
    /* a limit of 0 means "no limit": pass NULL instead of a counter */
    int limit_req_fields_decr = r->server->limit_req_fields;
    int * const restrict limit_req_fields_p =
      limit_req_fields_decr != 0
        ? &limit_req_fields_decr
        : NULL;

    ap_filter_ra_init(&ra, r->pool, r->connection->bucket_alloc);

    do {
        rv = ap_brigade_ra_mime_fields(&ra, r->input_filters, bbtmp,
                                       APR_BLOCK_READ, r->headers_in,
                                       limit_req_fields_p);
    } while (rv == APR_SUCCESS && ra.len_downstream == 0);

    if (rv == APR_SUCCESS) {
        /* (if we reach here, line ends in LF and len < 3, so only need
         *  to check that len is 2 and first char is a CR, or len is 1) */
        const apr_off_t len = ra.len_downstream;
        int end_of_headers = (len == 1);

        if (len == 2) {
            apr_bucket * const b = APR_BRIGADE_FIRST(bbtmp);
            const char *data;
            apr_size_t dlen;
            rv = apr_bucket_read(b, &data, &dlen, APR_BLOCK_READ);
            end_of_headers =
              (rv == APR_SUCCESS && dlen != 0 && *data == APR_ASCII_CR);
        }

        if (end_of_headers) {
            apr_table_compress(r->headers_in, APR_OVERLAP_TABLES_MERGE);
            apr_brigade_cleanup(bbtmp);
            ap_filter_ra_destroy(&ra);
            return; /* finished receiving headers; success! */
        }
        /* else invalid line; must be at least 3 chars (token : LF) */
        if (rv == APR_SUCCESS) {
            rv = APR_EGENERAL;
        }
    }

    /* APR_EGENERAL   - invalid field: missing colon, invalid or empty label
     * APR_ENOSPC     - header line was too long, or too many headers
     * APR_INCOMPLETE - premature end of headers
     * APR_EOF        - premature end of headers
     * (and possibly other errors, such as APR_ECONNABORTED)
     * All of these are reported as HTTP_BAD_REQUEST rather than passed on:
     * they do not reflect a caller's request to read data.  (We are not
     * reading data for a caller here; we're reading data for our protocol)
     */
    r->status = HTTP_BAD_REQUEST;
    r->connection->keepalive = AP_CONN_CLOSE;
    if (rv == APR_ENOSPC) {
        /* the field counter only goes negative when the field-count limit
         * (rather than the field-size limit) was the one exceeded */
        if (limit_req_fields_decr < 0) {
            apr_table_setn(r->notes, "error-notes",
                           "The number of request header fields "
                           "exceeds server limit.");
        }
        else {
            apr_table_setn(r->notes, "error-notes",
                           "Size of a request header field exceeds "
                           "server limit."); /* BTW: partial line is in ra */
        }
    }
    else if (rv == APR_INCOMPLETE || rv == APR_EOF) {
        apr_table_setn(r->notes, "error-notes",
                       "Premature end of request headers");
    }
    else { /* invalid field: missing colon, or invalid or empty label */
        apr_table_setn(r->notes, "error-notes",
                       "Invalid header line");  /* BTW: line is in bbtmp */
    }

    apr_brigade_cleanup(bbtmp);
    ap_filter_ra_destroy(&ra);
}

Reply via email to