Comments inline.
--On December 31, 2005 11:45:14 PM +0000 [EMAIL PROTECTED] wrote:
--- httpd/httpd/trunk/include/httpd.h (original)
+++ httpd/httpd/trunk/include/httpd.h Sat Dec 31 15:45:11 2005
@@ -967,6 +967,16 @@
/** A flag to determine if the eos bucket has been sent yet */
int eos_sent;
+ /** Number of leading blank lines encountered before request header */
+ int num_blank_lines;
+
+ /** A buffered header line, used to support header folding while
+ * reading the request */
+ char *pending_header_line;
+
+ /** If nonzero, the number of bytes allocated to hold pending_header_line */
+ apr_size_t pending_header_size;
+
As Roy pointed out, these fields should not be in request_rec. A filter or
connection context, perhaps. But, not here. (More as to why below.)
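To make that concrete, here's roughly the shape I have in mind. This is only a
sketch; the names (http_parse_ctx, pending_header_len) are made up:

typedef struct http_parse_ctx {
    int num_blank_lines;             /* leading blank lines seen so far */
    char *pending_header_line;       /* partially-folded header line */
    apr_size_t pending_header_size;  /* bytes allocated for the buffer */
    apr_size_t pending_header_len;   /* bytes used, so we never strlen() it */
} http_parse_ctx;

Hang that off conn_rec via ap_set_module_config (or make it the ctx of the
HTTP_IN filter) and reset it between requests on a keep-alive connection;
then none of this parser state has to live in request_rec at all.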
...snip...
+static apr_status_t process_request_line(request_rec *r, char *line,
+ int skip_first)
+{
+ if (!skip_first && (r->the_request == NULL)) {
+ /* This is the first line of the request */
+ if ((line == NULL) || (*line == '\0')) {
+ /* We skip empty lines because browsers have to tack a CRLF on to the end
+ * of POSTs to support old CERN webservers.
+ */
+ int max_blank_lines = r->server->limit_req_fields;
+ if (max_blank_lines <= 0) {
+ max_blank_lines = DEFAULT_LIMIT_REQUEST_FIELDS;
+ }
+ r->num_blank_lines++;
+ if (r->num_blank_lines < max_blank_lines) {
+ return APR_SUCCESS;
+ }
+ }
+ set_the_request(r, line);
+ }
This will cause a segfault if line is null and we are at or above
max_blank_lines. Perhaps you meant for an else clause here?
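i.e., something like this (untested sketch; the max_blank_lines computation
stays as it is above):

if ((line == NULL) || (*line == '\0')) {
    r->num_blank_lines++;
    if (r->num_blank_lines < max_blank_lines) {
        return APR_SUCCESS;
    }
    /* too many leading blank lines: fail the request rather than
     * falling through to set_the_request() with a NULL/empty line
     */
    r->status = HTTP_BAD_REQUEST;
    return APR_EGENERAL;
}
set_the_request(r, line);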
+ else {
+ /* We've already read the first line of the request. This is either
+ * a header field or the blank line terminating the header
+ */
+ if ((line == NULL) || (*line == '\0')) {
+ if (r->pending_header_line != NULL) {
+ apr_status_t rv = set_mime_header(r, r->pending_header_line);
+ if (rv != APR_SUCCESS) {
+ return rv;
+ }
+ r->pending_header_line = NULL;
+ }
+ if (r->status == HTTP_REQUEST_TIME_OUT) {
+ apr_table_compress(r->headers_in, APR_OVERLAP_TABLES_MERGE);
+ r->status = HTTP_OK;
Say what? ...looks at rest of file...
Is this because r->status is unset and we're saying that it's 'okay' to
proceed with the request? If so, this *really* needs a comment to that
effect. It makes no sense whatsoever otherwise. (We should probably
remove the hack to set it to HTTP_REQUEST_TIME_OUT initially as part of
these changes.)
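For example (purely illustrative; nothing like this exists today), an explicit
parse state would make the intent obvious and let the sentinel go away:

typedef enum {
    HDR_STATE_REQUEST_LINE,   /* still waiting for the request line */
    HDR_STATE_MIME_HEADERS,   /* reading header fields */
    HDR_STATE_DONE,           /* blank line seen; headers are complete */
    HDR_STATE_ERROR           /* r->status holds the real error code */
} hdr_parse_state;

Then the read loop below can run while the state isn't DONE or ERROR instead of
keying off r->status == HTTP_REQUEST_TIME_OUT.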
+ }
+ }
+ else {
+ if ((*line == ' ') || (*line == '\t')) {
+ /* This line is a continuation of the previous one */
+ if (r->pending_header_line == NULL) {
+ r->pending_header_line = line;
+ r->pending_header_size = 0;
+ }
+ else {
+ apr_size_t pending_len = strlen(r->pending_header_line);
+ apr_size_t fold_len = strlen(line);
This seems really expensive. You shouldn't need to recount pending_len
each time.
+ if (pending_len + fold_len >
+ r->server->limit_req_fieldsize) {
+ /* CVE-2004-0942 */
+ r->status = HTTP_BAD_REQUEST;
+ return APR_ENOSPC;
+ }
+ if (pending_len + fold_len + 1 > r->pending_header_size) {
+ /* Allocate a new buffer big enough to hold the
+ * concatenated lines
+ */
+ apr_size_t new_size = r->pending_header_size;
+ char *new_buf;
+ if (new_size == 0) {
+ new_size = pending_len + fold_len + 1;
+ }
+ else {
+ do {
+ new_size *= 2;
+ } while (new_size < pending_len + fold_len + 1);
+ }
+ new_buf = (char *)apr_palloc(r->pool, new_size);
+ memcpy(new_buf, r->pending_header_line, pending_len);
+ r->pending_header_line = new_buf;
+ r->pending_header_size = new_size;
+ }
+ memcpy(r->pending_header_line + pending_len, line,
+ fold_len + 1);
See, we know how much we've written each time...
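Something along these lines (sketch; pending_header_len is a hypothetical
'bytes used' counter kept next to pending_header_size, wherever that state
ends up living) avoids the strlen() on every folded line:

apr_size_t pending_len = r->pending_header_len;  /* tracked, not recounted */
apr_size_t fold_len = strlen(line);

if (pending_len + fold_len > r->server->limit_req_fieldsize) {
    r->status = HTTP_BAD_REQUEST;
    return APR_ENOSPC;
}
/* grow the buffer exactly as above if it's too small, then: */
memcpy(r->pending_header_line + pending_len, line, fold_len + 1);
r->pending_header_len = pending_len + fold_len;  /* remember for the next fold */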
+ /* Read and process lines of the request until we
+ * encounter a complete request header, an error, or EAGAIN
+ */
+ tmp_bb = apr_brigade_create(r->pool, r->connection->bucket_alloc);
+ while (r->status == HTTP_REQUEST_TIME_OUT) {
+ char *line = NULL;
+ apr_size_t line_length;
+ apr_size_t length_limit;
+ int first_line = (r->the_request == NULL);
+ if (first_line) {
+ length_limit = r->server->limit_req_line;
+ }
+ else {
+ if (r->assbackwards) {
+ r->status = HTTP_OK;
+ break;
+ }
+ length_limit = r->server->limit_req_fieldsize;
+ }
+ /* TODO: use a nonblocking call in place of ap_rgetline */
+ rv = ap_rgetline(&line, length_limit + 2,
+ &line_length, r, 0, tmp_bb);
+ if (rv == APR_SUCCESS) {
+ rv = process_request_line(r, line, 0);
+ }
+ if (rv != APR_SUCCESS) {
+ r->request_time = apr_time_now();
+ /* ap_rgetline returns APR_ENOSPC if it fills up the
+ * buffer before finding the end-of-line. This is only going to
+ * happen if it exceeds the configured limit for a request-line.
+ */
+ if (rv == APR_ENOSPC) {
+ if (first_line) {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
+ "request failed: URI too long (longer
than %d)", + r->server->limit_req_line);
+ r->status = HTTP_REQUEST_URI_TOO_LARGE;
+ r->proto_num = HTTP_VERSION(1,0);
+ r->protocol = apr_pstrdup(r->pool, "HTTP/1.0");
+ }
+ else {
+ ap_log_rerror(APLOG_MARK, APLOG_ERR, 0, r,
+ "request failed: header line too long "
+ "(longer than %d)",
+ r->server->limit_req_fieldsize);
+ r->status = HTTP_BAD_REQUEST;
+ }
+ }
+ break;
If I understand your direction, it'd bail out here if it ever got EAGAIN?
+ }
+ }
+ apr_brigade_destroy(tmp_bb);
+ return rv;
+}
+
+request_rec *ap_read_request(conn_rec *conn)
+{
+ request_rec *r;
+ apr_status_t rv;
+
+ r = init_request(conn);
+
+ rv = read_partial_request(r);
+ /* TODO poll if EAGAIN */
+ if (r->status != HTTP_OK) {
+ ap_send_error_response(r, 0);
+ ap_update_child_status(conn->sbh, SERVER_BUSY_LOG, r);
+ ap_run_log_transaction(r);
+ return NULL;
+ }
Obviously, that TODO can't be handled here if you want to do real polling. This
would be the wrong place to poll. You have to exit out of ap_read_request
and go back up to an 'event thread' that waits until there's data to read
on any incoming sockets. Then, you'd have to call ap_read_request again to
'restart' the parser whenever there is more data to read.
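Very roughly, the shape would be something like this; ap_async_read_request()
and the per-connection resume state are hypothetical, and the pollset setup is
omitted:

/* event-thread sketch: pollset already populated with client sockets,
 * one conn_rec per descriptor in client_data
 */
for (;;) {
    const apr_pollfd_t *ready;
    apr_int32_t num, i;

    apr_pollset_poll(pollset, -1, &num, &ready);
    for (i = 0; i < num; i++) {
        conn_rec *c = ready[i].client_data;
        apr_status_t rv = ap_async_read_request(c);  /* restart the parser */

        if (APR_STATUS_IS_EAGAIN(rv)) {
            continue;  /* parser state lives in the conn ctx; wait for
                        * the socket to become readable again */
        }
        /* request complete (or failed): hand it off for processing */
    }
}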
In my opinion, this code isn't even close to being async. So, I wonder why
it was even merged to trunk right now. You'd have to deal with partial
lines and the state the header parser is in when it gets the next chunk of
data - which this code doesn't even try to do. The current code is just
going to bail when it doesn't have a 'complete' line.
My hunch is that you plan to build up pending_header_line and delay parsing
until you have the line terminators; but that's not going to work well with
non-blocking sockets, because you have to hold on to whatever you just read
non-blockingly, which may include multiple MIME header lines or, icky,
even part of the request body or the next request. This just lends more
credence to the argument that request_rec is the wrong place for the
buffer. It's clearly connection or connection-filter oriented...
FWIW, serf does all of its header parsing network asynchronously; but the
HTTP header parser and buckets have a concept of state:
<http://svn.webdav.org/repos/projects/serf/trunk/buckets/response_buckets.c>
(IIRC, another point that'll kill httpd is that httpd mis-uses EAGAIN; serf
has a definition that EAGAIN means some data is read *and* you need to ask
for more. httpd's filters don't have that. One more issue that httpd got
wrong...)
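To spell out the difference (a sketch of the contract, not serf's actual API):

char buf[8000];
apr_size_t len;
apr_status_t rv;

do {
    len = sizeof(buf);
    rv = reader_read(ctx, buf, &len);  /* hypothetical serf-style reader */
    process_bytes(buf, len);           /* len may be nonzero even when
                                        * rv == APR_EAGAIN: "here's what I
                                        * have, ask me again" */
} while (APR_STATUS_IS_EAGAIN(rv));

With httpd's filter contract, EAGAIN just means 'nothing for you', so a caller
can't get partial data and a keep-reading hint at the same time.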
To me, it sounds like we need a way for the MPM (or whatever) to redefine
the accept logic instead of relying upon the core. Oh, yah, we do that
already (heh) by allowing them to call something other than
ap_process_connection: such as with ap_process_http_async_connection().
So, why are we even touching ap_read_request() at all? This is going to
necessitate an ap_async_read_request() anyway... -- justin