On Tue, Jan 7, 2020 at 5:06 PM Joe Orton <[email protected]> wrote:
>
> This is a classic heisenbug since it is timing related, when you add
> more debugging it slows the server down enough that the client can keep
> up, write never returns EAGAIN, and the bug disappears, I think...
OK, that explains why I can't reproduce..
Based on your traces, I think I figured out how it happens though, see below.
>
> I'm afraid I don't understand the dance being done with setaside and
> reinstate in the filters. Following the debugging through from the last
> ap_pass_brigade from mod_test_pass_brigade -
>
> [Tue Jan 07 13:09:32.090818 2020] [:info] [pid 117876:tid 140235421763328]
> [client 127.0.0.1:41112] [mod_test_pass_brigade] wrote 8192 of 8192 bytes
> [Tue Jan 07 13:09:32.090821 2020] [:info] [pid 117876:tid 140235421763328]
> [client 127.0.0.1:41112] [mod_test_pass_brigade] done writing 10240000 of
> 10240000 bytes
> [Tue Jan 07 13:09:32.090824 2020] [core:trace6] [pid 117876:tid
> 140235421763328] util_filter.c(1015): [client 127.0.0.1:41112] reinstate full
> brigade to full brigade in 'req_core' output filter
> [Tue Jan 07 13:09:32.090827 2020] [core:trace8] [pid 117876:tid
> 140235421763328] util_filter.c(1133): [client 127.0.0.1:41112] brigade
> contains: bytes: 49205, non-file bytes: 49205, eor buckets: 1, morphing
> buckets
> : 0
> [Tue Jan 07 13:09:32.090829 2020] [core:trace6] [pid 117876:tid
> 140235421763328] util_filter.c(942): [client 127.0.0.1:41112] setaside full
> brigade to empty brigade in 'req_core' output filter
Here, the call stack is:
ap_process_request()
=> ap_process_async_request()
=> ap_process_request_after_handler()
=> ap_pass_brigade()
=> ap_request_core_filter()
so ap_request_core_filter() is called with the EOR bucket, but since
there are still pending/setaside/unsent data in the core filter then
ap_request_core_filter() returns without passing all of its own
pending data (including the EOR).
> [Tue Jan 07 13:09:32.090835 2020] [core:trace6] [pid 117876:tid
> 140235421763328] util_filter.c(1015): [client 127.0.0.1:41112] reinstate full
> brigade to full brigade in 'core' output filter
Here, we are now at:
ap_process_request()
=> ap_pass_brigade(c->output_filters, ...)
but ap_request_core_filter() is not a connection filter (i.e. not part
of c->output_filters), so it will never be called again.
Since it's not called from ap_run_output_pending() either, I think
mpm_event has the bug too.
Could you please try the attached patch?
The goal is to make ap_request_core_filter() a connection filter, so
that it remains in c->output_filters until the EOR is handled.
The patch has subtleties, but ap_request_core_filter() is really
special. It has to do its best to use r->pool for setasides and
morphing buckets until the EOR, and depending on network congestion
there may be multiple ap_request_core_filter()s stacked (one per
request not fully flushed) so I had to introduce
AP_FILTER_PROTO_INSERT_BEFORE to stack them in the right order...
Regards,
Yann.
Index: include/util_filter.h
===================================================================
--- include/util_filter.h (revision 1872441)
+++ include/util_filter.h (working copy)
@@ -752,6 +752,9 @@ AP_DECLARE(void) ap_filter_protocol(ap_filter_t* f
/** Filter is incompatible with "Cache-Control: no-transform" */
#define AP_FILTER_PROTO_TRANSFORM 0x20
+/** Filter is inserted before existing filters with same ftype */
+#define AP_FILTER_PROTO_INSERT_BEFORE 0x40
+
/**
* @}
*/
Index: server/util_filter.c
===================================================================
--- server/util_filter.c (revision 1872441)
+++ server/util_filter.c (working copy)
@@ -42,6 +42,8 @@
*/
#define INSERT_BEFORE(f, before_this) ((before_this) == NULL \
|| (before_this)->frec->ftype > (f)->frec->ftype \
+ || ((before_this)->frec->ftype == (f)->frec->ftype && \
+ ((f)->frec->proto_flags & AP_FILTER_PROTO_INSERT_BEFORE)) \
|| (before_this)->r != (f)->r)
/* Trie structure to hold the mapping from registered
Index: modules/http/http_core.c
===================================================================
--- modules/http/http_core.c (revision 1872441)
+++ modules/http/http_core.c (working copy)
@@ -268,8 +268,11 @@ static int http_create_request(request_rec *r)
NULL, r, r->connection);
ap_add_output_filter_handle(ap_http_outerror_filter_handle,
NULL, r, r->connection);
+ /* ap_request_core_filter() is an AP_FTYPE_CONNECTION filter
+ * which is passed 'r' as f->ctx (not f->r).
+ */
ap_add_output_filter_handle(ap_request_core_filter_handle,
- NULL, r, r->connection);
+ r, NULL, r->connection);
}
return OK;
Index: modules/http/http_request.c
===================================================================
--- modules/http/http_request.c (revision 1872441)
+++ modules/http/http_request.c (working copy)
@@ -350,7 +350,6 @@ AP_DECLARE(void) ap_process_request_after_handler(
apr_bucket_brigade *bb;
apr_bucket *b;
conn_rec *c = r->connection;
- ap_filter_t *f;
bb = ap_acquire_brigade(c);
@@ -371,15 +370,11 @@ AP_DECLARE(void) ap_process_request_after_handler(
/* All the request filters should have bailed out on EOS, and in any
* case they shouldn't have to handle this EOR which will destroy the
- * request underneath them. So go straight to the core request filter
- * which (if any) will take care of the setaside buckets.
+ * request underneath them. So go straight to the connection filters,
+ * the first of which being ap_request_core_filter() to take care of
+ * request-level setaside buckets.
*/
- for (f = r->output_filters; f; f = f->next) {
- if (f->frec == ap_request_core_filter_handle) {
- break;
- }
- }
- ap_pass_brigade(f ? f : c->output_filters, bb);
+ ap_pass_brigade(c->output_filters, bb);
/* The EOR bucket has either been handled by an output filter (eg.
* deleted or moved to a buffered_bb => no more in bb), or an error
Index: server/core.c
===================================================================
--- server/core.c (revision 1872441)
+++ server/core.c (working copy)
@@ -5917,8 +5917,9 @@ static void register_hooks(apr_pool_t *p)
ap_register_output_filter("CORE", ap_core_output_filter,
NULL, AP_FTYPE_NETWORK);
ap_request_core_filter_handle =
- ap_register_output_filter("REQ_CORE", ap_request_core_filter,
- NULL, AP_FTYPE_CONNECTION - 1);
+ ap_register_output_filter_protocol("REQ_CORE", ap_request_core_filter,
+ NULL, AP_FTYPE_CONNECTION + 1,
+ AP_FILTER_PROTO_INSERT_BEFORE);
ap_subreq_core_filter_handle =
ap_register_output_filter("SUBREQ_CORE", ap_sub_req_output_filter,
NULL, AP_FTYPE_CONTENT_SET);
Index: server/request.c
===================================================================
--- server/request.c (revision 1872441)
+++ server/request.c (working copy)
@@ -2061,9 +2061,9 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_sub_req_ou
AP_CORE_DECLARE_NONSTD(apr_status_t) ap_request_core_filter(ap_filter_t *f,
apr_bucket_brigade *bb)
{
+ request_rec *r = f->ctx;
apr_status_t status = APR_SUCCESS;
apr_read_type_e block = APR_NONBLOCK_READ;
- conn_rec *c = f->r->connection;
apr_bucket *flush_upto = NULL;
apr_bucket_brigade *tmp_bb;
apr_size_t tmp_bb_len = 0;
@@ -2079,10 +2079,14 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_request_co
return ap_pass_brigade(f->next, bb);
}
- conf = ap_get_core_module_config(f->r->server->module_config);
+ conf = ap_get_core_module_config(r->server->module_config);
- /* Reinstate any buffered content */
+ /* Reinstate any buffered content, pretending to be a request filter
+ * such that morphing buckets are not flushed and can be handled here.
+ */
+ f->r = r;
ap_filter_reinstate_brigade(f, bb, &flush_upto);
+ f->r = NULL;
/* After EOR is passed downstream, anything pooled on the request may
* be destroyed (including bb!), but not tmp_bb which is acquired from
@@ -2129,7 +2133,7 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_request_co
break;
}
/* Flush everything so far and retry in blocking mode */
- bucket = apr_bucket_flush_create(c->bucket_alloc);
+ bucket = apr_bucket_flush_create(f->c->bucket_alloc);
block = APR_BLOCK_READ;
}
else {
@@ -2161,14 +2165,20 @@ AP_CORE_DECLARE_NONSTD(apr_status_t) ap_request_co
}
}
- /* Don't touch *bb after seen_eor */
+ /* Don't touch *r or *bb after seen_eor */
if (!seen_eor) {
apr_status_t rv;
+
+ /* Setaside any remaining content, pretending to be a request filter
+ * such that r->pool can be used.
+ */
+ f->r = r;
APR_BRIGADE_PREPEND(bb, tmp_bb);
rv = ap_filter_setaside_brigade(f, bb);
if (status == APR_SUCCESS) {
status = rv;
}
+ f->r = NULL;
}
ap_release_brigade(f->c, tmp_bb);