On Tue, Apr 20, 2021 at 12:40 PM Ruediger Pluem <rpl...@apache.org> wrote:
>
> On 4/18/21 10:00 PM, Yann Ylavic wrote:
>
> >
> > For trunk though, there is the ssl_io_filter_coalesce() case where
> > !ap_filter_should_yield() does not mean that
> > ap_filter_output_pending() has nothing to do. That's because
> > ssl_io_filter_coalesce() does not play the
> > ap_filter_{setaside,reinstate}_brigade() game for now, even though it
> > potentially retains data.
> > So in r1879416 I made a band aid such that ssl_io_filter_coalesce()
> > releases its data when it's called from ap_filter_output_pending(),
>
> Why should ap_filter_output_pending() call ssl_io_filter_coalesce?
> As far as I see ssl_io_filter_coalesce does not get added to
> the pending_output_filters ring and its private filter brigade would need
> to be non empty to get called. Or is it called indirectly?

So I made ssl_io_filter_coalesce() enter the pending_output_filters
ring in r1889938 but it's still not enough because when it's called
with an empty brigade (like ap_filter_output_pending() does),
ssl_io_filter_coalesce() still retains its data.
I could special-case the empty brigade so that
ssl_io_filter_coalesce() releases everything, but this does not
address the tunneling loop case in mod_proxy where we shouldn't call
ap_filter_output_pending() if ap_filter_should_yield() already (or we
risk blocking).

So my plan now is to define a new bucket type (WC, for Write
Completion) and use it for both ap_filter_output_pending() (instead of
the empty brigade) and ap_proxy_transfer_between_connections(), to
tell coalescing/buffering filters that they should pass their data.
Any metadata bucket will do for ssl_io_filter_coalesce(), but the
FLUSH bucket is a bit too much (could make the core output filter
block) so there is no existing one to (ab)use AFAICT.

This is the attached patch, WDYT?

The WC bucket could also help reintroduce THRESHOLD_MIN_WRITE
(FlushMinThreshold) which was removed from the core output filter in
trunk because (I think) it defeated the write completion
(setaside/reinstate) mechanism. Not in this patch, but if the WC
bucket sounds like a good plan, it could be a follow up..

Regards;
Yann.
Index: include/util_filter.h
===================================================================
--- include/util_filter.h	(revision 1889852)
+++ include/util_filter.h	(working copy)
@@ -763,7 +763,31 @@ AP_DECLARE(void) ap_filter_protocol(ap_filter_t* f
 /** Filter is incompatible with "Cache-Control: no-transform" */
 #define AP_FILTER_PROTO_TRANSFORM 0x20
 
+/** Write Completion (WC) bucket */
+AP_DECLARE_DATA extern const apr_bucket_type_t ap_bucket_type_wc;
+
 /**
+ * Determine if a bucket is a Write Completion (WC) bucket
+ * @param e The bucket to inspect
+ * @return true or false
+ */
+#define AP_BUCKET_IS_WC(e) ((e)->type == &ap_bucket_type_wc)
+
+/**
+ * Make the bucket passed in a Write Completion (WC) bucket
+ * @param b The bucket to make into a WC bucket
+ * @return The new bucket, or NULL if allocation failed
+ */
+AP_DECLARE(apr_bucket *) ap_bucket_wc_make(apr_bucket *b);
+
+/**
+ * Create a bucket referring to a Write Completion (WC).
+ * @param list The freelist from which this bucket should be allocated
+ * @return The new bucket, or NULL if allocation failed
+ */
+AP_DECLARE(apr_bucket *) ap_bucket_wc_create(apr_bucket_alloc_t *list);
+
+/**
  * @}
  */
 
Index: server/util_filter.c
===================================================================
--- server/util_filter.c	(revision 1889852)
+++ server/util_filter.c	(working copy)
@@ -976,6 +976,12 @@ AP_DECLARE(apr_status_t) ap_filter_setaside_brigad
              e = next) {
             next = APR_BUCKET_NEXT(e);
 
+            /* Strip WC buckets added by ap_filter_output_pending(). */
+            if (AP_BUCKET_IS_WC(e)) {
+                apr_bucket_delete(e);
+                continue;
+            }
+
             /* Opaque buckets (length == -1) are moved, so assumed to have
              * next EOR's lifetime or at least the lifetime of the connection.
              */
@@ -1268,7 +1274,10 @@ AP_DECLARE_NONSTD(int) ap_filter_output_pending(co
         if (!APR_BRIGADE_EMPTY(fp->bb)) {
             ap_filter_t *f = fp->f;
             apr_status_t rv;
+            apr_bucket *b;
 
+            b = ap_bucket_wc_create(bb->bucket_alloc);
+            APR_BRIGADE_INSERT_TAIL(bb, b);
             rv = ap_pass_brigade(f, bb);
             apr_brigade_cleanup(bb);
 
@@ -1279,8 +1288,7 @@ AP_DECLARE_NONSTD(int) ap_filter_output_pending(co
                 break;
             }
 
-            if ((fp->bb && !APR_BRIGADE_EMPTY(fp->bb))
-                    || (f->next && ap_filter_should_yield(f->next))) {
+            if (ap_filter_should_yield(f)) {
                 rc = OK;
                 break;
             }
@@ -1391,3 +1399,41 @@ AP_DECLARE(void) ap_filter_protocol(ap_filter_t *f
 {
     f->frec->proto_flags = flags ;
 }
+
+
+static apr_status_t wc_bucket_read(apr_bucket *b, const char **str,
+                                   apr_size_t *len, apr_read_type_e block)
+{
+    *str = NULL;
+    *len = 0;
+    return APR_SUCCESS;
+}
+
+AP_DECLARE(apr_bucket *) ap_bucket_wc_make(apr_bucket *b)
+{
+    b->length = 0;
+    b->start  = 0;
+    b->data   = NULL;
+    b->type   = &ap_bucket_type_wc;
+
+    return b;
+}
+
+AP_DECLARE(apr_bucket *) ap_bucket_wc_create(apr_bucket_alloc_t *list)
+{
+    apr_bucket *b = apr_bucket_alloc(sizeof(*b), list);
+
+    APR_BUCKET_INIT(b);
+    b->free = apr_bucket_free;
+    b->list = list;
+    return ap_bucket_wc_make(b);
+}
+
+AP_DECLARE_DATA const apr_bucket_type_t ap_bucket_type_wc = {
+    "WC", 5, APR_BUCKET_METADATA,
+    apr_bucket_destroy_noop,
+    wc_bucket_read,
+    apr_bucket_setaside_noop,
+    apr_bucket_split_notimpl,
+    apr_bucket_simple_copy
+};
Index: modules/proxy/proxy_util.c
===================================================================
--- modules/proxy/proxy_util.c	(revision 1889852)
+++ modules/proxy/proxy_util.c	(working copy)
@@ -4410,8 +4410,9 @@ PROXY_DECLARE(apr_status_t) ap_proxy_transfer_betw
 {
     apr_status_t rv;
     int flush_each = 0;
-    unsigned int num_reads = 0;
+    int num_reads = 0;
     apr_off_t len;
+    apr_bucket *b;
 
     /*
      * Compat: since FLUSH_EACH is default (and zero) for legacy reasons, we
@@ -4465,7 +4466,6 @@ PROXY_DECLARE(apr_status_t) ap_proxy_transfer_betw
         }
         ap_proxy_buckets_lifetime_transform(r, bb_i, bb_o);
         if (flush_each) {
-            apr_bucket *b;
             /*
              * Do not use ap_fflush here since this would cause the flush
              * bucket to be sent in a separate brigade afterwards which
@@ -4479,6 +4479,10 @@ PROXY_DECLARE(apr_status_t) ap_proxy_transfer_betw
             b = apr_bucket_flush_create(bb_o->bucket_alloc);
             APR_BRIGADE_INSERT_TAIL(bb_o, b);
         }
+        else {
+            b = ap_bucket_wc_create(bb_o->bucket_alloc);
+            APR_BRIGADE_INSERT_TAIL(bb_o, b);
+        }
         rv = ap_pass_brigade(c_o->output_filters, bb_o);
         apr_brigade_cleanup(bb_o);
         if (rv != APR_SUCCESS) {
@@ -4493,23 +4497,13 @@ PROXY_DECLARE(apr_status_t) ap_proxy_transfer_betw
         /* Yield if the output filters stack is full? This is to avoid
          * blocking and give the caller a chance to POLLOUT async.
          */
-        if (flags & AP_PROXY_TRANSFER_YIELD_PENDING) {
-            int rc = OK;
-
-            if (!ap_filter_should_yield(c_o->output_filters)) {
-                rc = ap_filter_output_pending(c_o);
-            }
-            if (rc == OK) {
-                ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r,
-                              "ap_proxy_transfer_between_connections: "
-                              "yield (output pending)");
-                rv = APR_INCOMPLETE;
-                break;
-            }
-            if (rc != DECLINED) {
-                rv = AP_FILTER_ERROR;
-                break;
-            }
+        if ((flags & AP_PROXY_TRANSFER_YIELD_PENDING)
+                && ap_filter_should_yield(c_o->output_filters)) {
+            ap_log_rerror(APLOG_MARK, APLOG_TRACE2, 0, r,
+                          "ap_proxy_transfer_between_connections: "
+                          "yield (output pending)");
+            rv = APR_INCOMPLETE;
+            break;
         }
 
         /* Yield if we keep hold of the thread for too long? This gives

Reply via email to