I've prototyped a modification to the event MPM that uses a (mostly) nonblocking
output filter in place of ap_core_output_filter().

This patch adds a new connection state, CONN_STATE_WRITE_COMPLETION.

The new network-level filter, event_output_filter(), does nonblocking writes (except when it encounters a flush bucket that isn't immediately before or
after an EOS or EOC).  If an attempted write yields EAGAIN, the filter
does a setaside of the unwritten data.

If there still is unwritten data buffered in event_output_filter() upon completion
of a request, the filter puts the connection in write-completion state.

When the connection is in write-completion state, the main process_socket() loop in event.c runs a loop to output the remaining data. The basic logic is:

    while (state == CONN_STATE_WRITE_COMPLETION) {
        rv = event_output_filter(...);
        if (APR_STATUS_IS_EAGAIN(rv)) {
            block until writeability on client socket (or timeout);
        }
    }

The event_output_filter() is responsible for changing the connection state
to CONN_STATE_LINGER or CONN_STATE_CHECK_REQUEST_LINE_READABLE
(depending on the keepalive settings) when all of the output has been written.

Note that this patch keeps a worker thread waiting on the connection until
it exits CONN_STATE_WRITE_COMPLETION.  The real benefits of the
nonblocking output filter will come later, when the event MPM code is
enhanced to hand off the connection to the central poller thread whenever it needs to wait for writeability. I've purposely avoided making that change so far; I want to make sure the nonblocking filter design is sound before
adding the complexity of truly async write completion.

Known bugs:
- The new filter doesn't support nonblocking reads of pipe or socket buckets.
    (It's functionally correct, I think, but not optimal.)
- It doesn't yet support "EnableSendfile off"; and it may not even compile
    on platforms that don't have sendfile.
  - Because event_output_filter() can return APR_EAGAIN, the few things
that expect it to do a blocking write--like ap_rwrite()--are broken. This
    could be fixed either by:
      1. Changing the callers to understand an EAGAIN response, or
      2. Changing event_output_filter() to return APR_SUCCESS instead
of APR_EAGAIN if the connection is not in write-completion state.

Next steps:
I need a volunteer or two to review this patch.  Given the fundamental
nature of some of the design decisions involved, I'd like to get a second
opinion before committing this into 2.3-dev.

Thanks,
Brian
Index: server/mpm/experimental/event/Makefile.in
===================================================================
--- server/mpm/experimental/event/Makefile.in   (revision 267492)
+++ server/mpm/experimental/event/Makefile.in   (working copy)
@@ -1,5 +1,5 @@
 
 LTLIBRARY_NAME    = libevent.la
-LTLIBRARY_SOURCES = event.c fdqueue.c pod.c
+LTLIBRARY_SOURCES = event.c fdqueue.c pod.c event_filters.c
 
 include $(top_srcdir)/build/ltlib.mk
Index: server/mpm/experimental/event/event.c
===================================================================
--- server/mpm/experimental/event/event.c       (revision 267492)
+++ server/mpm/experimental/event/event.c       (working copy)
@@ -50,6 +50,7 @@
 #include "apr_file_io.h"
 #include "apr_thread_proc.h"
 #include "apr_signal.h"
+#include "apr_support.h"
 #include "apr_thread_mutex.h"
 #include "apr_proc_mutex.h"
 #include "apr_poll.h"
@@ -637,6 +638,25 @@
             cs->state = CONN_STATE_LINGER;
         }
     }
+    
+    if (cs->state == CONN_STATE_WRITE_COMPLETION) {
+        ap_filter_t *output_filter = c->output_filters;
+        while (output_filter->next != NULL) {
+            output_filter = output_filter->next;
+        }
+        do {
+            apr_status_t rv;
+            rv = output_filter->frec->filter_func.out_func(output_filter, 
NULL);
+            if (APR_STATUS_IS_EAGAIN(rv)) {
+                rv = apr_wait_for_io_or_timeout(NULL, cs->pfd.desc.s, 0);
+                if (rv != APR_SUCCESS) {
+                    ap_log_error(APLOG_MARK, APLOG_CRIT, rv, ap_server_conf,
+                                 "poll for write completion failed");
+                    break;
+                }
+            }
+        } while (cs->state == CONN_STATE_WRITE_COMPLETION);
+    }
 
     if (cs->state == CONN_STATE_LINGER) {
         ap_lingering_close(c);
@@ -2189,6 +2209,8 @@
     return OK;
 }
 
+extern apr_status_t event_output_filter(ap_filter_t *f, apr_bucket_brigade 
*bb);
+
 static void event_hooks(apr_pool_t * p)
 {
     /* The worker open_logs phase must run before the core's, or stderr
@@ -2203,6 +2225,8 @@
      * to retrieve it, so register as REALLY_FIRST
      */
     ap_hook_pre_config(worker_pre_config, NULL, NULL, APR_HOOK_REALLY_FIRST);
+    ap_core_output_filter_handle = ap_register_output_filter("CORE", 
event_output_filter,
+                                                             NULL, 
AP_FTYPE_NETWORK);
 }
 
 static const char *set_daemons_to_start(cmd_parms *cmd, void *dummy,
Index: modules/http/http_core.c
===================================================================
--- modules/http/http_core.c    (revision 267492)
+++ modules/http/http_core.c    (working copy)
@@ -130,9 +130,10 @@
 
             if (c->keepalive != AP_CONN_KEEPALIVE || c->aborted 
                     || ap_graceful_stop_signalled()) {
-                cs->state = CONN_STATE_LINGER;
+                cs->state = CONN_STATE_WRITE_COMPLETION;
             }
-            else if (!c->data_in_input_filters) {
+            else if ((cs->state != CONN_STATE_WRITE_COMPLETION) &&
+                     !c->data_in_input_filters) {
                 cs->state = CONN_STATE_CHECK_REQUEST_LINE_READABLE;
             }
 
Index: modules/http/http_request.c
===================================================================
--- modules/http/http_request.c (revision 267492)
+++ modules/http/http_request.c (working copy)
@@ -264,7 +264,7 @@
         access_status = OK;
     }
 
-    if (access_status == OK) {
+    if ((access_status == OK) || APR_STATUS_IS_EAGAIN(access_status)) {
         ap_finalize_request_protocol(r);
     }
     else {
Index: include/httpd.h
===================================================================
--- include/httpd.h     (revision 267511)
+++ include/httpd.h     (working copy)
@@ -1074,6 +1074,7 @@
 typedef enum  {
     CONN_STATE_CHECK_REQUEST_LINE_READABLE,
     CONN_STATE_READ_REQUEST_LINE,
+    CONN_STATE_WRITE_COMPLETION,
     CONN_STATE_LINGER,
 } conn_state_e;
 

Attachment: event_filters.c
Description: Binary data

Reply via email to