On Fri, 2009-10-16 at 08:00 +1100, Bojan Smojver wrote:
> While playing with slowloris against prefork, I wrote the attached
> craziness.

Here is another take on the problem, call it "Craziness 2.0".

The idea here is that a busy server is highly unlikely to have all of
its children stuck reading over a maintenance interval (i.e. we
expect at least one of those readers to move on to some other state
during the interval).

So, if we find that all children are stuck reading, we simply signal
each child to close its socket (SIGINT was chosen arbitrarily for this
- it should probably be something else). After closing the sockets by
force, we go back to handling new requests, but this time without
needing to fork more children, which should be more gentle on the
machine running the thing.

A similar approach may work for worker too, but the sockets would have
to be remembered in thread-specific variables and the signalling would
have to be per-thread.

As always, glad to provide entertainment to the list ;-)

-- 
Bojan
--- httpd-2.2.14/server/mpm/prefork/prefork.c	2009-02-01 07:54:55.000000000 +1100
+++ httpd-2.2.14-p/server/mpm/prefork/prefork.c	2009-10-18 18:25:59.731527856 +1100
@@ -336,6 +336,17 @@
     die_now = 1;
 }
 
+static int volatile client_socket = -1;
+
+static void close_client_socket(int sig)
+{
+    if (client_socket != -1) {
+        close(client_socket);
+    }
+
+    client_socket = -1;
+}
+
 /* volatile just in case */
 static int volatile shutdown_pending;
 static int volatile restart_pending;
@@ -659,8 +670,12 @@
 
         current_conn = ap_run_create_connection(ptrans, ap_server_conf, csd, my_child_num, sbh, bucket_alloc);
         if (current_conn) {
+            apr_os_sock_get((apr_os_sock_t *)&client_socket, csd);
+
             ap_process_connection(current_conn, csd);
             ap_lingering_close(current_conn);
+
+            client_socket = -1;
         }
 
         /* Check the pod and the generation number after processing a
@@ -755,6 +770,7 @@
          * The pod is used for signalling the graceful restart.
          */
         apr_signal(AP_SIG_GRACEFUL, stop_listening);
+        apr_signal(SIGINT, close_client_socket);
         child_main(slot);
     }
 
@@ -803,6 +819,7 @@
     int free_slots[MAX_SPAWN_RATE];
     int last_non_dead;
     int total_non_dead;
+    static apr_time_t readers_maxed_out = 0;
 
     /* initialize the free_list */
     free_length = 0;
@@ -856,12 +873,14 @@
          */
         ap_mpm_pod_signal(pod);
         idle_spawn_rate = 1;
+        readers_maxed_out = 0;
     }
     else if (idle_count < ap_daemons_min_free) {
         /* terminate the free list */
         if (free_length == 0) {
             /* only report this condition once */
             static int reported = 0;
+            apr_time_t now = apr_time_now();
 
             if (!reported) {
                 ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
@@ -870,6 +889,57 @@
                 reported = 1;
             }
             idle_spawn_rate = 1;
+
+            /* Give legitimate clients one maintenance interval to
+             * finish with request reads, then close all reader sockets
+             * if we are still maxed out. Crude, but seems to clear
+             * things out.
+             */
+            if (readers_maxed_out) {
+                apr_time_t diff = now - readers_maxed_out;
+
+                if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) {
+                    for (i = 0; i < ap_daemons_limit; ++i) {
+
+                        ws = &ap_scoreboard_image->servers[i][0];
+
+                        if (ws->status != SERVER_BUSY_READ) {
+                            break;
+                        }
+                    }
+
+                    if (i >= ap_daemons_limit) {
+                        pid_t reader;
+
+                        for (i = 0; i < ap_daemons_limit; ++i) {
+                            reader = ap_scoreboard_image->parent[i].pid;
+
+                            ap_mpm_safe_kill(reader, SIGINT);
+                        }
+                    }
+                    else {
+                        readers_maxed_out = 0;
+                    }
+                }
+            }
+            else {
+                for (i = 0; i < ap_daemons_limit; ++i) {
+
+                    ws = &ap_scoreboard_image->servers[i][0];
+
+                    if (ws->status != SERVER_BUSY_READ) {
+                        break;
+                    }
+                }
+
+                /* Flooded by intentionally slow requests (e.g. slowloris)?
+                 * It is highly unlikely that every single child process
+                 * is stuck in read on a busy server.
+                 */
+                if (i >= ap_daemons_limit) {
+                    readers_maxed_out = now;
+                }
+            }
         }
         else {
             if (idle_spawn_rate >= 8) {
@@ -902,10 +972,13 @@
             else if (idle_spawn_rate < MAX_SPAWN_RATE) {
                 idle_spawn_rate *= 2;
             }
+
+            readers_maxed_out = 0;
         }
     }
     else {
         idle_spawn_rate = 1;
+        readers_maxed_out = 0;
     }
 }
 

Reply via email to