On Fri, 2009-10-16 at 08:00 +1100, Bojan Smojver wrote:
> While playing with slowloris against prefork, I wrote the attached
> craziness.
On the back of bug #48094, I reworked the latest version of this
craziness, which now suspends worker threads before shutting down the
sockets. Posting it here in case someone wants to play with it.
--
Bojan
--- httpd-2.2.14-p/server/mpm/prefork/prefork.c 2009-02-01 07:54:55.000000000 +1100
+++ httpd-2.2.14/server/mpm/prefork/prefork.c 2009-11-02 12:09:50.511530535 +1100
@@ -48,6 +48,7 @@
#include "ap_listen.h"
#include "ap_mmn.h"
#include "apr_poll.h"
+#include "apr_md5.h"
#ifdef HAVE_BSTRING_H
#include <bstring.h> /* for IRIX, FD_SET calls bzero() */
@@ -336,6 +337,28 @@
die_now = 1;
}
+static int volatile client_socket = -1;
+
+#ifndef NO_USE_SIGACTION
+static void shutdown_socket(int sig, siginfo_t *info, void *context)
+#else
+static void shutdown_socket(int sig)
+#endif
+{
+#ifndef NO_USE_SIGACTION
+ if (info->si_pid == getppid()) {
+#endif
+ if (client_socket != -1) {
+ shutdown(client_socket, SHUT_RDWR);
+ }
+#ifndef NO_USE_SIGACTION
+ }
+ else {
+ clean_child_exit(0);
+ }
+#endif
+}
+
/* volatile just in case */
static int volatile shutdown_pending;
static int volatile restart_pending;
@@ -659,8 +682,12 @@
current_conn = ap_run_create_connection(ptrans, ap_server_conf, csd, my_child_num, sbh, bucket_alloc);
if (current_conn) {
+ apr_os_sock_get((apr_os_sock_t *)&client_socket, csd);
+
ap_process_connection(current_conn, csd);
ap_lingering_close(current_conn);
+
+ client_socket = -1;
}
/* Check the pod and the generation number after processing a
@@ -733,6 +760,10 @@
}
if (!pid) {
+#ifndef NO_USE_SIGACTION
+ struct sigaction act;
+#endif
+
#ifdef HAVE_BINDPROCESSOR
/* by default AIX binds to a single processor
* this bit unbinds children which will then bind to another cpu
@@ -755,6 +786,19 @@
* The pod is used for signalling the graceful restart.
*/
apr_signal(AP_SIG_GRACEFUL, stop_listening);
+
+ /* If the parent sends SIGINT to the child, we shut down the
+ * client socket, as we suspect that we are under a DoS attack.
+ */
+#ifndef NO_USE_SIGACTION
+ memset(&act, 0, sizeof(act));
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = shutdown_socket;
+ sigaction(SIGINT, &act, NULL);
+#else
+ apr_signal(SIGINT, shutdown_socket);
+#endif
+
child_main(slot);
}
@@ -803,6 +847,8 @@
int free_slots[MAX_SPAWN_RATE];
int last_non_dead;
int total_non_dead;
+ int status;
+ static apr_time_t maxed_out = 0;
/* initialize the free_list */
free_length = 0;
@@ -813,8 +859,6 @@
total_non_dead = 0;
for (i = 0; i < ap_daemons_limit; ++i) {
- int status;
-
if (i >= ap_max_daemons_limit && free_length == idle_spawn_rate)
break;
ws = &ap_scoreboard_image->servers[i][0];
@@ -856,12 +900,17 @@
*/
ap_mpm_pod_signal(pod);
idle_spawn_rate = 1;
+ maxed_out = 0;
}
else if (idle_count < ap_daemons_min_free) {
/* terminate the free list */
if (free_length == 0) {
/* only report this condition once */
static int reported = 0;
+ static unsigned char sb_digest[APR_MD5_DIGESTSIZE];
+ apr_time_t now = apr_time_now();
+ apr_md5_ctx_t ctx;
+ pid_t pid;
if (!reported) {
ap_log_error(APLOG_MARK, APLOG_ERR, 0, ap_server_conf,
@@ -870,6 +919,120 @@
reported = 1;
}
idle_spawn_rate = 1;
+
+ /* If after one maintenance interval we still see the same
+ * situation on the scoreboard, shut down all client sockets
+ * in read state and at least 10% of all client sockets.
+ * Crude, but seems to clear things out.
+ */
+ if (maxed_out) {
+ apr_time_t diff = now - maxed_out;
+
+ if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) {
+ unsigned char cur_digest[APR_MD5_DIGESTSIZE];
+
+ /* Current digest of the scoreboard.
+ */
+ apr_md5_init(&ctx);
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ status = ap_scoreboard_image->servers[i][0].status;
+ apr_md5_update(&ctx, &status, sizeof(status));
+
+ pid = ap_scoreboard_image->parent[i].pid;
+ apr_md5_update(&ctx, &pid, sizeof(pid));
+ }
+ apr_md5_final(cur_digest, &ctx);
+
+ /* If we haven't had a change for one maintenance
+ * interval, we need to make room.
+ */
+ if (memcmp(sb_digest, cur_digest, APR_MD5_DIGESTSIZE)) {
+ maxed_out = 0;
+ }
+ else {
+ int rdrs = 0, cull = ap_daemons_limit / 10;
+
+ /* Disconnect all readers (includes keep alive).
+ */
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ pid = ap_scoreboard_image->parent[i].pid;
+ status = ap_scoreboard_image->servers[i][0].status;
+
+ if (status == SERVER_BUSY_READ ||
+ status == SERVER_BUSY_KEEPALIVE) {
+
+ ap_mpm_safe_kill(pid, SIGINT);
+ rdrs++;
+ }
+ }
+
+ /* Shut down further sockets to make up 10% of all sockets, if required.
+ */
+ for (i = 0; i < ap_daemons_limit && cull > rdrs; ++i) {
+ status = ap_scoreboard_image->servers[i][0].status;
+
+ if (status != SERVER_BUSY_READ &&
+ status != SERVER_BUSY_KEEPALIVE) {
+
+ pid = ap_scoreboard_image->parent[i].pid;
+ ap_mpm_safe_kill(pid, SIGINT);
+ cull--;
+ }
+ }
+ }
+ }
+ }
+ else {
+ int rdrs = 0;
+
+ /* Create digest of the scoreboard, see if things
+ * change next time around.
+ */
+ apr_md5_init(&ctx);
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ status = ap_scoreboard_image->servers[i][0].status;
+
+ /* These are the conditions we are concerned with.
+ */
+ switch (status) {
+ case SERVER_BUSY_READ:
+ case SERVER_BUSY_KEEPALIVE:
+ rdrs++;
+ case SERVER_BUSY_WRITE:
+ case SERVER_DEAD:
+ case SERVER_GRACEFUL:
+ break;
+ default:
+ return;
+ }
+
+ apr_md5_update(&ctx, &status, sizeof(status));
+
+ pid = ap_scoreboard_image->parent[i].pid;
+ apr_md5_update(&ctx, &pid, sizeof(pid));
+ }
+ apr_md5_final(sb_digest, &ctx);
+
+ /* Over 95% in read state (includes keep alive), clear now.
+ */
+ if (ap_daemons_limit - rdrs < ap_daemons_limit / 20) {
+ /* Disconnect all readers (includes keep alive).
+ */
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ pid = ap_scoreboard_image->parent[i].pid;
+ status = ap_scoreboard_image->servers[i][0].status;
+
+ if (status == SERVER_BUSY_READ ||
+ status == SERVER_BUSY_KEEPALIVE) {
+ ap_mpm_safe_kill(pid, SIGINT);
+ rdrs++;
+ }
+ }
+ }
+ else {
+ maxed_out = now;
+ }
+ }
}
else {
if (idle_spawn_rate >= 8) {
@@ -902,10 +1065,13 @@
else if (idle_spawn_rate < MAX_SPAWN_RATE) {
idle_spawn_rate *= 2;
}
+
+ maxed_out = 0;
}
}
else {
idle_spawn_rate = 1;
+ maxed_out = 0;
}
}
--- httpd-2.2.14-p/server/mpm/worker/worker.c 2009-11-02 09:40:23.129750043 +1100
+++ httpd-2.2.14/server/mpm/worker/worker.c 2009-11-02 12:37:53.987529627 +1100
@@ -33,6 +33,7 @@
#define APR_WANT_STRFUNC
#include "apr_want.h"
#include "apr_atomic.h"
+#include "apr_md5.h"
#if APR_HAVE_UNISTD_H
#include <unistd.h>
@@ -422,6 +423,101 @@
clean_child_exit(0);
}
+#if !defined(NO_USE_SIGACTION) && defined(HAVE_PTHREAD_KILL)
+static void shutdown_sockets(int sig, siginfo_t *info, void *context)
+{
+ int csd, i, j, slot = 0, status, total_rdrs = 0, rdrs = 0,
+ cull = ap_daemons_limit * ap_threads_per_child / 10;
+
+ /* not from parent, ignore */
+ if (info->si_pid != getppid()) {
+ return;
+ }
+
+ suspend_workers = 1;
+ apr_atomic_set32(&suspended_workers, 0);
+
+ /* suspend worker threads */
+ for (i = 0; i < ap_threads_per_child; i++) {
+ if (worker_os_threads[i]) {
+ pthread_kill(*worker_os_threads[i], WORKER_SIGNAL);
+ }
+ }
+
+ /* wait for threads to suspend, but press ahead after a while anyway */
+ for (i = 0;
+ apr_atomic_read32(&suspended_workers) < ap_threads_per_child && i < 25;
+ i++) {
+ apr_sleep(apr_time_from_sec(1) / 5);
+ }
+
+ /* Determine total number of readers (includes keep alive), our
+ * slot and the number of our own readers.
+ */
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ if (ap_scoreboard_image->parent[i].pid == ap_my_pid) {
+ slot = i;
+ }
+
+ for (j = 0; j < ap_threads_per_child; j++) {
+ status = ap_scoreboard_image->servers[i][j].status;
+
+ if (status == SERVER_BUSY_READ ||
+ status == SERVER_BUSY_KEEPALIVE) {
+
+ total_rdrs++;
+
+ if (slot == i) {
+ rdrs++;
+ }
+ }
+ }
+ }
+
+ /* Disconnect all readers (includes keep alive).
+ */
+ for (j = 0; j < ap_threads_per_child; j++) {
+ status = ap_scoreboard_image->servers[slot][j].status;
+
+ if (worker_sockets[j] &&
+ (status == SERVER_BUSY_READ ||
+ status == SERVER_BUSY_KEEPALIVE)) {
+
+ apr_os_sock_get((apr_os_sock_t *)&csd, worker_sockets[j]);
+ shutdown(csd, SHUT_RDWR);
+ }
+ }
+
+ /* Shut down further sockets to make up 10% of all sockets, if required.
+ */
+ if (total_rdrs < cull) {
+ cull = ((ap_threads_per_child - rdrs) * (cull - total_rdrs)) / cull;
+
+ for (j = 0; j < ap_threads_per_child && cull > 0; j++) {
+ status = ap_scoreboard_image->servers[slot][j].status;
+
+ if (worker_sockets[j] &&
+ status != SERVER_BUSY_READ &&
+ status != SERVER_BUSY_KEEPALIVE) {
+
+ apr_os_sock_get((apr_os_sock_t *)&csd, worker_sockets[j]);
+ shutdown(csd, SHUT_RDWR);
+ cull--;
+ }
+ }
+ }
+
+ suspend_workers = 0;
+
+ /* resume worker threads */
+ for (i = 0; i < ap_threads_per_child; i++) {
+ if (worker_os_threads[i]) {
+ pthread_kill(*worker_os_threads[i], WORKER_SIGNAL);
+ }
+ }
+}
+#endif
+
/*****************************************************************
* Connection structures and accounting...
*/
@@ -1319,12 +1415,28 @@
join_workers(ts->listener, threads);
}
else { /* !one_process */
+#if !defined(NO_USE_SIGACTION) && defined(HAVE_PTHREAD_KILL)
+ struct sigaction act;
+#endif
+
/* remove SIGTERM from the set of blocked signals... if one of
* the other threads in the process needs to take us down
* (e.g., for MaxRequestsPerChild) it will send us SIGTERM
*/
unblock_signal(SIGTERM);
apr_signal(SIGTERM, dummy_signal_handler);
+
+ /* If the parent sends SIGINT to the child, we shut down the
+ * client sockets, as we suspect that we are under a DoS attack.
+ */
+#if !defined(NO_USE_SIGACTION) && defined(HAVE_PTHREAD_KILL)
+ unblock_signal(SIGINT);
+ memset(&act, 0, sizeof(act));
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = shutdown_sockets;
+ sigaction(SIGINT, &act, NULL);
+#endif
+
/* Watch for any messages from the parent over the POD */
while (1) {
rv = ap_mpm_pod_check(pod);
@@ -1476,6 +1588,8 @@
int last_non_dead;
int total_non_dead;
int active_thread_count = 0;
+ int status = SERVER_DEAD;
+ static apr_time_t maxed_out = 0;
/* initialize the free_list */
free_length = 0;
@@ -1487,7 +1601,6 @@
for (i = 0; i < ap_daemons_limit; ++i) {
/* Initialization to satisfy the compiler. It doesn't know
* that ap_threads_per_child is always > 0 */
- int status = SERVER_DEAD;
int any_dying_threads = 0;
int any_dead_threads = 0;
int all_dead_threads = 1;
@@ -1581,12 +1694,17 @@
/* Kill off one child */
ap_mpm_pod_signal(pod, TRUE);
idle_spawn_rate = 1;
+ maxed_out = 0;
}
else if (idle_thread_count < min_spare_threads) {
/* terminate the free list */
if (free_length == 0) {
/* only report this condition once */
static int reported = 0;
+ static unsigned char sb_digest[APR_MD5_DIGESTSIZE];
+ apr_time_t now = apr_time_now();
+ apr_md5_ctx_t ctx;
+ pid_t pid;
if (!reported) {
ap_log_error(APLOG_MARK, APLOG_ERR, 0,
@@ -1596,6 +1714,95 @@
reported = 1;
}
idle_spawn_rate = 1;
+
+ /* If after one maintenance interval we still see the same
+ * situation on the scoreboard, shut down all client sockets
+ * in read state and at least 10% of all client sockets.
+ * Crude, but seems to clear things out.
+ */
+ if (maxed_out) {
+ apr_time_t diff = now - maxed_out;
+
+ if (diff >= SCOREBOARD_MAINTENANCE_INTERVAL) {
+ unsigned char cur_digest[APR_MD5_DIGESTSIZE];
+
+ /* Current digest of the scoreboard.
+ */
+ apr_md5_init(&ctx);
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ for (j = 0; j < ap_threads_per_child; j++) {
+ status = ap_scoreboard_image->servers[i][j].status;
+ apr_md5_update(&ctx, &status, sizeof(status));
+ }
+
+ pid = ap_scoreboard_image->parent[i].pid;
+ apr_md5_update(&ctx, &pid, sizeof(pid));
+ }
+ apr_md5_final(cur_digest, &ctx);
+
+ /* If we haven't had a change for one maintenance
+ * interval, we need to make room.
+ */
+ if (memcmp(sb_digest, cur_digest, APR_MD5_DIGESTSIZE)) {
+ maxed_out = 0;
+ }
+ else {
+ /* Signal child processes to shutdown client sockets.
+ */
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ pid = ap_scoreboard_image->parent[i].pid;
+ ap_mpm_safe_kill(pid, SIGINT);
+ }
+ }
+ }
+ }
+ else {
+ int rdrs = 0;
+
+ /* Create digest of the scoreboard, see if things
+ * change next time around.
+ */
+ apr_md5_init(&ctx);
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ for (j = 0; j < ap_threads_per_child; j++) {
+ status = ap_scoreboard_image->servers[i][j].status;
+
+ /* These are conditions we are concerned with.
+ */
+ switch (status) {
+ case SERVER_BUSY_READ:
+ case SERVER_BUSY_KEEPALIVE:
+ rdrs++;
+ case SERVER_BUSY_WRITE:
+ case SERVER_DEAD:
+ case SERVER_GRACEFUL:
+ break;
+ default:
+ return;
+ }
+
+ apr_md5_update(&ctx, &status, sizeof(status));
+ }
+
+ pid = ap_scoreboard_image->parent[i].pid;
+ apr_md5_update(&ctx, &pid, sizeof(pid));
+ }
+ apr_md5_final(sb_digest, &ctx);
+
+ /* Over 95% in read state (includes keep alive), clear now.
+ */
+ if (ap_daemons_limit - rdrs < ap_daemons_limit / 20) {
+ /* Signal child processes to shutdown client sockets.
+ */
+ for (i = 0; i < ap_daemons_limit; ++i) {
+ pid = ap_scoreboard_image->parent[i].pid;
+ ap_mpm_safe_kill(pid, SIGINT);
+ }
+ }
+ else {
+ maxed_out = now;
+ }
+ }
}
else {
if (free_length > idle_spawn_rate) {
@@ -1623,10 +1830,13 @@
else if (idle_spawn_rate < MAX_SPAWN_RATE) {
idle_spawn_rate *= 2;
}
+
+ maxed_out = 0;
}
}
else {
idle_spawn_rate = 1;
+ maxed_out = 0;
}
}