On Sat, Apr 6, 2024 at 10:46 AM jean-frederic clere <jfcl...@gmail.com> wrote:
>
> On 4/5/24 07:55, Ruediger Pluem wrote:
> >
> > Are you able to provide a stacktrace of the hanging process (thread apply 
> > all bt full)?
>
> It seems pthread_kill(t, 0) returns 0 even if the thread t has exited...
> older versions of Fedora will return 3 (I have tried fc28)

If pthread_kill() does not work we probably should use the global
"dying" variable like in mpm_event.
But it's not clear from your earlier "bt full" whether there are other
threads. Could you try "thread apply all bt full" instead to show all
the threads?
It's clear from the main thread's backtrace that it's waiting for the
listener in the "iter" loop, but nothing tells whether the listener has
already exited. The listener could, for instance, be waiting indefinitely
in apr_pollset_poll() at this point, and since there is no pollset wakeup
in mpm_worker I don't think that wakeup_listener() can help here.
So maybe we need to add an apr_pollset_wakeup() in wakeup_listener()
too, like in mpm_event.

Overall something like the attached patch?


Regards,
Yann.
Index: server/mpm/worker/worker.c
===================================================================
--- server/mpm/worker/worker.c	(revision 1916768)
+++ server/mpm/worker/worker.c	(working copy)
@@ -125,10 +125,11 @@ static int max_workers = 0;
 static int server_limit = 0;
 static int thread_limit = 0;
 static int had_healthy_child = 0;
-static int dying = 0;
+static volatile int dying = 0;
 static int workers_may_exit = 0;
 static int start_thread_may_exit = 0;
 static int listener_may_exit = 0;
+static int listener_is_wakeable = 0; /* Pollset supports APR_POLLSET_WAKEABLE */
 static int requests_this_child;
 static int num_listensocks = 0;
 static int resource_shortage = 0;
@@ -272,6 +273,15 @@ static void close_worker_sockets(void)
 static void wakeup_listener(void)
 {
     listener_may_exit = 1;
+
+    /* Unblock the listener if it's poll()ing */
+    if (worker_pollset && listener_is_wakeable) {
+        apr_pollset_wakeup(worker_pollset);
+    }
+
+    /* unblock the listener if it's waiting for a worker */
+    ap_queue_info_term(worker_queue_info);
+
     if (!listener_os_thread) {
         /* XXX there is an obscure path that this doesn't handle perfectly:
          *     right after listener thread is created but before
@@ -280,10 +290,6 @@ static void wakeup_listener(void)
          */
         return;
     }
-
-    /* unblock the listener if it's waiting for a worker */
-    ap_queue_info_term(worker_queue_info);
-
     /*
      * we should just be able to "kill(ap_my_pid, LISTENER_SIGNAL)" on all
      * platforms and wake up the listener thread since it is the only thread
@@ -716,6 +722,7 @@ static void * APR_THREAD_FUNC listener_thread(apr_
     ap_close_listeners_ex(my_bucket->listeners);
     ap_queue_info_free_idle_pools(worker_queue_info);
     ap_queue_term(worker_queue);
+
     dying = 1;
     ap_scoreboard_image->parent[process_slot].quiescing = 1;
 
@@ -861,6 +868,10 @@ static void create_listener_thread(thread_starter
 static void setup_threads_runtime(void)
 {
     ap_listen_rec *lr;
+    int pollset_flags, i;
+    const int good_methods[] = { APR_POLLSET_KQUEUE,
+                                 APR_POLLSET_PORT,
+                                 APR_POLLSET_EPOLL };
     apr_status_t rv;
 
     /* All threads (listener, workers) and synchronization objects (queues,
@@ -894,9 +905,31 @@ static void setup_threads_runtime(void)
     }
 
     /* Create the main pollset */
-    rv = apr_pollset_create(&worker_pollset, num_listensocks, pruntime,
-                            APR_POLLSET_NOCOPY);
+    pollset_flags = APR_POLLSET_NOCOPY | APR_POLLSET_NODEFAULT | APR_POLLSET_WAKEABLE;
+    for (i = 0; i < sizeof(good_methods) / sizeof(good_methods[0]); i++) {
+        rv = apr_pollset_create_ex(&worker_pollset, num_listensocks, pruntime,
+                                   pollset_flags, good_methods[i]);
+        if (rv == APR_SUCCESS) {
+            listener_is_wakeable = 1;
+            break;
+        }
+    }
     if (rv != APR_SUCCESS) {
+        pollset_flags &= ~APR_POLLSET_WAKEABLE;
+        for (i = 0; i < sizeof(good_methods) / sizeof(good_methods[0]); i++) {
+            rv = apr_pollset_create_ex(&worker_pollset, num_listensocks, pruntime,
+                                       pollset_flags, good_methods[i]);
+            if (rv == APR_SUCCESS) {
+                break;
+            }
+        }
+    }
+    if (rv != APR_SUCCESS) {
+        pollset_flags &= ~APR_POLLSET_NODEFAULT;
+        rv = apr_pollset_create(&worker_pollset, num_listensocks, pruntime,
+                                pollset_flags);
+    }
+    if (rv != APR_SUCCESS) {
         ap_log_error(APLOG_MARK, APLOG_EMERG, rv, ap_server_conf, APLOGNO(03285)
                      "Couldn't create pollset in thread;"
                      " check system or user limits");
@@ -1031,19 +1064,17 @@ static void join_workers(apr_thread_t *listener, a
          */
 
         iter = 0;
-        while (iter < 10 &&
-#ifdef HAVE_PTHREAD_KILL
-               pthread_kill(*listener_os_thread, 0)
-#else
-               kill(ap_my_pid, 0)
-#endif
-               == 0) {
-            /* listener not dead yet */
-            apr_sleep(apr_time_make(0, 500000));
+        while (!dying) {
+            apr_sleep(apr_time_from_msec(500));
+            if (dying || ++iter > 10) {
+                break;
+            }
+            /* listener has not stopped accepting yet */
+            ap_log_error(APLOG_MARK, APLOG_TRACE1, 0, ap_server_conf,
+                         "listener has not stopped accepting yet (%d iter)", iter);
             wakeup_listener();
-            ++iter;
         }
-        if (iter >= 10) {
+        if (iter > 10) {
             ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00276)
                          "the listener thread didn't exit");
         }

Reply via email to