The pool herder didn't signal the thread when decimating the flock,
causing the thread to be leaked.

Spotted by: xcir
Fixes: #1490

Also close a couple of other races:

When pthread_cond_timedwait returns ETIMEDOUT, the predicate
(wrk->task.func == NULL) could still have changed while reacquiring
the mutex. If so, the signal would've been lost and the thread
leaked. Changed the idle wait loop in Pool_Work_Thread() to always
check the predicate before resuming the cond_wait.

The update of the predicate (wrk->task.func) from e.g. Pool_Task() is
done without holding the mutex. This opens a race on the predicate,
and the possibility of the worker going back on cond_wait loosing the
signal. Changed to update the predicate while holding the mutex.
---
 bin/varnishd/cache/cache_pool.c  | 31 +++++++++++++++++++++----------
 bin/varnishtest/tests/r01490.vtc | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+), 10 deletions(-)
 create mode 100644 bin/varnishtest/tests/r01490.vtc

diff --git a/bin/varnishd/cache/cache_pool.c b/bin/varnishd/cache/cache_pool.c
index 75fac47..fbfb993 100644
--- a/bin/varnishd/cache/cache_pool.c
+++ b/bin/varnishd/cache/cache_pool.c
@@ -163,12 +163,12 @@ pool_accept(struct worker *wrk, void *arg)
                }
                VTAILQ_REMOVE(&pp->idle_queue, &wrk2->task, list);
                AZ(wrk2->task.func);
-               Lck_Unlock(&pp->mtx);
                assert(sizeof *wa2 == WS_Reserve(wrk2->aws, sizeof *wa2));
                wa2 = (void*)wrk2->aws->f;
                memcpy(wa2, wa, sizeof *wa);
                wrk2->task.func = SES_pool_accept_task;
                wrk2->task.priv = pp->sesspool;
+               Lck_Unlock(&pp->mtx);
                AZ(pthread_cond_signal(&wrk2->cond));
 
                /*
@@ -204,9 +204,9 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum 
pool_how how)
        if (wrk != NULL) {
                VTAILQ_REMOVE(&pp->idle_queue, &wrk->task, list);
                AZ(wrk->task.func);
-               Lck_Unlock(&pp->mtx);
                wrk->task.func = task->func;
                wrk->task.priv = task->priv;
+               Lck_Unlock(&pp->mtx);
                AZ(pthread_cond_signal(&wrk->cond));
                return (0);
        }
@@ -237,6 +237,17 @@ Pool_Task(struct pool *pp, struct pool_task *task, enum 
pool_how how)
 }
 
 /*--------------------------------------------------------------------
+ * Empty function used as a pointer value for the thread exit condition.
+ */
+
+static void
+pool_kiss_of_death(struct worker *wrk, void *priv)
+{
+       (void)wrk;
+       (void)priv;
+}
+
+/*--------------------------------------------------------------------
  * This is the work function for worker threads in the pool.
  */
 
@@ -274,7 +285,6 @@ Pool_Work_Thread(void *priv, struct worker *wrk)
                                wrk->lastused = VTIM_real();
                        wrk->task.func = NULL;
                        wrk->task.priv = wrk;
-                       AZ(wrk->task.func);
                        VTAILQ_INSERT_HEAD(&pp->idle_queue, &wrk->task, list);
                        if (!stats_clean)
                                WRK_SumStat(wrk);
@@ -283,12 +293,12 @@ Pool_Work_Thread(void *priv, struct worker *wrk)
                                    wrk->vcl == NULL ?  0 : wrk->lastused+60.);
                                if (i == ETIMEDOUT)
                                        VCL_Rel(&wrk->vcl);
-                       } while (i);
+                       } while (wrk->task.func == NULL);
                        tp = &wrk->task;
                }
                Lck_Unlock(&pp->mtx);
 
-               if (tp->func == NULL)
+               if (tp->func == pool_kiss_of_death)
                        break;
 
                assert(wrk->pool == pp);
@@ -387,23 +397,24 @@ pool_herder(void *priv)
                                CAST_OBJ_NOTNULL(wrk, pt->priv, WORKER_MAGIC);
 
                                if (wrk->lastused < t_idle ||
-                                   pp->nthr > cache_param->wthread_max)
+                                   pp->nthr > cache_param->wthread_max) {
+                                       /* Give it a kiss on the cheek... */
                                        VTAILQ_REMOVE(&pp->idle_queue,
                                            &wrk->task, list);
-                               else
+                                       wrk->task.func = pool_kiss_of_death;
+                                       AZ(pthread_cond_signal(&wrk->cond));
+                               } else
                                        wrk = NULL;
                        }
                        Lck_Unlock(&pp->mtx);
 
-                       /* And give it a kiss on the cheek... */
                        if (wrk != NULL) {
+                               /* Update counters */
                                pp->nthr--;
                                Lck_Lock(&pool_mtx);
                                VSC_C_main->threads--;
                                VSC_C_main->threads_destroyed++;
                                Lck_Unlock(&pool_mtx);
-                               wrk->task.func = NULL;
-                               wrk->task.priv = NULL;
                                VTIM_sleep(cache_param->wthread_destroy_delay);
                                continue;
                        }
diff --git a/bin/varnishtest/tests/r01490.vtc b/bin/varnishtest/tests/r01490.vtc
new file mode 100644
index 0000000..82ffdb4
--- /dev/null
+++ b/bin/varnishtest/tests/r01490.vtc
@@ -0,0 +1,38 @@
+varnishtest "#1490 - thread destruction"
+
+server s1 {
+} -start
+
+varnish v1 \
+       -arg "-p debug=+syncvsl" \
+       -arg "-p vsl_mask=+WorkThread" \
+       -arg "-p thread_pool_min=2" \
+       -arg "-p thread_pool_max=3" \
+       -arg "-p thread_pools=1" \
+       -vcl+backend {}
+varnish v1 -start
+
+varnish v1 -expect threads == 2
+
+logexpect l1 -v v1 -g raw {
+       expect * 0 WorkThread {^\S+ start$}
+       expect * 0 WorkThread {^\S+ end$}
+} -start
+
+varnish v1 -cliok "param.set thread_pool_min 3"
+
+# Herder thread sleeps 5 seconds. Have to wait longer than that.
+delay 6
+
+varnish v1 -expect threads == 3
+
+varnish v1 -cliok "param.set thread_pool_min 2"
+varnish v1 -cliok "param.set thread_pool_max 2"
+
+# Herder thread sleeps 5 seconds. Have to wait longer than that.
+delay 6
+
+varnish v1 -expect threads == 2
+
+# Use logexpect to see that the thread actually exited
+logexpect l1 -wait
-- 
1.9.2


_______________________________________________
varnish-dev mailing list
[email protected]
https://www.varnish-cache.org/lists/mailman/listinfo/varnish-dev

Reply via email to