ovs-vswitchd makes extensive use of RCU to defer freeing memory past the latest time that it could be in use by a thread. Until now, ovs-vswitchd has not waited for RCU callbacks to fire before exiting. This meant that in many cases, when ovs-vswitchd exits, many blocks of memory are stuck in RCU callback queues, which valgrind often reports as "possible" memory leaks.
This commit adds a new function ovsrcu_exit() that waits and fires as many RCU callbacks as it reasonably can. It can only do so for the thread that calls it and the thread that calls the callbacks, but generally speaking ovs-vswitchd shuts down other threads before it exits anyway, so this is pretty good. In my testing this eliminates most valgrind warnings for tests that run ovs-vswitchd. This ought to make it easier to distinguish new leaks that are real from existing non-leaks. Signed-off-by: Ben Pfaff <[email protected]> --- lib/ovs-rcu.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++-- lib/ovs-rcu.h | 2 ++ vswitchd/ovs-vswitchd.c | 2 ++ 3 files changed, 57 insertions(+), 2 deletions(-) diff --git a/lib/ovs-rcu.c b/lib/ovs-rcu.c index 05a46d4524e3..ebc8120f0fd3 100644 --- a/lib/ovs-rcu.c +++ b/lib/ovs-rcu.c @@ -19,6 +19,7 @@ #include "ovs-rcu.h" #include "fatal-signal.h" #include "guarded-list.h" +#include "latch.h" #include "openvswitch/list.h" #include "ovs-thread.h" #include "openvswitch/poll-loop.h" @@ -58,6 +59,9 @@ static struct ovs_mutex ovsrcu_threads_mutex; static struct guarded_list flushed_cbsets; static struct seq *flushed_cbsets_seq; +static struct latch postpone_exit; +static struct ovs_barrier postpone_barrier; + static void ovsrcu_init_module(void); static void ovsrcu_flush_cbset__(struct ovsrcu_perthread *, bool); static void ovsrcu_flush_cbset(struct ovsrcu_perthread *); @@ -111,6 +115,8 @@ ovsrcu_quiesced(void) } else { static struct ovsthread_once once = OVSTHREAD_ONCE_INITIALIZER; if (ovsthread_once_start(&once)) { + latch_init(&postpone_exit); + ovs_barrier_init(&postpone_barrier, 2); ovs_thread_create("urcu", ovsrcu_postpone_thread, NULL); ovsthread_once_done(&once); } @@ -232,6 +238,49 @@ ovsrcu_synchronize(void) ovsrcu_quiesce_end(); } +/* Waits until as many postponed callbacks as possible have executed. + * + * As a side effect, stops the background thread that calls the callbacks and + * prevents it from being restarted. This means that this function should only + * be called soon before a process exits, as a mechanism for releasing memory + * to make memory leaks easier to detect, since any further postponed callbacks + * won't actually get called. + * + * This function can only wait for callbacks registered by the current thread + * and the background thread that calls the callbacks. Thus, it will be most + * effective if other threads have already exited. */ +void +ovsrcu_exit(void) +{ + /* Stop the postpone thread and wait for it to exit. Otherwise, there's no + * way to wait for that thread to finish calling callbacks itself. */ + if (!single_threaded()) { + ovsrcu_quiesced(); /* Ensure that the postpone thread exists. */ + latch_set(&postpone_exit); + ovs_barrier_block(&postpone_barrier); + } + + /* Repeatedly: + * + * - Wait for a grace period. One important side effect is to push the + * running thread's cbset into 'flushed_cbsets' so that the next call + * has something to call. + * + * - Call all the callbacks in 'flushed_cbsets'. If there aren't any, + * we're done, otherwise the callbacks themselves might have requested + * more deferred callbacks so we go around again. + * + * We limit the number of iterations just in case some bug causes an + * infinite loop. This function is just for making memory leaks easier to + * spot so there's no point in breaking things on that basis. */ + for (int i = 0; i < 8; i++) { + ovsrcu_synchronize(); + if (!ovsrcu_call_postponed()) { + break; + } + } +} + /* Registers 'function' to be called, passing 'aux' as argument, after the * next grace period. * @@ -303,15 +352,17 @@ ovsrcu_postpone_thread(void *arg OVS_UNUSED) { pthread_detach(pthread_self()); - for (;;) { + while (!latch_is_set(&postpone_exit)) { uint64_t seqno = seq_read(flushed_cbsets_seq); if (!ovsrcu_call_postponed()) { seq_wait(flushed_cbsets_seq, seqno); + latch_wait(&postpone_exit); poll_block(); } } - OVS_NOT_REACHED(); + ovs_barrier_block(&postpone_barrier); + return NULL; } static void diff --git a/lib/ovs-rcu.h b/lib/ovs-rcu.h index 2887bb8f7ffc..ecc4c920102c 100644 --- a/lib/ovs-rcu.h +++ b/lib/ovs-rcu.h @@ -308,4 +308,6 @@ bool ovsrcu_is_quiescent(void); * once. This can block for a relatively long time. */ void ovsrcu_synchronize(void); +void ovsrcu_exit(void); + #endif /* ovs-rcu.h */ diff --git a/vswitchd/ovs-vswitchd.c b/vswitchd/ovs-vswitchd.c index d5e07c0376cd..53e511999594 100644 --- a/vswitchd/ovs-vswitchd.c +++ b/vswitchd/ovs-vswitchd.c @@ -37,6 +37,7 @@ #include "netdev.h" #include "openflow/openflow.h" #include "ovsdb-idl.h" +#include "ovs-rcu.h" #include "openvswitch/poll-loop.h" #include "simap.h" #include "stream-ssl.h" @@ -135,6 +136,7 @@ main(int argc, char *argv[]) bridge_exit(cleanup); unixctl_server_destroy(unixctl); service_stop(); + ovsrcu_exit(); return 0; } -- 2.10.2 _______________________________________________ dev mailing list [email protected] https://mail.openvswitch.org/mailman/listinfo/ovs-dev
