On Wed, Mar 21, 2012 at 07:59:13PM -0600, dann frazier wrote:
> However... we've dropped the connections_lock, so it's possible that a
> new connection gets created on line 9. This connection structure would
> have pointers to the workqueues that we're about to destroy. Sometime
> later on we get data on this new connection, we try to add work to the
> now-obliterated workqueues, and things blow up.
Hi Dan,

I'm not very familiar with this code either, but I've talked with Chrissie
and she suggested we try something like this:

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 133ef6d..a3c431e 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -142,6 +142,7 @@ struct writequeue_entry {
 
 static struct sockaddr_storage *dlm_local_addr[DLM_MAX_ADDR_COUNT];
 static int dlm_local_count;
+static int dlm_allow_conn;
 
 /* Work queues */
 static struct workqueue_struct *recv_workqueue;
@@ -710,6 +711,13 @@ static int tcp_accept_from_sock(struct connection *con)
 	struct connection *newcon;
 	struct connection *addcon;
 
+	mutex_lock(&connections_lock);
+	if (!dlm_allow_conn) {
+		mutex_unlock(&connections_lock);
+		return -1;
+	}
+	mutex_unlock(&connections_lock);
+
 	memset(&peeraddr, 0, sizeof(peeraddr));
 	result = sock_create_kern(dlm_local_addr[0]->ss_family, SOCK_STREAM,
 				  IPPROTO_TCP, &newsock);
@@ -1503,6 +1511,7 @@ void dlm_lowcomms_stop(void)
 	   socket activity.
 	*/
 	mutex_lock(&connections_lock);
+	dlm_allow_conn = 0;
 	foreach_conn(stop_conn);
 	mutex_unlock(&connections_lock);
 
@@ -1540,6 +1549,8 @@ int dlm_lowcomms_start(void)
 	if (!con_cache)
 		goto out;
 
+	dlm_allow_conn = 1;
+
 	/* Start listening */
 	if (dlm_config.ci_protocol == 0)
 		error = tcp_listen_for_all();
@@ -1555,6 +1566,7 @@ int dlm_lowcomms_start(void)
 	return 0;
 
 fail_unlisten:
+	dlm_allow_conn = 0;
 	con = nodeid2con(0,0);
 	if (con) {
 		close_connection(con, false);