In commit 9dc3abdd1f7e ("tipc: fix nametbl_lock soft lockup at module
exit"), tipc_conn_release() was moved from tipc_conn_kref_release()
to tipc_close_conn() in order to solve nametbl_lock soft lockup at
module exit.
But it accidentally introduced a serious side effect that the
lifecycle of connection instance and its tipc_subscriber is different.
Especially, tipc_close_conn() is releasing the corresponding struct
tipc_subscriber instance without considering that there may still be
work items in the receive work queue. When those are scheduled in the
function tipc_receive_from_work(), they are using the subscriber
pointer stored in struct tipc_conn without first checking if this is
valid or not. This will sometimes lead to crashes as the next call of
tipc_conn_recvmsg() will access the now deleted wokr item.
If we move tipc_conn_release() back to tipc_conn_kref_release() from
tipc_close_conn(), the crash will be resolved as the lifecycle of
connection instance and its user data is always same. But the soft
lockup solved by commit 9dc3abdd1f will occur again. However, if we
flush the pending rx/tx work items in tipc_topsrv_stop() context
before connection is closed by tipc_close_conn(), it means connection
instance will be finally destroyed in the tipc_exit_net() context
rather than other contexts. As a result, the soft lockup will never
happen again.
Fixs: 9dc3abdd1f7e ("tipc: fix nametbl_lock soft lockup at module exit")
Signed-off-by: Ying Xue <[email protected]>
---
net/tipc/server.c | 40 +++++++++++++++++++++-------------------
1 file changed, 21 insertions(+), 19 deletions(-)
diff --git a/net/tipc/server.c b/net/tipc/server.c
index 571e137..4baf1f7 100644
--- a/net/tipc/server.c
+++ b/net/tipc/server.c
@@ -105,6 +105,8 @@ static void tipc_conn_kref_release(struct kref *kref)
}
saddr->scope = -TIPC_NODE_SCOPE;
kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr));
+ if (con->conid)
+ s->tipc_conn_release(con->conid, con->usr_data);
sock_release(sock);
con->sock = NULL;
}
@@ -192,24 +194,24 @@ static void tipc_unregister_callbacks(struct tipc_conn
*con)
write_unlock_bh(&sk->sk_callback_lock);
}
-static void tipc_close_conn(struct tipc_conn *con)
+static void tipc_close_conn(struct tipc_conn *con, bool flush)
{
- struct tipc_server *s = con->server;
-
if (test_and_clear_bit(CF_CONNECTED, &con->flags)) {
- if (con->sock)
+ if (con->sock) {
tipc_unregister_callbacks(con);
- if (con->conid)
- s->tipc_conn_release(con->conid, con->usr_data);
-
- /* We shouldn't flush pending works as we may be in the
- * thread. In fact the races with pending rx/tx work structs
- * are harmless for us here as we have already deleted this
- * connection from server connection list.
- */
- if (con->sock)
+ /* We shouldn't flush pending works as we may
+ * be in the worker thread. But if flush is set,
+ * it means that we are not on worker thread
+ * context and it's safe for us to flush the
+ * pending rx/tx works.
+ */
+ if (flush) {
+ flush_work(&con->swork);
+ flush_work(&con->rwork);
+ }
kernel_sock_shutdown(con->sock, SHUT_RDWR);
+ }
conn_put(con);
}
}
@@ -280,7 +282,7 @@ static int tipc_receive_from_sock(struct tipc_conn *con)
out_close:
if (ret != -EWOULDBLOCK)
- tipc_close_conn(con);
+ tipc_close_conn(con, false);
else if (ret == 0)
/* Don't return success if we really got EOF */
ret = -EAGAIN;
@@ -488,7 +490,7 @@ void tipc_conn_terminate(struct tipc_server *s, int conid)
con = tipc_conn_lookup(s, conid);
if (con) {
- tipc_close_conn(con);
+ tipc_close_conn(con, false);
conn_put(con);
}
}
@@ -516,7 +518,7 @@ bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32
type,
s = con->server;
scbr = s->tipc_conn_new(*conid);
if (!scbr) {
- tipc_close_conn(con);
+ tipc_close_conn(con, false);
return false;
}
@@ -533,7 +535,7 @@ void tipc_topsrv_kern_unsubscr(struct net *net, int conid)
con = tipc_conn_lookup(tipc_topsrv(net), conid);
if (!con)
return;
- tipc_close_conn(con);
+ tipc_close_conn(con, false);
conn_put(con);
}
@@ -606,7 +608,7 @@ static void tipc_send_to_sock(struct tipc_conn *con)
return;
send_err:
- tipc_close_conn(con);
+ tipc_close_conn(con, false);
}
static void tipc_recv_work(struct work_struct *work)
@@ -698,7 +700,7 @@ void tipc_server_stop(struct tipc_server *s)
con = idr_find(&s->conn_idr, id);
if (con) {
spin_unlock_bh(&s->idr_lock);
- tipc_close_conn(con);
+ tipc_close_conn(con, true);
spin_lock_bh(&s->idr_lock);
}
}
--
2.7.4
------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
tipc-discussion mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/tipc-discussion