The next patch will delegate fanout operations to a background worker,
which requires cancel_work_sync() during connection cleanup.  However,
the error path of __rds_conn_create() currently calls
trans->conn_free() while holding rds_conn_lock (spinlock) and
rcu_read_lock, which creates an atomic context where cancel_work_sync()
cannot sleep.

To avoid this, refactor the error/race paths to defer
trans->conn_free() calls until after locks are released. This allows
transport cleanup functions to perform blocking operations safely.

This patch moves the cp_transport_data cleanup to the 'out:' label
where it runs outside the critical section, after the connection has
been freed from the slab and cannot be accessed by racing threads.

Signed-off-by: Allison Henderson <[email protected]>
---
 net/rds/connection.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/net/rds/connection.c b/net/rds/connection.c
index 185f73b01694..695ab7446178 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -170,6 +170,7 @@ static struct rds_connection *__rds_conn_create(struct net 
*net,
        struct hlist_head *head = rds_conn_bucket(laddr, faddr);
        struct rds_transport *loop_trans;
        struct rds_conn_path *free_cp = NULL;
+       struct rds_transport *free_trans = NULL;
        unsigned long flags;
        int ret, i;
        int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1);
@@ -305,7 +306,7 @@ static struct rds_connection *__rds_conn_create(struct net 
*net,
        if (parent) {
                /* Creating passive conn */
                if (parent->c_passive) {
-                       trans->conn_free(conn->c_path[0].cp_transport_data);
+                       free_trans = trans;
                        free_cp = conn->c_path;
                        kmem_cache_free(rds_conn_slab, conn);
                        conn = parent->c_passive;
@@ -321,18 +322,7 @@ static struct rds_connection *__rds_conn_create(struct net 
*net,
                found = rds_conn_lookup(net, head, laddr, faddr, trans,
                                        tos, dev_if);
                if (found) {
-                       struct rds_conn_path *cp;
-                       int i;
-
-                       for (i = 0; i < npaths; i++) {
-                               cp = &conn->c_path[i];
-                               /* The ->conn_alloc invocation may have
-                                * allocated resource for all paths, so all
-                                * of them may have to be freed here.
-                                */
-                               if (cp->cp_transport_data)
-                                       trans->conn_free(cp->cp_transport_data);
-                       }
+                       free_trans = trans;
                        free_cp = conn->c_path;
                        kmem_cache_free(rds_conn_slab, conn);
                        conn = found;
@@ -349,9 +339,23 @@ static struct rds_connection *__rds_conn_create(struct net 
*net,
 
 out:
        if (free_cp) {
-               for (i = 0; i < npaths; i++)
+               for (i = 0; i < npaths; i++) {
+                       /*
+                        * The trans->conn_alloc call may have allocated
+                        * resources for the cp paths, which will need to
+                        * be freed before freeing cp itself.  We do this here
+                        * so it runs outside the rds_conn_lock spinlock
+                        * and rcu_read_lock section, because conn_free()
+                        * may call cancel_work_sync() which
+                        * can sleep.  free_trans is only set in the
+                        * race-loss paths where conn_alloc() succeeded.
+                        */
+                       if (free_trans && free_cp[i].cp_transport_data)
+                               free_trans->conn_free
+                                       (free_cp[i].cp_transport_data);
                        if (free_cp[i].cp_wq != rds_wq)
                                destroy_workqueue(free_cp[i].cp_wq);
+               }
                kfree(free_cp);
        }
 
-- 
2.43.0


Reply via email to