[Devel] [PATCH VZ9] fs/fuse kio: fix crash due to race condition in krpc abort

Liu Kui Thu, 15 May 2025 00:28:05 -0700

Traversing the list of requests to be aborted needs to be done under
the krpc->lock, otherwise other threads could delete requests from
the list, resulting in illegal memory access.


However in the unlikely event when the aborting msg is under I/O,
krpc abort must wait until it finish. Apparently wait and holding
the krpc->lock is dangerous and unacceptable, however wait on the
msg without krpc->lock is also dangerous as the msg could have
been freed by the time we wake up. So in that case we simply wait
retry again. However if the msg is stuck at netio for a long period
of time we need to kill the connection. The timeout can be set via
module parameter 'pcs_krpc_abort_timeout'.

Fixes: VSTOR-106209
https://virtuozzo.atlassian.net/browse/VSTOR-106209

Signed-off-by: Liu Kui <kui....@virtuozzo.com>

fix debug
---
 fs/fuse/kio/pcs/pcs_krpc.c | 44 ++++++++++++++++++++++++++++++++------
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/fs/fuse/kio/pcs/pcs_krpc.c b/fs/fuse/kio/pcs/pcs_krpc.c
index 3e404a2d0bde..9cc4e12b5eab 100644
--- a/fs/fuse/kio/pcs/pcs_krpc.c
+++ b/fs/fuse/kio/pcs/pcs_krpc.c
@@ -29,6 +29,10 @@ unsigned int pcs_krpc_use_thread = 1;
 module_param(pcs_krpc_use_thread, uint, 0644);
 MODULE_PARM_DESC(pcs_krpc_use_thread, "Offload creating the request to a 
thread");
 
+unsigned int pcs_krpc_abort_timeout = 10;
+module_param(pcs_krpc_abort_timeout, uint, 0644);
+MODULE_PARM_DESC(pcs_krpc_abort_timeout, "Timeout for krpc abort in 
milisecond");
+
 extern unsigned int pcs_krpc_version;
 
 struct kmem_cache *krpc_req_cachep;
@@ -726,9 +730,10 @@ static int pcs_krpc_ioctl_send_msg(struct krpc_req *kreq)
 
 static int pcs_krpc_abort(struct pcs_krpc *krpc)
 {
-       struct krpc_req *kreq, *tmp;
+       struct krpc_req *kreq;
        struct krpc_completion *comp;
        struct pcs_msg *msg;
+       int retries = pcs_krpc_abort_timeout * 10;
 
        spin_lock(&krpc->lock);
 
@@ -755,20 +760,47 @@ static int pcs_krpc_abort(struct pcs_krpc *krpc)
        }
        krpc->nr_completion = 0;
 
-       /* abort incompleted requests */
+       /*
+        * Move incompleted requests to a separate queue and abort every one
+        * of them
+        */
        list_splice_tail_init(&krpc->pending_queue, &krpc->dispose_queue);
-       spin_unlock(&krpc->lock);
 
-       list_for_each_entry_safe(kreq, tmp, &krpc->dispose_queue, link) {
+retry:
+       list_for_each_entry(kreq, &krpc->dispose_queue, link) {
+               if (kreq->flags & KRPC_REQ_F_ABORTED)
+                       continue;
+
                kreq->flags |= KRPC_REQ_F_ABORTED;
                msg = &kreq->msg;
                /*
                 * The msg isn't freed immediately here however the user buffer
-                * won't be accessed.
+                * won't be accessed. If in an unlikely event the msg is 
currently
+                * under I/O, we must wait until I/O completes.
                 */
-               pcs_msg_abort(msg, true);
+               if (pcs_msg_abort(msg, false) == -EBUSY) {
+                       spin_unlock(&krpc->lock);
+
+                       if (--retries < 0) {
+                               /* seems the msg is stuck at netio, kill the 
connection */
+                               mutex_lock(&krpc->rpc->mutex);
+                               rpc_abort(krpc->rpc, 0, PCS_ERR_NET_ABORT);
+                               mutex_unlock(&krpc->rpc->mutex);
+                               return 0;
+                       }
+                       usleep_range(100, 200);
+
+                       /*
+                        * The msg we're waiting for may have been completed 
and freed,
+                        * so we must go through the list again under 
krpc->lock.
+                        */
+                       spin_lock(&krpc->lock);
+                       goto retry;
+               }
        }
 
+       spin_unlock(&krpc->lock);
+
        return 0;
 }
 
-- 
2.39.5 (Apple Git-154)

_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

[Devel] [PATCH VZ9] fs/fuse kio: fix crash due to race condition in krpc abort

Reply via email to