Gitweb:     
http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commit;h=c1384c9c4c184543375b52a0997d06cd98145164
Commit:     c1384c9c4c184543375b52a0997d06cd98145164
Parent:     6e5b70e9d1e712d8dad5514e0ab5240ac4b5fb57
Author:     Trond Myklebust <[EMAIL PROTECTED]>
AuthorDate: Thu Jun 14 18:00:42 2007 -0400
Committer:  Trond Myklebust <[EMAIL PROTECTED]>
CommitDate: Tue Jul 10 23:40:31 2007 -0400

    SUNRPC: fix hang due to eventd deadlock...
    
    Brian Behlendorf writes:
    
    The root cause of the NFS hang we were observing appears to be a rare
    deadlock between the kernel provided usermodehelper API and the linux NFS
    client.  The deadlock can arise because both of these services use the
    generic linux work queues.  The usermodehelper API runs the specified user
    application in the context of the work queue.  And NFS submits both cleanup
    and reconnect work to the generic work queue for handling.  Normally this
    is fine but a deadlock can result in the following situation.
    
      - NFS client is in a disconnected state
      - [events/0] runs a usermodehelper app with an NFS dependent operation,
        this triggers an NFS reconnect.
      - NFS reconnect happens to be submitted to [events/0] work queue.
      - Deadlock, the [events/0] work queue will never process the
        reconnect because it is blocked on the previous NFS dependent
        operation which will not complete.
    
    The solution is simply to run reconnect requests on rpciod.
    
    Signed-off-by: Trond Myklebust <[EMAIL PROTECTED]>
---
 net/sunrpc/xprt.c     |    4 ++--
 net/sunrpc/xprtsock.c |   17 +++++++----------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 5b05b73..518acb7 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -127,7 +127,7 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
                clear_bit(XPRT_LOCKED, &xprt->state);
                smp_mb__after_clear_bit();
        } else
-               schedule_work(&xprt->task_cleanup);
+               queue_work(rpciod_workqueue, &xprt->task_cleanup);
 }
 
 /*
@@ -515,7 +515,7 @@ xprt_init_autodisconnect(unsigned long data)
        if (xprt_connecting(xprt))
                xprt_release_write(xprt, NULL);
        else
-               schedule_work(&xprt->task_cleanup);
+               queue_work(rpciod_workqueue, &xprt->task_cleanup);
        return;
 out_abort:
        spin_unlock(&xprt->transport_lock);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index cc33c58..ee6ad3b 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -653,8 +653,7 @@ static void xs_destroy(struct rpc_xprt *xprt)
 
        dprintk("RPC:       xs_destroy xprt %p\n", xprt);
 
-       cancel_delayed_work(&transport->connect_worker);
-       flush_scheduled_work();
+       cancel_rearming_delayed_work(&transport->connect_worker);
 
        xprt_disconnect(xprt);
        xs_close(xprt);
@@ -1001,7 +1000,7 @@ static void xs_tcp_state_change(struct sock *sk)
                /* Try to schedule an autoclose RPC calls */
                set_bit(XPRT_CLOSE_WAIT, &xprt->state);
                if (test_and_set_bit(XPRT_LOCKED, &xprt->state) == 0)
-                       schedule_work(&xprt->task_cleanup);
+                       queue_work(rpciod_workqueue, &xprt->task_cleanup);
        default:
                xprt_disconnect(xprt);
        }
@@ -1410,18 +1409,16 @@ static void xs_connect(struct rpc_task *task)
                dprintk("RPC:       xs_connect delayed xprt %p for %lu "
                                "seconds\n",
                                xprt, xprt->reestablish_timeout / HZ);
-               schedule_delayed_work(&transport->connect_worker,
-                                       xprt->reestablish_timeout);
+               queue_delayed_work(rpciod_workqueue,
+                                  &transport->connect_worker,
+                                  xprt->reestablish_timeout);
                xprt->reestablish_timeout <<= 1;
                if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
                        xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
        } else {
                dprintk("RPC:       xs_connect scheduled xprt %p\n", xprt);
-               schedule_delayed_work(&transport->connect_worker, 0);
-
-               /* flush_scheduled_work can sleep... */
-               if (!RPC_IS_ASYNC(task))
-                       flush_scheduled_work();
+               queue_delayed_work(rpciod_workqueue,
+                                  &transport->connect_worker, 0);
        }
 }
 
-
To unsubscribe from this list: send the line "unsubscribe git-commits-head" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to