From: Omar Sandoval <osan...@fb.com>

Commit 8cf466602028 ("kyber: fix hang on domain token wait queue") fixed
a hang caused by leaving wait entries on the domain token wait queue
after the __sbitmap_queue_get() retry succeeded, making that wait entry
a "dud" which won't in turn wake more entries up. However, we can also
get a dud entry if kyber_get_domain_token() fails once but is then
called again and succeeds. This can happen if the hardware queue is
rerun for some other reason, or, more likely, kyber_dispatch_request()
tries the same domain twice.

The fix is to remove our entry from the wait queue whenever we
successfully get a token. The only complication is that we might be on
one of many wait queues in the struct sbitmap_queue, but that's easily
fixed by remembering which wait queue we were put on.

While we're here, only initialize the wait queue entry once instead on
every wait, and use spin_lock_irq() instead of spin_lock_irqsave(),
since this is always called from process context with irqs enabled.

Signed-off-by: Omar Sandoval <osan...@fb.com>
---
I have another, rarer hang I'm still working out, but I can get this one
out of the way.

 block/kyber-iosched.c | 39 +++++++++++++++++++++++++--------------
 1 file changed, 25 insertions(+), 14 deletions(-)

diff --git a/block/kyber-iosched.c b/block/kyber-iosched.c
index b4df317c2916..00cf624ce3ed 100644
--- a/block/kyber-iosched.c
+++ b/block/kyber-iosched.c
@@ -100,9 +100,13 @@ struct kyber_hctx_data {
        unsigned int cur_domain;
        unsigned int batching;
        wait_queue_entry_t domain_wait[KYBER_NUM_DOMAINS];
+       struct sbq_wait_state *domain_ws[KYBER_NUM_DOMAINS];
        atomic_t wait_index[KYBER_NUM_DOMAINS];
 };
 
+static int kyber_domain_wake(wait_queue_entry_t *wait, unsigned mode, int 
flags,
+                            void *key);
+
 static int rq_sched_domain(const struct request *rq)
 {
        unsigned int op = rq->cmd_flags;
@@ -385,6 +389,9 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, 
unsigned int hctx_idx)
 
        for (i = 0; i < KYBER_NUM_DOMAINS; i++) {
                INIT_LIST_HEAD(&khd->rqs[i]);
+               init_waitqueue_func_entry(&khd->domain_wait[i],
+                                         kyber_domain_wake);
+               khd->domain_wait[i].private = hctx;
                INIT_LIST_HEAD(&khd->domain_wait[i].entry);
                atomic_set(&khd->wait_index[i], 0);
        }
@@ -524,35 +531,39 @@ static int kyber_get_domain_token(struct kyber_queue_data 
*kqd,
        int nr;
 
        nr = __sbitmap_queue_get(domain_tokens);
-       if (nr >= 0)
-               return nr;
 
        /*
         * If we failed to get a domain token, make sure the hardware queue is
         * run when one becomes available. Note that this is serialized on
         * khd->lock, but we still need to be careful about the waker.
         */
-       if (list_empty_careful(&wait->entry)) {
-               init_waitqueue_func_entry(wait, kyber_domain_wake);
-               wait->private = hctx;
+       if (nr < 0 && list_empty_careful(&wait->entry)) {
                ws = sbq_wait_ptr(domain_tokens,
                                  &khd->wait_index[sched_domain]);
-               add_wait_queue(&ws->wait, wait);
+               khd->domain_ws[sched_domain] = ws;
+               add_wait_queue_exclusive(&ws->wait, wait);
 
                /*
                 * Try again in case a token was freed before we got on the wait
-                * queue. The waker may have already removed the entry from the
-                * wait queue, but list_del_init() is okay with that.
+                * queue.
                 */
                nr = __sbitmap_queue_get(domain_tokens);
-               if (nr >= 0) {
-                       unsigned long flags;
+       }
 
-                       spin_lock_irqsave(&ws->wait.lock, flags);
-                       list_del_init(&wait->entry);
-                       spin_unlock_irqrestore(&ws->wait.lock, flags);
-               }
+       /*
+        * If we got a token while we were on the wait queue, remove ourselves
+        * from the wait queue to ensure that all wake ups make forward
+        * progress. It's possible that the waker already deleted the entry
+        * between the !list_empty_careful() check and us grabbing the lock, but
+        * list_del_init() is okay with that.
+        */
+       if (nr >= 0 && !list_empty_careful(&wait->entry)) {
+               ws = khd->domain_ws[sched_domain];
+               spin_lock_irq(&ws->wait.lock);
+               list_del_init(&wait->entry);
+               spin_unlock_irq(&ws->wait.lock);
        }
+
        return nr;
 }
 
-- 
2.15.1

Reply via email to