When a timer is fired, a pending I/O request is restarted and
tg->any_timer_armed is reset so other requests can be scheduled.

However, we're resetting tg->any_timer_armed first in timer_cb(), before
the request is actually restarted, and there's a window between the two
moments in which another thread can arm the same timer, hitting an
assertion in throttle_group_restart_queue().

This can be solved by deferring the reset of tg->any_timer_armed to
the moment when the queue is actually restarted, which is protected by
tg->lock, preventing other threads from arming the timer before that.

In addition to that, throttle_group_restart_tgm() is also updated to
hold tg->lock while the timer is being inspected. Here we consider
three different scenarios:

- If the tgm has a timer set, fire it immediately.
- If another tgm has a timer set, restart the queue anyway but leave
  tg->any_timer_armed untouched so the other tgm's timer is not
  disrupted.
- If there is no timer set in this group, simulate a timer that
  fires immediately by setting tg->any_timer_armed, in order to
  prevent other threads from arming a timer in the meantime.

Resolves: https://gitlab.com/qemu-project/qemu/-/issues/3194
Signed-off-by: Alberto Garcia <[email protected]>
---
 block/throttle-groups.c | 70 +++++++++++++++++++++++++++++++----------
 1 file changed, 54 insertions(+), 16 deletions(-)

diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 5329ff1fdb..cd1bdd0fd9 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -391,6 +391,7 @@ void coroutine_fn throttle_group_co_io_limits_intercept(ThrottleGroupMember *tgm
 typedef struct {
     ThrottleGroupMember *tgm;
     ThrottleDirection direction;
+    bool reset_timer_armed;
 } RestartData;
 
 static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
@@ -403,6 +404,9 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
     bool empty_queue;
 
     qemu_mutex_lock(&tg->lock);
+    if (data->reset_timer_armed) {
+        tg->any_timer_armed[direction] = false;
+    }
     empty_queue = !throttle_group_co_restart_queue(tgm, direction);
 
     /* If the request queue was empty then we have to take care of
@@ -419,13 +423,15 @@ static void coroutine_fn throttle_group_restart_queue_entry(void *opaque)
 }
 
 static void throttle_group_restart_queue(ThrottleGroupMember *tgm,
-                                        ThrottleDirection direction)
+                                         ThrottleDirection direction,
+                                         bool reset_timer_armed)
 {
     Coroutine *co;
     RestartData *rd = g_new0(RestartData, 1);
 
     rd->tgm = tgm;
     rd->direction = direction;
+    rd->reset_timer_armed = reset_timer_armed;
 
     /* This function is called when a timer is fired or when
      * throttle_group_restart_tgm() is called. Either way, there can
@@ -444,15 +450,50 @@ void throttle_group_restart_tgm(ThrottleGroupMember *tgm)
 
     if (tgm->throttle_state) {
         for (dir = THROTTLE_READ; dir < THROTTLE_MAX; dir++) {
-            QEMUTimer *t = tgm->throttle_timers.timers[dir];
+            QEMUTimer *t;
+            ThrottleState *ts = tgm->throttle_state;
+            ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+            bool reset_timer_armed;
+
+            /*
+             * This function restarts the tgm's queue immediately.
+             * This is used for example for callers to drain all requests.
+             * There are three different scenarios depending on whether
+             * a timer is armed for this tg and which tgm owns the timer.
+             */
+
+            qemu_mutex_lock(&tg->lock);
+
+            t = tgm->throttle_timers.timers[dir];
             if (timer_pending(t)) {
-                /* If there's a pending timer on this tgm, fire it now */
+                /*
+                 * Case 1: this tgm has a pending timer.
+                 * We can fire the timer immediately.
+                 */
                 timer_del(t);
-                timer_cb(tgm, dir);
+                reset_timer_armed = true;
+            } else if (tg->any_timer_armed[dir]) {
+                /*
+                 * Case 2: another tgm has a pending timer.
+                 * In this case we can still restart the queue but we
+                 * have to leave any_timer_armed untouched so the
+                 * other tgm's timer is not disrupted.
+                 */
+                reset_timer_armed = false;
             } else {
-                /* Else run the next request from the queue manually */
-                throttle_group_restart_queue(tgm, dir);
+                /*
+                 * Case 3: there is no timer set for this group.
+                 * Here we can simulate a timer that fires immediately,
+                 * so the queue is restarted but no other thread
+                 * can arm a timer in the meantime.
+                 */
+                tg->any_timer_armed[dir] = true;
+                reset_timer_armed = true;
             }
+
+            qemu_mutex_unlock(&tg->lock);
+
+            throttle_group_restart_queue(tgm, dir, reset_timer_armed);
         }
     }
 }
@@ -499,16 +540,13 @@ void throttle_group_get_config(ThrottleGroupMember *tgm, ThrottleConfig *cfg)
  */
 static void timer_cb(ThrottleGroupMember *tgm, ThrottleDirection direction)
 {
-    ThrottleState *ts = tgm->throttle_state;
-    ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
-
-    /* The timer has just been fired, so we can update the flag */
-    qemu_mutex_lock(&tg->lock);
-    tg->any_timer_armed[direction] = false;
-    qemu_mutex_unlock(&tg->lock);
-
-    /* Run the request that was waiting for this timer */
-    throttle_group_restart_queue(tgm, direction);
+    /*
+     * Run the request that was waiting for this timer.
+     * tg->any_timer_armed needs to be cleared, but we'll do it later
+     * when the queue is restarted in order to prevent another thread
+     * from arming the timer before that.
+     */
+    throttle_group_restart_queue(tgm, direction, true);
 }
 
 static void read_timer_cb(void *opaque)
-- 
2.47.3


Reply via email to