Re: [PATCH] block: fix io hung by block throttle

2021-04-19 Thread Junxiao Bi



On 4/18/21 11:09 PM, Junxiao Bi wrote:

-	finish_wait(&rqw->wait, &data.wq);
+	mutex_lock(&rqw->throttle_mutex);
+	wait_event(rqw->wait, acquire_inflight_cb(rqw, private_data));
+	mutex_unlock(&rqw->throttle_mutex);


This will break the throttle? There is an inflight io limitation. With 
this change, there can be only one io inflight whatever the limit is.

Sorry, ignore this. I should have gone to sleep at that time.


Thanks,

Junxiao.


Re: [PATCH] block: fix io hung by block throttle

2021-04-19 Thread Junxiao Bi



On 4/18/21 5:33 AM, Hillf Danton wrote:

On Sat, 17 Apr 2021 14:37:57  Junxiao Bi  wrote:

On 4/17/21 3:10 AM, Hillf Danton wrote:

+   if (acquire_inflight_cb(rqw, private_data))

This function is to increase the atomic variable rq_wait->inflight.

You are right.


What's the mutex for?

It cuts the race where we peek at the sleepers on rqw->wait while they are
coming and going; we can't update rqw->inflight without making sure there
are no sleepers.


Why? I think checking the sleeper in the original code is for a fast path.

For wbt, acquire_inflight_cb is wbt_inflight_cb where atomic_inc_below 
is used to update rqw->inflight. I don't see why a mutex is needed for 
this atomic operation.
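
For reference, my understanding is that atomic_inc_below() is basically a
lockless cmpxchg loop along these lines (a simplified sketch from memory,
see blk-rq-qos.c/blk-wbt.c for the exact code), so the
increment-if-below-limit itself needs no extra lock:

static bool atomic_inc_below(atomic_t *v, unsigned int below)
{
	unsigned int cur = atomic_read(v);

	for (;;) {
		unsigned int old;

		/* already at the limit, tell the caller to sleep */
		if (cur >= below)
			return false;
		/* try to take a slot; retry if someone else raced with us */
		old = atomic_cmpxchg(v, cur, cur + 1);
		if (old == cur)
			break;
		cur = old;
	}

	return true;
}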




With the mutex in place, in addition to the certainty of !sleepers, we can
avoid the race between us and the waker in terms of updating inflight by
removing the invocation of acquire_inflight_cb in the wakeup callback, and the
bonus is we no longer need the wakeup cb and the rq_qos_wait_data because the
more traditional wait_event() can do the job.

Finally we can dump the cleanup_cb_t.

+++ b/block/blk-rq-qos.c
@@ -200,96 +200,24 @@ bool rq_depth_scale_down(struct rq_depth
return true;
  }
  
-struct rq_qos_wait_data {

-   struct wait_queue_entry wq;
-   struct task_struct *task;
-   struct rq_wait *rqw;
-   acquire_inflight_cb_t *cb;
-   void *private_data;
-   bool got_token;
-};
-
-static int rq_qos_wake_function(struct wait_queue_entry *curr,
-   unsigned int mode, int wake_flags, void *key)
-{
-   struct rq_qos_wait_data *data = container_of(curr,
-struct rq_qos_wait_data,
-wq);
-
-   /*
-* If we fail to get a budget, return -1 to interrupt the wake up loop
-* in __wake_up_common.
-*/
-   if (!data->cb(data->rqw, data->private_data))
-   return -1;
-
-   data->got_token = true;
-   smp_wmb();
-	list_del_init(&curr->entry);
-   wake_up_process(data->task);
-   return 1;
-}
-
  /**
   * rq_qos_wait - throttle on a rqw if we need to
   * @rqw: rqw to throttle on
   * @private_data: caller provided specific data
   * @acquire_inflight_cb: inc the rqw->inflight counter if we can
- * @cleanup_cb: the callback to cleanup in case we race with a waker
   *
   * This provides a uniform place for the rq_qos users to do their throttling.
   * Since you can end up with a lot of things sleeping at once, this manages the
   * waking up based on the resources available.  The acquire_inflight_cb should
   * inc the rqw->inflight if we have the ability to do so, or return false if not
   * and then we will sleep until the room becomes available.
- *
- * cleanup_cb is in case that we race with a waker and need to cleanup the
- * inflight count accordingly.
   */
  void rq_qos_wait(struct rq_wait *rqw, void *private_data,
-acquire_inflight_cb_t *acquire_inflight_cb,
-cleanup_cb_t *cleanup_cb)
+acquire_inflight_cb_t *acquire_inflight_cb)
  {
-   struct rq_qos_wait_data data = {
-   .wq = {
-   .func   = rq_qos_wake_function,
-   .entry  = LIST_HEAD_INIT(data.wq.entry),
-   },
-   .task = current,
-   .rqw = rqw,
-   .cb = acquire_inflight_cb,
-   .private_data = private_data,
-   };
-   bool has_sleeper;
-
-	has_sleeper = wq_has_sleeper(&rqw->wait);
-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
-   return;
-
-	prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);
-	has_sleeper = !wq_has_single_sleeper(&rqw->wait);
-   do {
-   /* The memory barrier in set_task_state saves us here. */
-   if (data.got_token)
-   break;
-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
-			finish_wait(&rqw->wait, &data.wq);
-
-   /*
-* We raced with wbt_wake_function() getting a token,
-* which means we now have two. Put our local token
-* and wake anyone else potentially waiting for one.
-*/
-   smp_rmb();
-   if (data.got_token)
-   cleanup_cb(rqw, private_data);
-   break;
-   }
-   io_schedule();
-   has_sleeper = true;
-   set_current_state(TASK_UNINTERRUPTIBLE);
-   } while (1);
-	finish_wait(&rqw->wait, &data.wq);
+	mutex_lock(&rqw->throttle_mutex);
+	wait_event(rqw->wait, acquire_inflight_cb(rqw, private_data));
+	mutex_unlock(&rqw->throttle_mutex);


This will break the throttle? There is an inflight io limitation. With 
this change, there can be only one io inflight whatever the limit is.

Re: [PATCH] block: fix io hung by block throttle

2021-04-17 Thread Junxiao Bi

On 4/17/21 3:10 AM, Hillf Danton wrote:


--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -260,19 +260,17 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
.cb = acquire_inflight_cb,
.private_data = private_data,
};
-   bool has_sleeper;
  
-	has_sleeper = wq_has_sleeper(&rqw->wait);

-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
+	if (!wq_has_sleeper(&rqw->wait)
+   && acquire_inflight_cb(rqw, private_data))
return;
  
	prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);

-	has_sleeper = !wq_has_single_sleeper(&rqw->wait);
do {
/* The memory barrier in set_task_state saves us here. */
if (data.got_token)
break;
-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
+   if (acquire_inflight_cb(rqw, private_data)) {
			finish_wait(&rqw->wait, &data.wq);

Simply removing !has_sleeper is not enough: if that check is mandatory before
acquire_inflight_cb(), something like a mutex is needed to sieve the
concurrent sleepers out; see below.


--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -260,19 +260,18 @@ void rq_qos_wait(struct rq_wait *rqw, vo
.cb = acquire_inflight_cb,
.private_data = private_data,
};
-   bool has_sleeper;
  
-	has_sleeper = wq_has_sleeper(&rqw->wait);

-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
-   return;
+	mutex_lock(&rqw->mutex);
+
+   if (acquire_inflight_cb(rqw, private_data))


This function is to increase the atomic variable rq_wait->inflight. What's 
the mutex for?


Thanks,

Junxiao.


+   goto out;
  
	prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);

-   has_sleeper = !wq_has_single_sleeper(>wait);
do {
/* The memory barrier in set_task_state saves us here. */
if (data.got_token)
break;
-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
+   if (acquire_inflight_cb(rqw, private_data)) {
			finish_wait(&rqw->wait, &data.wq);
  
  			/*

@@ -286,10 +285,11 @@ void rq_qos_wait(struct rq_wait *rqw, vo
break;
}
io_schedule();
-   has_sleeper = true;
set_current_state(TASK_UNINTERRUPTIBLE);
} while (1);
	finish_wait(&rqw->wait, &data.wq);
+out:
+	mutex_unlock(&rqw->mutex);
  }


Re: [PATCH] block: fix io hung by block throttle

2021-04-14 Thread Junxiao Bi

On 4/14/21 9:11 PM, Hillf Danton wrote:


On Wed, 14 Apr 2021 14:18:30 Junxiao Bi wrote:

There is a race bug which can cause io hang when multiple processes
run in parallel in rq_qos_wait().
Let's assume there were 4 processes P1/P2/P3/P4: P1/P2 were at the entry
of rq_qos_wait, P3/P4 were waiting for io done, 2 io were inflight, and
the inflight io limit was 2. See the race below.

void rq_qos_wait()
{
...
 bool has_sleeper;

 P3/P4 were in the sleeper list, has_sleeper was true for both P1 and 
P2.
 has_sleeper = wq_has_sleeper(&rqw->wait);
 if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
 return;

 2 inflight io done, P3/P4 were woken up to issue 2 new io.
 2 new io done, no inflight io.

 P1/P2 were added to the sleeper list, 2 entries in the list
 prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);

 P1/P2 were in the sleeper list, has_sleeper was true for P1/P2.
 has_sleeper = !wq_has_single_sleeper(&rqw->wait);
 do {
 /* The memory barrier in set_task_state saves us here. */
 if (data.got_token)
 break;
 if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
  finish_wait(&rqw->wait, &data.wq);

 /*
  * We raced with wbt_wake_function() getting a token,
  * which means we now have two. Put our local token
  * and wake anyone else potentially waiting for one.
  */
 smp_rmb();
 if (data.got_token)
 cleanup_cb(rqw, private_data);
 break;
 }

 P1/P2 hung here forever. New io requests will also hang here.
 io_schedule();
 has_sleeper = true;
 set_current_state(TASK_UNINTERRUPTIBLE);
 } while (1);
 finish_wait(&rqw->wait, &data.wq);
}

Cc: sta...@vger.kernel.org
Signed-off-by: Junxiao Bi 
---
  block/blk-rq-qos.c | 9 +++--
  1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 656460636ad3..04d888c99bc0 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -260,19 +260,17 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
.cb = acquire_inflight_cb,
.private_data = private_data,
};
-   bool has_sleeper;
  
-	has_sleeper = wq_has_sleeper(&rqw->wait);

-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data))
+	if (!wq_has_sleeper(&rqw->wait)
+   && acquire_inflight_cb(rqw, private_data))
return;
  
	prepare_to_wait_exclusive(&rqw->wait, &data.wq, TASK_UNINTERRUPTIBLE);

-	has_sleeper = !wq_has_single_sleeper(&rqw->wait);
do {
/* The memory barrier in set_task_state saves us here. */
if (data.got_token)
break;
-   if (!has_sleeper && acquire_inflight_cb(rqw, private_data)) {
+   if (acquire_inflight_cb(rqw, private_data)) {
			finish_wait(&rqw->wait, &data.wq);
  
  			/*

@@ -286,7 +284,6 @@ void rq_qos_wait(struct rq_wait *rqw, void *private_data,
break;
}
io_schedule();
-   has_sleeper = true;
set_current_state(TASK_UNINTERRUPTIBLE);
} while (1);
	finish_wait(&rqw->wait, &data.wq);
--
2.24.3 (Apple Git-128)


A missed wakeup may cause the hang.

--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -287,7 +287,8 @@ void rq_qos_wait(struct rq_wait *rqw, vo
}
io_schedule();
has_sleeper = true;
-   set_current_state(TASK_UNINTERRUPTIBLE);
+	prepare_to_wait_exclusive(&rqw->wait, &data.wq,
+			TASK_UNINTERRUPTIBLE);


From rq_qos_wake_function(), the process can be woken up and removed 
from the sleeper list only when it gets the budget. It looks unnecessary 
to re-add it to the sleeper list again.
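
The relevant part of rq_qos_wake_function() is roughly (a simplified
excerpt, see blk-rq-qos.c for the exact code):

	/* no budget: leave the entry on the wait queue, stop the wake loop */
	if (!data->cb(data->rqw, data->private_data))
		return -1;

	data->got_token = true;
	smp_wmb();
	/* budget taken: only now is the entry dropped from the sleeper list */
	list_del_init(&curr->entry);
	wake_up_process(data->task);
	return 1;

So a process that was woken up has already left the wait queue and
already holds a token.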


Thanks,

Junxiao.


} while (1);
	finish_wait(&rqw->wait, &data.wq);
  }