Re: [PATCH RESEND] blk-throttle: dispatch more sync writes in block throttle layer

2018-02-08 Thread Tejun Heo
Hello,

On Thu, Feb 08, 2018 at 11:39:19AM +0800, xuejiufei wrote:
> Hi Tejun,
> 
> Could you please kindly review this patch or give some advice?

I don't have anything against it but let's wait for Shaohua.

Thanks.

-- 
tejun


Re: [PATCH RESEND] blk-throttle: dispatch more sync writes in block throttle layer

2018-02-07 Thread xuejiufei
Hi Tejun,

Could you please kindly review this patch or give some advice?

Thanks.
Jiufei Xue

On 2018/1/23 10:08 AM, xuejiufei wrote:
> Cgroup writeback has been supported since v4.2, but a problem exists
> in the following case.
> 
> A cgroup may issue both buffered and direct/sync IOs. When the periodic
> writeback IOs are flushed, the foreground thread is stalled because the
> service queue in the block throttle layer already holds plenty of
> writeback IOs and foreground IOs are enqueued behind them in FIFO
> order. The current policy dispatches 6 reads and 2 writes per round,
> so sync writes can be significantly delayed.
> 
> This patch adds another queue to the block throttle layer. A service
> queue now has 3 queues: read, sync write, and async write, so that more
> sync writes can be dispatched than async writes.
> 
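For illustration, here is a minimal sketch of how a per-round dispatch budget
could be split across the three workload queues; the enum mirrors the one the
patch introduces, but the 3/4 ratios and the helper itself are assumptions,
not code taken from the patch (the dispatch changes are not shown in the
truncated diff below).

/*
 * Illustrative sketch only: bias reads first, then give sync writes a
 * larger share of the remaining write budget than async writes.
 * With a quantum of 32 this yields 24 reads, 6 sync writes and
 * 2 async writes per round.
 */
enum wl_type { READ_WORKLOAD, SYNC_WRITE_WORKLOAD, ASYNC_WRITE_WORKLOAD };

static void split_quantum(unsigned int quantum, unsigned int max_nr[3])
{
	unsigned int writes;

	max_nr[READ_WORKLOAD] = quantum * 3 / 4;
	writes = quantum - max_nr[READ_WORKLOAD];
	max_nr[SYNC_WRITE_WORKLOAD] = writes * 3 / 4;
	max_nr[ASYNC_WRITE_WORKLOAD] = writes - max_nr[SYNC_WRITE_WORKLOAD];
}

For reference, the existing two-list code uses the same 3/4 read split, which
with the old quantum of 8 is exactly where the 6 reads and 2 writes per round
mentioned above come from.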
> Test:
> 1) set up a memcg and a blkcg with an upper-limit bandwidth of 20 MB/s;
> 2) start a script that writes a 5G buffer to the page cache and then runs sync;
> 3) start sync writes:
> dd if=/dev/zero of=test bs=4k count=12800 oflag=direct conv=notrunc
> 
> Unpatched:
> 52428800 bytes (52 MB) copied, 307.065 s, 171 kB/s
> 
> Patched:
> 52428800 bytes (52 MB) copied, 13.8192 s, 3.8 MB/s
> 
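A minimal sketch of the cgroup setup for step 1 of the test, assuming cgroup
v2 mounted at /sys/fs/cgroup, a throttled disk with device number 8:0, and a
child cgroup named "test"; the paths, device number, memory limit, and doing
it from C rather than with echo from a shell are all assumptions, not details
from the original mail.

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/stat.h>

/* Write a single value into a cgroup control file, aborting on failure. */
static void write_file(const char *path, const char *val)
{
	FILE *f = fopen(path, "w");

	if (!f || fputs(val, f) == EOF) {
		perror(path);
		exit(1);
	}
	fclose(f);
}

int main(void)
{
	/* enable the io and memory controllers for child cgroups */
	write_file("/sys/fs/cgroup/cgroup.subtree_control", "+io +memory");

	if (mkdir("/sys/fs/cgroup/test", 0755) && errno != EEXIST) {
		perror("mkdir");
		exit(1);
	}

	/* blkcg: limit write bandwidth on device 8:0 to 20 MiB/s */
	write_file("/sys/fs/cgroup/test/io.max", "8:0 wbps=20971520");

	/* memcg: cap the memory available to the test cgroup */
	write_file("/sys/fs/cgroup/test/memory.max", "1G");
	return 0;
}

The test processes (the buffered-write script and the dd) would then be moved
into the cgroup by writing their PIDs to /sys/fs/cgroup/test/cgroup.procs.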
> Signed-off-by: Jiufei Xue 
> ---
>  block/blk-throttle.c | 195 ++++++++++++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 129 insertions(+), 66 deletions(-)
> 
> diff --git a/block/blk-throttle.c b/block/blk-throttle.c
> index d19f416..842257e 100644
> --- a/block/blk-throttle.c
> +++ b/block/blk-throttle.c
> @@ -14,10 +14,10 @@
>  #include "blk.h"
>  
>  /* Max dispatch from a group in 1 round */
> -static int throtl_grp_quantum = 8;
> +static int throtl_grp_quantum = 32;
>  
>  /* Total max dispatch from all groups in one round */
> -static int throtl_quantum = 32;
> +static int throtl_quantum = 128;
>  
>  /* Throttling is performed over a slice and after that slice is renewed */
>  #define DFL_THROTL_SLICE_HD (HZ / 10)
> @@ -43,6 +43,12 @@
>  /* A workqueue to queue throttle related work */
>  static struct workqueue_struct *kthrotld_workqueue;
>  
> +enum wl_type {
> + READ_WORKLOAD = 0,
> + SYNC_WRITE_WORKLOAD = 1,
> + ASYNC_WRITE_WORKLOAD = 2
> +};
> +
>  /*
>   * To implement hierarchical throttling, throtl_grps form a tree and bios
>   * are dispatched upwards level by level until they reach the top and get
> @@ -79,8 +85,11 @@ struct throtl_service_queue {
>   * Bios queued directly to this service_queue or dispatched from
>   * children throtl_grp's.
>   */
> - struct list_head queued[2];  /* throtl_qnode [READ/WRITE] */
> - unsigned int nr_queued[2];   /* number of queued bios */
> + /* throtl_qnode [READ/SYNC_WRITE/ASYNC_WRITE] */
> + struct list_head queued[3];
> +
> + unsigned int nr_queued[3];   /* number of queued bios */
> +
>  
>   /*
>   * RB tree of active children throtl_grp's, which are sorted by
> @@ -127,8 +136,8 @@ struct throtl_grp {
>   * with the sibling qnode_on_parents and the parent's
>   * qnode_on_self.
>   */
> - struct throtl_qnode qnode_on_self[2];
> - struct throtl_qnode qnode_on_parent[2];
> + struct throtl_qnode qnode_on_self[3];
> + struct throtl_qnode qnode_on_parent[3];
>  
>   /*
>   * Dispatch time in jiffies. This is the estimated time when group
> @@ -202,7 +211,7 @@ struct throtl_data
>   struct request_queue *queue;
>  
>   /* Total Number of queued bios on READ and WRITE lists */
> - unsigned int nr_queued[2];
> + unsigned int nr_queued[3];
>  
>   unsigned int throtl_slice;
>  
> @@ -274,6 +283,18 @@ static struct throtl_data *sq_to_td(struct throtl_service_queue *sq)
>   return container_of(sq, struct throtl_data, service_queue);
>  }
>  
> +static inline enum wl_type bio_workload_type(struct bio *bio)
> +{
> + return bio_data_dir(bio) ?
> +        ((bio->bi_opf & REQ_SYNC) ? SYNC_WRITE_WORKLOAD :
> +         ASYNC_WRITE_WORKLOAD) : READ_WORKLOAD;
> +}
> +
> +static inline bool wl_to_rw(enum wl_type type)
> +{
> + return type >= SYNC_WRITE_WORKLOAD;
> +}
> +
>  /*
>   * cgroup's limit in LIMIT_MAX is scaled if low limit is set. This scale is to
>   * make the IO dispatch more smooth.
> @@ -475,8 +496,9 @@ static struct bio *throtl_pop_queued(struct list_head *queued,
>  /* init a service_queue, assumes the caller zeroed it */
>  static void throtl_service_queue_init(struct throtl_service_queue *sq)
>  {
> - INIT_LIST_HEAD(&sq->queued[0]);
> - INIT_LIST_HEAD(&sq->queued[1]);
> + INIT_LIST_HEAD(&sq->queued[READ_WORKLOAD]);
> + INIT_LIST_HEAD(&sq->queued[SYNC_WRITE_WORKLOAD]);
> + INIT_LIST_HEAD(&sq->queued[ASYNC_WRITE_WORKLOAD]);
>   sq->pending_tree = RB_ROOT;
>   timer_setup(&sq->pending_timer, throtl_pending_timer_fn, 0);
>  }
> @@ -484,7 +506,7 @@ static void throtl_service_queue_init(struct throtl_service_queue *sq)
>  static struct blkg_p