On Wed, Aug 23, 2017 at 03:21:25PM -0400, Vivek Goyal wrote:
> On Wed, Aug 23, 2017 at 11:15:15AM -0700, Shaohua Li wrote:
> > From: Shaohua Li <s...@fb.com>
> > 
> > Not a merge request, for discussion only.
> > 
> > loop block device handles IO in a separate thread. The actual IO
> > dispatched isn't cloned from the IO loop device received, so the
> > dispatched IO loses the cgroup context.
> > 
> > I'm ignoring the buffered IO case for now, which is quite complicated.
> > Making the loop thread aware of cgroup context doesn't really help. The
> > loop device only writes to a single file, and in the current writeback
> > cgroup implementation a file can only belong to one cgroup.
> > 
> > For the direct IO case, we could work around the issue in theory. For
> > example, say we assign cgroup1 5M/s BW for the loop device and cgroup2
> > 10M/s. We can create a special cgroup for the loop thread and assign at
> > least 15M/s for the underlying disk. In this way, we correctly throttle
> > the two cgroups. But this is tricky to set up.
> > 
> > This patch tries to address the issue. When loop thread is handling IO,
> > it declares the IO is on behalf of the original task, then in block IO
> > we use the original task to find cgroup. The concept probably works for
> > other scenarios too, but right now I don't make it generic yet.
> 
> Hi Shaohua, 
> 
> Can the worker thread switch/move to the cgroup the bio is in, do the
> submission, and then switch back? That way IO submitted by the worker
> should be accounted to the cgroup the bio belongs to. Just a thought; I
> don't even know if that's feasible.

That was my first attempt, but it looks like moving a thread to a cgroup
is an expensive operation.

Thanks,
Shaohua

> 
> Vivek
> 
> > 
> > Signed-off-by: Shaohua Li <s...@fb.com>
> > ---
> >  block/bio.c                |  5 ++++-
> >  drivers/block/loop.c       | 14 +++++++++++++-
> >  drivers/block/loop.h       |  1 +
> >  include/linux/blk-cgroup.h |  3 ++-
> >  include/linux/sched.h      |  1 +
> >  5 files changed, 21 insertions(+), 3 deletions(-)
> > 
> > diff --git a/block/bio.c b/block/bio.c
> > index e241bbc..8f0df3c 100644
> > --- a/block/bio.c
> > +++ b/block/bio.c
> > @@ -2058,7 +2058,10 @@ int bio_associate_current(struct bio *bio)
> >  
> >     get_io_context_active(ioc);
> >     bio->bi_ioc = ioc;
> > -   bio->bi_css = task_get_css(current, io_cgrp_id);
> > +   if (current->cgroup_task)
> > +           bio->bi_css = task_get_css(current->cgroup_task, io_cgrp_id);
> > +   else
> > +           bio->bi_css = task_get_css(current, io_cgrp_id);
> >     return 0;
> >  }
> >  EXPORT_SYMBOL_GPL(bio_associate_current);
> > diff --git a/drivers/block/loop.c b/drivers/block/loop.c
> > index ef83349..fefede3 100644
> > --- a/drivers/block/loop.c
> > +++ b/drivers/block/loop.c
> > @@ -77,7 +77,7 @@
> >  #include <linux/falloc.h>
> >  #include <linux/uio.h>
> >  #include "loop.h"
> > -
> > +#include <linux/sched/task.h>
> >  #include <linux/uaccess.h>
> >  
> >  static DEFINE_IDR(loop_index_idr);
> > @@ -471,6 +471,8 @@ static void lo_rw_aio_complete(struct kiocb *iocb, long 
> > ret, long ret2)
> >  {
> >     struct loop_cmd *cmd = container_of(iocb, struct loop_cmd, iocb);
> >  
> > +   if (cmd->cgroup_task)
> > +           put_task_struct(cmd->cgroup_task);
> >     cmd->ret = ret;
> >     blk_mq_complete_request(cmd->rq);
> >  }
> > @@ -502,11 +504,16 @@ static int lo_rw_aio(struct loop_device *lo, struct 
> > loop_cmd *cmd,
> >     cmd->iocb.ki_complete = lo_rw_aio_complete;
> >     cmd->iocb.ki_flags = IOCB_DIRECT;
> >  
> > +   if (cmd->cgroup_task)
> > +           current->cgroup_task = cmd->cgroup_task;
> > +
> >     if (rw == WRITE)
> >             ret = call_write_iter(file, &cmd->iocb, &iter);
> >     else
> >             ret = call_read_iter(file, &cmd->iocb, &iter);
> >  
> > +   current->cgroup_task = NULL;
> > +
> >     if (ret != -EIOCBQUEUED)
> >             cmd->iocb.ki_complete(&cmd->iocb, ret, 0);
> >     return 0;
> > @@ -1705,6 +1712,11 @@ static blk_status_t loop_queue_rq(struct 
> > blk_mq_hw_ctx *hctx,
> >             break;
> >     }
> >  
> > +   if (cmd->use_aio) {
> > +           cmd->cgroup_task = current;
> > +           get_task_struct(current);
> > +   } else
> > +           cmd->cgroup_task = NULL;
> >     kthread_queue_work(&lo->worker, &cmd->work);
> >  
> >     return BLK_STS_OK;
> > diff --git a/drivers/block/loop.h b/drivers/block/loop.h
> > index 2c096b9..eb98d4d 100644
> > --- a/drivers/block/loop.h
> > +++ b/drivers/block/loop.h
> > @@ -73,6 +73,7 @@ struct loop_cmd {
> >     bool use_aio;           /* use AIO interface to handle I/O */
> >     long ret;
> >     struct kiocb iocb;
> > +   struct task_struct *cgroup_task;
> >  };
> >  
> >  /* Support for loadable transfer modules */
> > diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h
> > index 9d92153..38a5517 100644
> > --- a/include/linux/blk-cgroup.h
> > +++ b/include/linux/blk-cgroup.h
> > @@ -232,7 +232,8 @@ static inline struct blkcg *bio_blkcg(struct bio *bio)
> >  {
> >     if (bio && bio->bi_css)
> >             return css_to_blkcg(bio->bi_css);
> > -   return task_blkcg(current);
> > +   return task_blkcg(current->cgroup_task ?
> > +                   current->cgroup_task : current);
> >  }
> >  
> >  static inline struct cgroup_subsys_state *
> > diff --git a/include/linux/sched.h b/include/linux/sched.h
> > index 8337e2d..a5958b0 100644
> > --- a/include/linux/sched.h
> > +++ b/include/linux/sched.h
> > @@ -897,6 +897,7 @@ struct task_struct {
> >     struct css_set __rcu            *cgroups;
> >     /* cg_list protected by css_set_lock and tsk->alloc_lock: */
> >     struct list_head                cg_list;
> > +   struct task_struct              *cgroup_task;
> >  #endif
> >  #ifdef CONFIG_INTEL_RDT_A
> >     int                             closid;
> > -- 
> > 2.9.5

Reply via email to