Re: [External] Re: [PATCH 5/5] mm: memcontrol: use object cgroup for remote memory cgroup charging

2021-03-02 Thread Muchun Song
On Tue, Mar 2, 2021 at 9:29 AM Roman Gushchin  wrote:
>
> On Mon, Mar 01, 2021 at 02:22:27PM +0800, Muchun Song wrote:
> > We spent a lot of energy to make slab accounting do not hold a refcount
> > to memory cgroup, so the dying cgroup can be freed as soon as possible
> > on cgroup offlined.
> >
> > But some users of remote memory cgroup charging (e.g. bpf and fsnotify)
> > hold a refcount to memory cgroup for charging to it later. Actually,
> > the slab core use obj_cgroup APIs for memory cgroup charing, so we can
> > hold a refcount to obj_cgroup instead of memory cgroup. In this case,
> > the infrastructure of remote meory charging also do not hold a refcount
> > to memory cgroup.
>
> -cc all except mm folks
>
> Same here, let's not switch the remote charging infra to objcg to save
> an ability to use it for user pages. If we have a real problem with bpf/...,
> let's solve it case by case.

How about rename current->active_memcg to current->active_memcg_data?
If the lowest bit of active_memcg_data  is one, it points to an object cgroup.
Otherwise, it points to a memoy cgroup. Just like page->memcg_data.
In this case, we can charge objects by using obj_cgroup APIs.
Just my thought.

Thanks.

>
> Thanks!
>
> >
> > Signed-off-by: Muchun Song 
> > ---
> >  fs/buffer.c  | 10 --
> >  fs/notify/fanotify/fanotify.c|  6 ++--
> >  fs/notify/fanotify/fanotify_user.c   |  2 +-
> >  fs/notify/group.c|  3 +-
> >  fs/notify/inotify/inotify_fsnotify.c |  8 ++---
> >  fs/notify/inotify/inotify_user.c |  2 +-
> >  include/linux/bpf.h  |  2 +-
> >  include/linux/fsnotify_backend.h |  2 +-
> >  include/linux/memcontrol.h   | 15 
> >  include/linux/sched.h|  4 +--
> >  include/linux/sched/mm.h | 28 +++
> >  kernel/bpf/syscall.c | 35 +--
> >  kernel/fork.c|  2 +-
> >  mm/memcontrol.c  | 66 
> > 
> >  14 files changed, 121 insertions(+), 64 deletions(-)
> >
> > diff --git a/fs/buffer.c b/fs/buffer.c
> > index 591547779dbd..cc99fcf66368 100644
> > --- a/fs/buffer.c
> > +++ b/fs/buffer.c
> > @@ -842,14 +842,16 @@ struct buffer_head *alloc_page_buffers(struct page 
> > *page, unsigned long size,
> >   struct buffer_head *bh, *head;
> >   gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
> >   long offset;
> > - struct mem_cgroup *memcg, *old_memcg;
> > + struct mem_cgroup *memcg;
> > + struct obj_cgroup *objcg, *old_objcg;
> >
> >   if (retry)
> >   gfp |= __GFP_NOFAIL;
> >
> >   /* The page lock pins the memcg */
> >   memcg = page_memcg(page);
> > - old_memcg = set_active_memcg(memcg);
> > + objcg = get_obj_cgroup_from_mem_cgroup(memcg);
> > + old_objcg = set_active_obj_cgroup(objcg);
> >
> >   head = NULL;
> >   offset = PAGE_SIZE;
> > @@ -868,7 +870,9 @@ struct buffer_head *alloc_page_buffers(struct page 
> > *page, unsigned long size,
> >   set_bh_page(bh, page, offset);
> >   }
> >  out:
> > - set_active_memcg(old_memcg);
> > + set_active_obj_cgroup(old_objcg);
> > + if (objcg)
> > + obj_cgroup_put(objcg);
> >   return head;
> >  /*
> >   * In case anything failed, we just free everything we got.
> > diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> > index 1192c9953620..04d24acfffc7 100644
> > --- a/fs/notify/fanotify/fanotify.c
> > +++ b/fs/notify/fanotify/fanotify.c
> > @@ -530,7 +530,7 @@ static struct fanotify_event 
> > *fanotify_alloc_event(struct fsnotify_group *group,
> >   struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
> >   const struct path *path = fsnotify_data_path(data, data_type);
> >   unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
> > - struct mem_cgroup *old_memcg;
> > + struct obj_cgroup *old_objcg;
> >   struct inode *child = NULL;
> >   bool name_event = false;
> >
> > @@ -580,7 +580,7 @@ static struct fanotify_event 
> > *fanotify_alloc_event(struct fsnotify_group *group,
> >   gfp |= __GFP_RETRY_MAYFAIL;
> >
> >   /* Whoever is interested in the event, pays for the allocation. */
> > - old_memcg = set_active_memcg(group->memcg);
> > + old_objcg = set_active_obj_cgroup(group->objcg);
> >
> >   if (fanotify_is_perm_event(mask)) {
> >   event = fanotify_alloc_perm_event(path, gfp);
> > @@ -608,7 +608,7 @@ static struct fanotify_event 
> > *fanotify_alloc_event(struct fsnotify_group *group,
> >   event->pid = get_pid(task_tgid(current));
> >
> >  out:
> > - set_active_memcg(old_memcg);
> > + set_active_obj_cgroup(old_objcg);
> >   return event;
> >  }
> >
> > diff --git a/fs/notify/fanotify/fanotify_user.c 
> > b/fs/notify/fanotify/fanotify_user.c
> > index 9e0c1afac8bd..055ca36d4e0e 100644
> > --- 

Re: [PATCH 5/5] mm: memcontrol: use object cgroup for remote memory cgroup charging

2021-03-01 Thread Roman Gushchin
On Mon, Mar 01, 2021 at 02:22:27PM +0800, Muchun Song wrote:
> We spent a lot of energy to make slab accounting do not hold a refcount
> to memory cgroup, so the dying cgroup can be freed as soon as possible
> on cgroup offlined.
> 
> But some users of remote memory cgroup charging (e.g. bpf and fsnotify)
> hold a refcount to memory cgroup for charging to it later. Actually,
> the slab core use obj_cgroup APIs for memory cgroup charing, so we can
> hold a refcount to obj_cgroup instead of memory cgroup. In this case,
> the infrastructure of remote meory charging also do not hold a refcount
> to memory cgroup.

-cc all except mm folks

Same here, let's not switch the remote charging infra to objcg to save
an ability to use it for user pages. If we have a real problem with bpf/...,
let's solve it case by case.

Thanks!

> 
> Signed-off-by: Muchun Song 
> ---
>  fs/buffer.c  | 10 --
>  fs/notify/fanotify/fanotify.c|  6 ++--
>  fs/notify/fanotify/fanotify_user.c   |  2 +-
>  fs/notify/group.c|  3 +-
>  fs/notify/inotify/inotify_fsnotify.c |  8 ++---
>  fs/notify/inotify/inotify_user.c |  2 +-
>  include/linux/bpf.h  |  2 +-
>  include/linux/fsnotify_backend.h |  2 +-
>  include/linux/memcontrol.h   | 15 
>  include/linux/sched.h|  4 +--
>  include/linux/sched/mm.h | 28 +++
>  kernel/bpf/syscall.c | 35 +--
>  kernel/fork.c|  2 +-
>  mm/memcontrol.c  | 66 
> 
>  14 files changed, 121 insertions(+), 64 deletions(-)
> 
> diff --git a/fs/buffer.c b/fs/buffer.c
> index 591547779dbd..cc99fcf66368 100644
> --- a/fs/buffer.c
> +++ b/fs/buffer.c
> @@ -842,14 +842,16 @@ struct buffer_head *alloc_page_buffers(struct page 
> *page, unsigned long size,
>   struct buffer_head *bh, *head;
>   gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
>   long offset;
> - struct mem_cgroup *memcg, *old_memcg;
> + struct mem_cgroup *memcg;
> + struct obj_cgroup *objcg, *old_objcg;
>  
>   if (retry)
>   gfp |= __GFP_NOFAIL;
>  
>   /* The page lock pins the memcg */
>   memcg = page_memcg(page);
> - old_memcg = set_active_memcg(memcg);
> + objcg = get_obj_cgroup_from_mem_cgroup(memcg);
> + old_objcg = set_active_obj_cgroup(objcg);
>  
>   head = NULL;
>   offset = PAGE_SIZE;
> @@ -868,7 +870,9 @@ struct buffer_head *alloc_page_buffers(struct page *page, 
> unsigned long size,
>   set_bh_page(bh, page, offset);
>   }
>  out:
> - set_active_memcg(old_memcg);
> + set_active_obj_cgroup(old_objcg);
> + if (objcg)
> + obj_cgroup_put(objcg);
>   return head;
>  /*
>   * In case anything failed, we just free everything we got.
> diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
> index 1192c9953620..04d24acfffc7 100644
> --- a/fs/notify/fanotify/fanotify.c
> +++ b/fs/notify/fanotify/fanotify.c
> @@ -530,7 +530,7 @@ static struct fanotify_event *fanotify_alloc_event(struct 
> fsnotify_group *group,
>   struct inode *dirid = fanotify_dfid_inode(mask, data, data_type, dir);
>   const struct path *path = fsnotify_data_path(data, data_type);
>   unsigned int fid_mode = FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS);
> - struct mem_cgroup *old_memcg;
> + struct obj_cgroup *old_objcg;
>   struct inode *child = NULL;
>   bool name_event = false;
>  
> @@ -580,7 +580,7 @@ static struct fanotify_event *fanotify_alloc_event(struct 
> fsnotify_group *group,
>   gfp |= __GFP_RETRY_MAYFAIL;
>  
>   /* Whoever is interested in the event, pays for the allocation. */
> - old_memcg = set_active_memcg(group->memcg);
> + old_objcg = set_active_obj_cgroup(group->objcg);
>  
>   if (fanotify_is_perm_event(mask)) {
>   event = fanotify_alloc_perm_event(path, gfp);
> @@ -608,7 +608,7 @@ static struct fanotify_event *fanotify_alloc_event(struct 
> fsnotify_group *group,
>   event->pid = get_pid(task_tgid(current));
>  
>  out:
> - set_active_memcg(old_memcg);
> + set_active_obj_cgroup(old_objcg);
>   return event;
>  }
>  
> diff --git a/fs/notify/fanotify/fanotify_user.c 
> b/fs/notify/fanotify/fanotify_user.c
> index 9e0c1afac8bd..055ca36d4e0e 100644
> --- a/fs/notify/fanotify/fanotify_user.c
> +++ b/fs/notify/fanotify/fanotify_user.c
> @@ -985,7 +985,7 @@ SYSCALL_DEFINE2(fanotify_init, unsigned int, flags, 
> unsigned int, event_f_flags)
>   group->fanotify_data.user = user;
>   group->fanotify_data.flags = flags;
>   atomic_inc(>fanotify_listeners);
> - group->memcg = get_mem_cgroup_from_mm(current->mm);
> + group->objcg = get_obj_cgroup_from_current();
>  
>   group->overflow_event = fanotify_alloc_overflow_event();
>   if (unlikely(!group->overflow_event)) {
> diff --git