On 18.05.2020 18:14, Andrey Ryabinin wrote:
> Current css_stacks debugging isn't very usefull, it records only ~128 stacks
> which is not enough and doesn't allow us to identify the problem.
> Instead of recording full stacks, record only instruction pointer
> and the counter of how often we hit it.
> 
> Example of output:
>  css_relese: css_get/put ips
>   1, [ffffffffb9713c68] __ub_set_css+0x98/0xe0
>   851675, [ffffffffb985a208] get_mem_cgroup_from_mm+0x58/0xb0
>   776, [ffffffffb985a3e3] __memcg_kmem_get_cache+0x183/0x1a0
>   776, [ffffffffb985a3b8] __memcg_kmem_get_cache+0x158/0x1a0
>   70664, [ffffffffb985f678] __memcg_kmem_newpage_charge+0x148/0x1c0
>   166228, [ffffffffb985f15b] __mem_cgroup_try_charge+0x12b/0x190
>   776, [ffffffffb9859c00] memcg_kmem_cache_create_func+0x50/0x80
>   614007, [ffffffffb985f508] __memcg_kmem_put_cache+0x48/0x70
>   116613, [ffffffffb9713f48] __ub_get_css+0x68/0x190
>   116137, [ffffffffb9716648] ub_enough_memory+0xe8/0x110
>   352, [ffffffffb9714764] ub_total_pages+0x64/0xa0
>   51, [ffffffffb9715f88] bc_fill_sysinfo.part.0+0x58/0x80
>   51, [ffffffffb97161c8] bc_mem_notify+0x218/0x270
>   22, [ffffffffb9714538] ub_sync_memcg+0x58/0x80
>   1, [ffffffffb9859b80] mem_cgroup_force_empty_write+0x40/0x70
>   1, [ffffffffb985bfb4] mem_cgroup_css_offline+0x264/0x2a0
>   1, [ffffffffb97369e0] cgroup_offline_fn+0x170/0x1a0
>   1, [ffffffffb9713c78] __ub_set_css+0xa8/0xe0
> 
> https://jira.sw.ru/browse/PSBM-98148
> Signed-off-by: Andrey Ryabinin <[email protected]>

Reviewed-by: Kirill Tkhai <[email protected]>

> ---
>  include/linux/cgroup.h | 22 ++++++++++++++--------
>  kernel/cgroup.c        | 37 ++++++++++++++++++-------------------
>  mm/memcontrol.c        | 18 ++++++++++--------
>  3 files changed, 42 insertions(+), 35 deletions(-)
> 
> diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
> index 1c1ee0c458e0..919cddd78168 100644
> --- a/include/linux/cgroup.h
> +++ b/include/linux/cgroup.h
> @@ -117,9 +117,11 @@ extern void cgroup_unload_subsys(struct cgroup_subsys 
> *ss);
>  
>  extern int proc_cgroup_show(struct seq_file *, void *);
>  
> +#define CSS_IPS_COUNT 512
> +/* Instruction pointer and count */
>  struct css_stacks {
> -     atomic_t offset;
> -     unsigned long stacks[(PAGE_SIZE*2)/8 - 1];
> +     unsigned long ips[CSS_IPS_COUNT];
> +     atomic_t count[CSS_IPS_COUNT];
>  };
>  
>  /* Per-subsystem/per-cgroup state maintained by the system. */
> @@ -154,7 +156,7 @@ enum {
>  extern struct static_key css_stacks_on;
>  void __save_css_stack(struct cgroup_subsys_state *css);
>  
> -static inline void save_css_stack(struct cgroup_subsys_state *css)
> +static __always_inline void save_css_stack(struct cgroup_subsys_state *css)
>  {
>       if (static_key_false(&css_stacks_on))
>               __save_css_stack(css);
> @@ -166,7 +168,7 @@ static inline void save_css_stack(struct 
> cgroup_subsys_state *css)
>   * - an existing ref-counted reference to the css
>   * - task->cgroups for a locked task
>   */
> -static inline void css_get(struct cgroup_subsys_state *css)
> +static __always_inline void css_get(struct cgroup_subsys_state *css)
>  {
>       /* We don't need to reference count the root state */
>       if (!(css->flags & CSS_ROOT)) {
> @@ -182,12 +184,16 @@ static inline void css_get(struct cgroup_subsys_state 
> *css)
>   * the css has been destroyed.
>   */
>  
> -static inline bool css_tryget(struct cgroup_subsys_state *css)
> +static __always_inline bool css_tryget(struct cgroup_subsys_state *css)
>  {
> +     bool ret;
> +
>       if (css->flags & CSS_ROOT)
>               return true;
> -     save_css_stack(css);
> -     return percpu_ref_tryget_live(&css->refcnt);
> +     ret = percpu_ref_tryget_live(&css->refcnt);
> +     if (ret)
> +             save_css_stack(css);
> +     return ret;
>  }
>  
>  /*
> @@ -195,7 +201,7 @@ static inline bool css_tryget(struct cgroup_subsys_state 
> *css)
>   * css_get() or css_tryget()
>   */
>  
> -static inline void css_put(struct cgroup_subsys_state *css)
> +static __always_inline void css_put(struct cgroup_subsys_state *css)
>  {
>       if (!(css->flags & CSS_ROOT)) {
>               save_css_stack(css);
> diff --git a/kernel/cgroup.c b/kernel/cgroup.c
> index bf218e88ac9d..389128d37334 100644
> --- a/kernel/cgroup.c
> +++ b/kernel/cgroup.c
> @@ -4150,7 +4150,7 @@ static void css_dput_fn(struct work_struct *work)
>  
>       css_stacks = css->stacks;
>       if (css_stacks)
> -             free_pages((unsigned long)css_stacks, 1);
> +             kfree(css_stacks);
>  
>       percpu_ref_exit(&css->refcnt);
>       cgroup_dput(css->cgroup);
> @@ -4221,9 +4221,9 @@ static void init_cgroup_css(struct cgroup_subsys_state 
> *css,
>  {
>       css->cgroup = cgrp;
>       css->flags = 0;
> -     if (static_key_false(&css_stacks_on) && slab_is_available())
> -             css->stacks = (struct css_stacks *)
> -                     __get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1);
> +     if (static_key_false(&css_stacks_on) && slab_is_available() &&
> +         ss == &mem_cgroup_subsys)
> +             css->stacks = kzalloc(sizeof(*css->stacks), GFP_KERNEL);
>       else
>               css->stacks = 0;
>       if (cgrp == dummytop)
> @@ -4261,28 +4261,27 @@ static int online_css(struct cgroup_subsys *ss, 
> struct cgroup *cgrp)
>  
>  void __save_css_stack(struct cgroup_subsys_state *css)
>  {
> -     unsigned long entries[8];
> -     unsigned int offset;
> +     int i;
>       struct css_stacks *css_stacks;
> -     struct stack_trace trace = {
> -             .nr_entries = 0,
> -             .entries = entries,
> -             .max_entries = 8,
> -             .skip = 0
> -     };
> +     unsigned long ip = _RET_IP_;
>  
>       css_stacks = css->stacks;
>       if (!css_stacks)
>               return;
>  
> -     memset(entries, 0, sizeof(entries));
> -     offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2);
> -     if (offset == 0) {
> -             offset += 8;
> -             trace.max_entries = 7;
> +     for (i = 0; i < CSS_IPS_COUNT; i++) {

I hope this won't too slow.

> +             if (css_stacks->ips[i] == 0) {
> +                     if (cmpxchg(&css_stacks->ips[i], 0, ip) != 0)
> +                             continue;
> +                     atomic_inc(&css_stacks->count[i]);
> +                     break;
> +             }
> +             if (css_stacks->ips[i] == ip) {
> +                     atomic_inc(&css_stacks->count[i]);
> +                     break;
> +             }
>       }
> -     save_stack_trace(&trace);
> -     memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8);
> +     WARN(i == CSS_IPS_COUNT, "css_ips overflow %p %pS\n", css, (void *)ip);
>  }
>  EXPORT_SYMBOL(__save_css_stack);
>  
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index c3586e8e27ca..6c8989f1260d 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -3002,17 +3002,19 @@ void memcg_css_release_check_kmem(struct 
> cgroup_subsys_state *css)
>  
>               css_stacks = css->stacks;
>               if (css_stacks) {
> -                     pr_err("css_relese: css_stacks offset %d\n",
> -                            atomic_read(&css_stacks->offset));
> -                     for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) {
> -                             if (css_stacks->stacks[i])
> -                                     pr_err("\t%pS\n",
> -                                            (void *)css_stacks->stacks[i]);
> +                     pr_err("css_relese: css_get/put ips\n");
> +                     for (i = 0; i < CSS_IPS_COUNT; i++) {
> +                             if (css_stacks->ips[i])
> +                                     pr_err("\t%d, [%lx] %pS\n",
> +                                             
> atomic_read(&css_stacks->count[i]),
> +                                             css_stacks->ips[i],
> +                                             (void *)css_stacks->ips[i]);
>                               else
> -                                     continue;
> +                                     break;
>                       }
> +                     BUG();
>               }
> -             BUG();
> +
>       }
>  
>  }
> 

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to