On 18.05.2020 18:14, Andrey Ryabinin wrote: > Current css_stacks debugging isn't very usefull, it records only ~128 stacks > which is not enough and doesn't allow us to identify the problem. > Instead of recording full stacks, record only instruction pointer > and the counter of how often we hit it. > > Example of output: > css_relese: css_get/put ips > 1, [ffffffffb9713c68] __ub_set_css+0x98/0xe0 > 851675, [ffffffffb985a208] get_mem_cgroup_from_mm+0x58/0xb0 > 776, [ffffffffb985a3e3] __memcg_kmem_get_cache+0x183/0x1a0 > 776, [ffffffffb985a3b8] __memcg_kmem_get_cache+0x158/0x1a0 > 70664, [ffffffffb985f678] __memcg_kmem_newpage_charge+0x148/0x1c0 > 166228, [ffffffffb985f15b] __mem_cgroup_try_charge+0x12b/0x190 > 776, [ffffffffb9859c00] memcg_kmem_cache_create_func+0x50/0x80 > 614007, [ffffffffb985f508] __memcg_kmem_put_cache+0x48/0x70 > 116613, [ffffffffb9713f48] __ub_get_css+0x68/0x190 > 116137, [ffffffffb9716648] ub_enough_memory+0xe8/0x110 > 352, [ffffffffb9714764] ub_total_pages+0x64/0xa0 > 51, [ffffffffb9715f88] bc_fill_sysinfo.part.0+0x58/0x80 > 51, [ffffffffb97161c8] bc_mem_notify+0x218/0x270 > 22, [ffffffffb9714538] ub_sync_memcg+0x58/0x80 > 1, [ffffffffb9859b80] mem_cgroup_force_empty_write+0x40/0x70 > 1, [ffffffffb985bfb4] mem_cgroup_css_offline+0x264/0x2a0 > 1, [ffffffffb97369e0] cgroup_offline_fn+0x170/0x1a0 > 1, [ffffffffb9713c78] __ub_set_css+0xa8/0xe0 > > https://jira.sw.ru/browse/PSBM-98148 > Signed-off-by: Andrey Ryabinin <[email protected]>
Reviewed-by: Kirill Tkhai <[email protected]> > --- > include/linux/cgroup.h | 22 ++++++++++++++-------- > kernel/cgroup.c | 37 ++++++++++++++++++------------------- > mm/memcontrol.c | 18 ++++++++++-------- > 3 files changed, 42 insertions(+), 35 deletions(-) > > diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h > index 1c1ee0c458e0..919cddd78168 100644 > --- a/include/linux/cgroup.h > +++ b/include/linux/cgroup.h > @@ -117,9 +117,11 @@ extern void cgroup_unload_subsys(struct cgroup_subsys > *ss); > > extern int proc_cgroup_show(struct seq_file *, void *); > > +#define CSS_IPS_COUNT 512 > +/* Instruction pointer and count */ > struct css_stacks { > - atomic_t offset; > - unsigned long stacks[(PAGE_SIZE*2)/8 - 1]; > + unsigned long ips[CSS_IPS_COUNT]; > + atomic_t count[CSS_IPS_COUNT]; > }; > > /* Per-subsystem/per-cgroup state maintained by the system. */ > @@ -154,7 +156,7 @@ enum { > extern struct static_key css_stacks_on; > void __save_css_stack(struct cgroup_subsys_state *css); > > -static inline void save_css_stack(struct cgroup_subsys_state *css) > +static __always_inline void save_css_stack(struct cgroup_subsys_state *css) > { > if (static_key_false(&css_stacks_on)) > __save_css_stack(css); > @@ -166,7 +168,7 @@ static inline void save_css_stack(struct > cgroup_subsys_state *css) > * - an existing ref-counted reference to the css > * - task->cgroups for a locked task > */ > -static inline void css_get(struct cgroup_subsys_state *css) > +static __always_inline void css_get(struct cgroup_subsys_state *css) > { > /* We don't need to reference count the root state */ > if (!(css->flags & CSS_ROOT)) { > @@ -182,12 +184,16 @@ static inline void css_get(struct cgroup_subsys_state > *css) > * the css has been destroyed. > */ > > -static inline bool css_tryget(struct cgroup_subsys_state *css) > +static __always_inline bool css_tryget(struct cgroup_subsys_state *css) > { > + bool ret; > + > if (css->flags & CSS_ROOT) > return true; > - save_css_stack(css); > - return percpu_ref_tryget_live(&css->refcnt); > + ret = percpu_ref_tryget_live(&css->refcnt); > + if (ret) > + save_css_stack(css); > + return ret; > } > > /* > @@ -195,7 +201,7 @@ static inline bool css_tryget(struct cgroup_subsys_state > *css) > * css_get() or css_tryget() > */ > > -static inline void css_put(struct cgroup_subsys_state *css) > +static __always_inline void css_put(struct cgroup_subsys_state *css) > { > if (!(css->flags & CSS_ROOT)) { > save_css_stack(css); > diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index bf218e88ac9d..389128d37334 100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -4150,7 +4150,7 @@ static void css_dput_fn(struct work_struct *work) > > css_stacks = css->stacks; > if (css_stacks) > - free_pages((unsigned long)css_stacks, 1); > + kfree(css_stacks); > > percpu_ref_exit(&css->refcnt); > cgroup_dput(css->cgroup); > @@ -4221,9 +4221,9 @@ static void init_cgroup_css(struct cgroup_subsys_state > *css, > { > css->cgroup = cgrp; > css->flags = 0; > - if (static_key_false(&css_stacks_on) && slab_is_available()) > - css->stacks = (struct css_stacks *) > - __get_free_pages(GFP_KERNEL|__GFP_NOFAIL|__GFP_ZERO, 1); > + if (static_key_false(&css_stacks_on) && slab_is_available() && > + ss == &mem_cgroup_subsys) > + css->stacks = kzalloc(sizeof(*css->stacks), GFP_KERNEL); > else > css->stacks = 0; > if (cgrp == dummytop) > @@ -4261,28 +4261,27 @@ static int online_css(struct cgroup_subsys *ss, > struct cgroup *cgrp) > > void __save_css_stack(struct cgroup_subsys_state *css) > { > - unsigned long entries[8]; > - unsigned int offset; > + int i; > struct css_stacks *css_stacks; > - struct stack_trace trace = { > - .nr_entries = 0, > - .entries = entries, > - .max_entries = 8, > - .skip = 0 > - }; > + unsigned long ip = _RET_IP_; > > css_stacks = css->stacks; > if (!css_stacks) > return; > > - memset(entries, 0, sizeof(entries)); > - offset = atomic_add_return(8*8, &css_stacks->offset) % (PAGE_SIZE*2); > - if (offset == 0) { > - offset += 8; > - trace.max_entries = 7; > + for (i = 0; i < CSS_IPS_COUNT; i++) { I hope this won't too slow. > + if (css_stacks->ips[i] == 0) { > + if (cmpxchg(&css_stacks->ips[i], 0, ip) != 0) > + continue; > + atomic_inc(&css_stacks->count[i]); > + break; > + } > + if (css_stacks->ips[i] == ip) { > + atomic_inc(&css_stacks->count[i]); > + break; > + } > } > - save_stack_trace(&trace); > - memcpy(((char*)css_stacks)+offset, entries, trace.max_entries*8); > + WARN(i == CSS_IPS_COUNT, "css_ips overflow %p %pS\n", css, (void *)ip); > } > EXPORT_SYMBOL(__save_css_stack); > > diff --git a/mm/memcontrol.c b/mm/memcontrol.c > index c3586e8e27ca..6c8989f1260d 100644 > --- a/mm/memcontrol.c > +++ b/mm/memcontrol.c > @@ -3002,17 +3002,19 @@ void memcg_css_release_check_kmem(struct > cgroup_subsys_state *css) > > css_stacks = css->stacks; > if (css_stacks) { > - pr_err("css_relese: css_stacks offset %d\n", > - atomic_read(&css_stacks->offset)); > - for (i = 0; i < PAGE_SIZE*2/8 - 1; i++) { > - if (css_stacks->stacks[i]) > - pr_err("\t%pS\n", > - (void *)css_stacks->stacks[i]); > + pr_err("css_relese: css_get/put ips\n"); > + for (i = 0; i < CSS_IPS_COUNT; i++) { > + if (css_stacks->ips[i]) > + pr_err("\t%d, [%lx] %pS\n", > + > atomic_read(&css_stacks->count[i]), > + css_stacks->ips[i], > + (void *)css_stacks->ips[i]); > else > - continue; > + break; > } > + BUG(); > } > - BUG(); > + > } > > } > _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
