On 24.02.2021 13:48, Konstantin Khorenko wrote:
> Previously we invalidated all iterators up to the memcg root,
> but this is overkill: we can check if currently dying memcg is
> stored in iterator's 'last_visited' and invalidate only those
> unlucky iterators.
> 
> https://jira.sw.ru/browse/PSBM-123655
> 
> Signed-off-by: Konstantin Khorenko <[email protected]>
> ---
>  mm/memcontrol.c | 48 +++++++++++++++++++++++++++++++++++++++++-------
>  1 file changed, 41 insertions(+), 7 deletions(-)
> 
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index eec944f712b0..b6059f46049b 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -192,8 +192,8 @@ struct mem_cgroup_stat2_cpu {
>  
>  struct mem_cgroup_reclaim_iter {
>       /*
> -      * last scanned hierarchy member. Valid only if last_dead_count
> -      * matches memcg->dead_count of the hierarchy root group.
> +      * Last scanned hierarchy member.
> +      * If stored memcg is destroyed, the field is wiped.
>        */
>       struct mem_cgroup *last_visited;
>       unsigned long last_dead_count;
> @@ -1570,19 +1570,54 @@ static void mem_cgroup_iter_invalidate(struct 
> mem_cgroup *root,
>  {
>       struct mem_cgroup_reclaim_iter *iter;
>       struct mem_cgroup_per_zone *mz;
> +     struct mem_cgroup *pos;
>       int zone, node, i;
>  
> +     /*
> +      * When a group in the hierarchy below root is destroyed,
> +      * the hierarchy iterator can no longer be trusted iif
> +      * iter->last_visited contains the cgroup being destroyed.
> +      * Check if we get this unlikely case and invalidate the iterator
> +      * if so.
> +      *
> +      * Note, only "break-ed" iterators can store iter->last_visited
> +      * == dead_memcg because normally 'last_visited' is assigned
> +      * in mem_cgroup_iter_update() and 'new_position' is just after
> +      * css_tryget() there (ref inc-ed in __mem_cgroup_iter_next())
> +      * and thus cgroup is not offlined yet.
> +      *
> +      * mem_cgroup_iter_break() in its turn puts memcg's css but does
> +      * not wipe it from iter->last_visited.
> +      *
> +      * Q: Why dying memcg (dead_memcg) cannot get into
> +      *    iter->last_visited a bit later after we wipe it here?
> +      * A: Because up to the moment of the current function execution
> +      *    css_tryget() is guaranteed to fail on 'dead_memcg'.
> +      */
> +
> +     rcu_read_lock();

Looking this again, I think we do not need rcu_read_lock() here, so it does
not protect anything.

>       for_each_node(node) {
>               for (zone = 0; zone < MAX_NR_ZONES; zone++) {
>                       mz = mem_cgroup_zoneinfo(root, node, zone);
>  
>                       for (i = 0; i < ARRAY_SIZE(mz->reclaim_iter); i++) {
>                               iter = &mz->reclaim_iter[i];
> -                             rcu_assign_pointer(iter->last_visited, NULL);
> +
> +                             pos = rcu_access_pointer(iter->last_visited);
> +                             /*
> +                              * It's OK to race with mem_cgroup_iter_update()
> +                              * here because it cannot write new
> +                              * position == dead_memcg as
> +                              * css_tryget() for it should fail already.
> +                              */
> +                             if (pos == dead_memcg) {
> +                                     rcu_assign_pointer(iter->last_visited,
> +                                                        NULL);
>                               }
>                       }
>               }
>       }
> +     rcu_read_unlock();
>  
>       /*
>        * When a group in the hierarchy below root is destroyed, the
> @@ -1631,10 +1666,9 @@ static void mem_cgroup_iter_update(struct 
> mem_cgroup_reclaim_iter *iter,
>                                  int sequence)
>  {
>       /*
> -      * We store the sequence count from the time @last_visited was
> -      * loaded successfully instead of rereading it here so that we
> -      * don't lose destruction events in between.  We could have
> -      * raced with the destruction of @new_position after all.
> +      * The position saved in 'last_visited' is always valid.
> +      * If the stored corresponding cgroup is destroyed,
> +      * 'last_visited' is NULLed.
>        */
>       rcu_assign_pointer(iter->last_visited, new_position);
>       iter->last_dead_count = sequence;
> 

_______________________________________________
Devel mailing list
[email protected]
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to