On Mon, 24 Aug 2020, Alex Shi wrote:

> From: Alexander Duyck <alexander.h.du...@linux.intel.com>
> 
> Use this new function to replace the repeated code; no functional change.
> 
> When testing for relock we can avoid the need for RCU locking if we simply
> compare the page pgdat and memcg pointers versus those that the lruvec is
> holding. By doing this we can avoid the extra pointer walks and accesses of
> the memory cgroup.
> 
> In addition we can avoid the checks entirely if lruvec is currently NULL.
> 
> Signed-off-by: Alexander Duyck <alexander.h.du...@linux.intel.com>
> Signed-off-by: Alex Shi <alex....@linux.alibaba.com>

Again, I'll wait to see __munlock_pagevec() fixed before Acking this
one, but that's the only issue.  And I've suggested that you use
lruvec_holds_page_lru_lock() in mm/vmscan.c move_pages_to_lru(),
to replace the uglier and less efficient VM_BUG_ON_PAGE there.
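
To be concrete, what I have in mind there is roughly this (quoting
from memory of your current series, so the exact context line may
differ):

	-	VM_BUG_ON_PAGE(mem_cgroup_page_lruvec(page, page_pgdat(page))
	-						!= lruvec, page);
	+	VM_BUG_ON_PAGE(!lruvec_holds_page_lru_lock(page, lruvec), page);

which replaces the mem_cgroup_page_lruvec() walk with the cheap
pointer comparisons in the new helper.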

Oh, there is one other issue: the 0day robot did report (2020-06-19)
that sparse doesn't understand relock_page_lruvec*(). I've never got
around to working out how to write what it needs - conditional
__release plus __acquire in some form, I imagine. I haven't worked
with sparse annotations before, so I'll give it a try, but if anyone
beats me to it, that will be welcome. There are higher priorities,
though: I do not think you should wait for the sparse warning to be
fixed before reposting.
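
For anyone who does beat me to it: the shape I imagine, an untested
guess that may well not be what sparse wants, is a no-op
__release()/__acquire() pair (those only adjust sparse's notion of
the lock context, and compile away outside __CHECKER__):

	static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
			struct lruvec *locked_lruvec)
	{
		if (locked_lruvec) {
			if (lruvec_holds_page_lru_lock(page, locked_lruvec)) {
				/* lock is kept: hand the same context back */
				__release(&locked_lruvec->lru_lock);
				__acquire(&locked_lruvec->lru_lock);
				return locked_lruvec;
			}

			unlock_page_lruvec_irq(locked_lruvec);
		}

		return lock_page_lruvec_irq(page);
	}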

> Cc: Johannes Weiner <han...@cmpxchg.org>
> Cc: Andrew Morton <a...@linux-foundation.org>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Cc: Andrey Ryabinin <aryabi...@virtuozzo.com>
> Cc: Matthew Wilcox <wi...@infradead.org>
> Cc: Mel Gorman <mgor...@techsingularity.net>
> Cc: Konstantin Khlebnikov <khlebni...@yandex-team.ru>
> Cc: Hugh Dickins <hu...@google.com>
> Cc: Tejun Heo <t...@kernel.org>
> Cc: linux-kernel@vger.kernel.org
> Cc: cgro...@vger.kernel.org
> Cc: linux...@kvack.org
> ---
>  include/linux/memcontrol.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++
>  mm/mlock.c                 |  9 +-------
>  mm/swap.c                  | 33 +++++++----------------------
>  mm/vmscan.c                |  8 +------
>  4 files changed, 61 insertions(+), 41 deletions(-)
> 
> diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
> index 7b170e9028b5..ee6ef2d8ad52 100644
> --- a/include/linux/memcontrol.h
> +++ b/include/linux/memcontrol.h
> @@ -488,6 +488,22 @@ static inline struct lruvec *mem_cgroup_lruvec(struct mem_cgroup *memcg,
>  
>  struct lruvec *mem_cgroup_page_lruvec(struct page *, struct pglist_data *);
>  
> +static inline bool lruvec_holds_page_lru_lock(struct page *page,
> +                                           struct lruvec *lruvec)
> +{
> +     pg_data_t *pgdat = page_pgdat(page);
> +     const struct mem_cgroup *memcg;
> +     struct mem_cgroup_per_node *mz;
> +
> +     if (mem_cgroup_disabled())
> +             return lruvec == &pgdat->__lruvec;
> +
> +     mz = container_of(lruvec, struct mem_cgroup_per_node, lruvec);
> +     memcg = page->mem_cgroup ? : root_mem_cgroup;
> +
> +     return lruvec->pgdat == pgdat && mz->memcg == memcg;
> +}
> +
>  struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
>  
>  struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
> @@ -1023,6 +1039,14 @@ static inline struct lruvec *mem_cgroup_page_lruvec(struct page *page,
>       return &pgdat->__lruvec;
>  }
>  
> +static inline bool lruvec_holds_page_lru_lock(struct page *page,
> +                                           struct lruvec *lruvec)
> +{
> +     pg_data_t *pgdat = page_pgdat(page);
> +
> +     return lruvec == &pgdat->__lruvec;
> +}
> +
>  static inline struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg)
>  {
>       return NULL;
> @@ -1469,6 +1493,34 @@ static inline void unlock_page_lruvec_irqrestore(struct lruvec *lruvec,
>       spin_unlock_irqrestore(&lruvec->lru_lock, flags);
>  }
>  
> +/* Don't lock again if the page's lruvec is already locked */
> +static inline struct lruvec *relock_page_lruvec_irq(struct page *page,
> +             struct lruvec *locked_lruvec)
> +{
> +     if (locked_lruvec) {
> +             if (lruvec_holds_page_lru_lock(page, locked_lruvec))
> +                     return locked_lruvec;
> +
> +             unlock_page_lruvec_irq(locked_lruvec);
> +     }
> +
> +     return lock_page_lruvec_irq(page);
> +}
> +
> +/* Don't lock again if the page's lruvec is already locked */
> +static inline struct lruvec *relock_page_lruvec_irqsave(struct page *page,
> +             struct lruvec *locked_lruvec, unsigned long *flags)
> +{
> +     if (locked_lruvec) {
> +             if (lruvec_holds_page_lru_lock(page, locked_lruvec))
> +                     return locked_lruvec;
> +
> +             unlock_page_lruvec_irqrestore(locked_lruvec, *flags);
> +     }
> +
> +     return lock_page_lruvec_irqsave(page, flags);
> +}
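
(For anyone reading along, the pattern these enable in the loops
below - just paraphrasing the mm/swap.c hunks, not new code - is:

	struct lruvec *lruvec = NULL;
	unsigned long flags;
	int i;

	for (i = 0; i < pagevec_count(pvec); i++) {
		struct page *page = pvec->pages[i];

		/* lock is retained while pages share the same lruvec */
		lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);

		/* ... operate on page under lruvec->lru_lock ... */
	}
	if (lruvec)
		unlock_page_lruvec_irqrestore(lruvec, flags);

so a pagevec whose pages all sit in one lruvec takes the lock once.)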
> +
>  #ifdef CONFIG_CGROUP_WRITEBACK
>  
>  struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb);
> diff --git a/mm/mlock.c b/mm/mlock.c
> index 177d2588e863..0448409184e3 100644
> --- a/mm/mlock.c
> +++ b/mm/mlock.c
> @@ -302,17 +302,10 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
>       /* Phase 1: page isolation */
>       for (i = 0; i < nr; i++) {
>               struct page *page = pvec->pages[i];
> -             struct lruvec *new_lruvec;
>  
>               /* block memcg change in mem_cgroup_move_account */
>               lock_page_memcg(page);
> -             new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -             if (new_lruvec != lruvec) {
> -                     if (lruvec)
> -                             unlock_page_lruvec_irq(lruvec);
> -                     lruvec = lock_page_lruvec_irq(page);
> -             }
> -
> +             lruvec = relock_page_lruvec_irq(page, lruvec);
>               if (TestClearPageMlocked(page)) {
>                       /*
>                        * We already have pin from follow_page_mask()
> diff --git a/mm/swap.c b/mm/swap.c
> index b67959b701c0..2ac78e8fab71 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -209,19 +209,12 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
>  
>       for (i = 0; i < pagevec_count(pvec); i++) {
>               struct page *page = pvec->pages[i];
> -             struct lruvec *new_lruvec;
>  
>               /* block memcg migration during page moving between lru */
>               if (!TestClearPageLRU(page))
>                       continue;
>  
> -             new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -             if (lruvec != new_lruvec) {
> -                     if (lruvec)
> -                             unlock_page_lruvec_irqrestore(lruvec, flags);
> -                     lruvec = lock_page_lruvec_irqsave(page, &flags);
> -             }
> -
> +             lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
>               (*move_fn)(page, lruvec);
>  
>               SetPageLRU(page);
> @@ -865,17 +858,12 @@ void release_pages(struct page **pages, int nr)
>               }
>  
>               if (PageLRU(page)) {
> -                     struct lruvec *new_lruvec;
> -
> -                     new_lruvec = mem_cgroup_page_lruvec(page,
> -                                                     page_pgdat(page));
> -                     if (new_lruvec != lruvec) {
> -                             if (lruvec)
> -                                     unlock_page_lruvec_irqrestore(lruvec,
> -                                                                     flags);
> +                     struct lruvec *prev_lruvec = lruvec;
> +
> +                     lruvec = relock_page_lruvec_irqsave(page, lruvec,
> +                                                                     &flags);
> +                     if (prev_lruvec != lruvec)
>                               lock_batch = 0;
> -                             lruvec = lock_page_lruvec_irqsave(page, &flags);
> -                     }
>  
>                       VM_BUG_ON_PAGE(!PageLRU(page), page);
>                       __ClearPageLRU(page);
> @@ -982,15 +970,8 @@ void __pagevec_lru_add(struct pagevec *pvec)
>  
>       for (i = 0; i < pagevec_count(pvec); i++) {
>               struct page *page = pvec->pages[i];
> -             struct lruvec *new_lruvec;
> -
> -             new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -             if (lruvec != new_lruvec) {
> -                     if (lruvec)
> -                             unlock_page_lruvec_irqrestore(lruvec, flags);
> -                     lruvec = lock_page_lruvec_irqsave(page, &flags);
> -             }
>  
> +             lruvec = relock_page_lruvec_irqsave(page, lruvec, &flags);
>               __pagevec_lru_add_fn(page, lruvec);
>       }
>       if (lruvec)
> diff --git a/mm/vmscan.c b/mm/vmscan.c
> index 789444ae4c88..2c94790d4cb1 100644
> --- a/mm/vmscan.c
> +++ b/mm/vmscan.c
> @@ -4280,15 +4280,9 @@ void check_move_unevictable_pages(struct pagevec *pvec)
>  
>       for (i = 0; i < pvec->nr; i++) {
>               struct page *page = pvec->pages[i];
> -             struct lruvec *new_lruvec;
>  
>               pgscanned++;
> -             new_lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
> -             if (lruvec != new_lruvec) {
> -                     if (lruvec)
> -                             unlock_page_lruvec_irq(lruvec);
> -                     lruvec = lock_page_lruvec_irq(page);
> -             }
> +             lruvec = relock_page_lruvec_irq(page, lruvec);
>  
>               if (!PageLRU(page) || !PageUnevictable(page))
>                       continue;
> -- 
> 1.8.3.1
