This patch reorders the isolation steps during munlock, moves the lru lock
so that it guards each page individually, and unfolds the
__munlock_isolate_lru_page() function, in preparation for the lru lock change.

__split_huge_page_refcount() no longer exists, but we still have to guard
PageMlocked and PageLRU against __split_huge_page_tail(); that is why the
ClearPageLRU action is moved to after the lru lock is taken.

[l...@intel.com: found a sleeping function bug ... at mm/rmap.c]
Signed-off-by: Alex Shi <alex....@linux.alibaba.com>
Cc: Kirill A. Shutemov <kir...@shutemov.name>
Cc: Andrew Morton <a...@linux-foundation.org>
Cc: Johannes Weiner <han...@cmpxchg.org>
Cc: Matthew Wilcox <wi...@infradead.org>
Cc: Hugh Dickins <hu...@google.com>
Cc: linux...@kvack.org
Cc: linux-kernel@vger.kernel.org
---
 mm/mlock.c | 93 ++++++++++++++++++++++++++++++++++----------------------------
 1 file changed, 51 insertions(+), 42 deletions(-)

diff --git a/mm/mlock.c b/mm/mlock.c
index 228ba5a8e0a5..7098be122966 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -103,25 +103,6 @@ void mlock_vma_page(struct page *page)
 }
 
 /*
- * Isolate a page from LRU with optional get_page() pin.
- * Assumes lru_lock already held and page already pinned.
- */
-static bool __munlock_isolate_lru_page(struct page *page, bool getpage)
-{
-       if (TestClearPageLRU(page)) {
-               struct lruvec *lruvec;
-
-               lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
-               if (getpage)
-                       get_page(page);
-               del_page_from_lru_list(page, lruvec, page_lru(page));
-               return true;
-       }
-
-       return false;
-}
-
-/*
  * Finish munlock after successful page isolation
  *
  * Page must be locked. This is a wrapper for try_to_munlock()
@@ -181,6 +162,7 @@ static void __munlock_isolation_failed(struct page *page)
 unsigned int munlock_vma_page(struct page *page)
 {
        int nr_pages;
+       bool clearlru = false;
        pg_data_t *pgdat = page_pgdat(page);
 
        /* For try_to_munlock() and to serialize with page migration */
@@ -189,32 +171,42 @@ unsigned int munlock_vma_page(struct page *page)
        VM_BUG_ON_PAGE(PageTail(page), page);
 
        /*
-        * Serialize with any parallel __split_huge_page_refcount() which
+        * Serialize with any parallel __split_huge_page_tail() which
         * might otherwise copy PageMlocked to part of the tail pages before
         * we clear it in the head page. It also stabilizes hpage_nr_pages().
         */
+       get_page(page);
        spin_lock_irq(&pgdat->lru_lock);
+       clearlru = TestClearPageLRU(page);
 
        if (!TestClearPageMlocked(page)) {
-               /* Potentially, PTE-mapped THP: do not skip the rest PTEs */
-               nr_pages = 1;
-               goto unlock_out;
+               if (clearlru)
+                       SetPageLRU(page);
+               /*
+                * Potentially, PTE-mapped THP: do not skip the rest PTEs
+                * Reuse lock as memory barrier for release_pages racing.
+                */
+               spin_unlock_irq(&pgdat->lru_lock);
+               put_page(page);
+               return 0;
        }
 
        nr_pages = hpage_nr_pages(page);
        __mod_zone_page_state(page_zone(page), NR_MLOCK, -nr_pages);
 
-       if (__munlock_isolate_lru_page(page, true)) {
+       if (clearlru) {
+               struct lruvec *lruvec;
+
+               lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+               del_page_from_lru_list(page, lruvec, page_lru(page));
                spin_unlock_irq(&pgdat->lru_lock);
                __munlock_isolated_page(page);
-               goto out;
+       } else {
+               spin_unlock_irq(&pgdat->lru_lock);
+               put_page(page);
+               __munlock_isolation_failed(page);
        }
-       __munlock_isolation_failed(page);
-
-unlock_out:
-       spin_unlock_irq(&pgdat->lru_lock);
 
-out:
        return nr_pages - 1;
 }
 
@@ -297,34 +289,51 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
        pagevec_init(&pvec_putback);
 
        /* Phase 1: page isolation */
-       spin_lock_irq(&zone->zone_pgdat->lru_lock);
        for (i = 0; i < nr; i++) {
                struct page *page = pvec->pages[i];
+               struct lruvec *lruvec;
+               bool clearlru;
 
-               if (TestClearPageMlocked(page)) {
-                       /*
-                        * We already have pin from follow_page_mask()
-                        * so we can spare the get_page() here.
-                        */
-                       if (__munlock_isolate_lru_page(page, false))
-                               continue;
-                       else
-                               __munlock_isolation_failed(page);
-               } else {
+               clearlru = TestClearPageLRU(page);
+               spin_lock_irq(&zone->zone_pgdat->lru_lock);
+
+               if (!TestClearPageMlocked(page)) {
                        delta_munlocked++;
+                       if (clearlru)
+                               SetPageLRU(page);
+                       goto putback;
+               }
+
+               if (!clearlru) {
+                       __munlock_isolation_failed(page);
+                       goto putback;
                }
 
                /*
+                * Isolate this page.
+                * We already have pin from follow_page_mask()
+                * so we can spare the get_page() here.
+                */
+               lruvec = mem_cgroup_page_lruvec(page, page_pgdat(page));
+               del_page_from_lru_list(page, lruvec, page_lru(page));
+               spin_unlock_irq(&zone->zone_pgdat->lru_lock);
+               continue;
+
+               /*
                 * We won't be munlocking this page in the next phase
                 * but we still need to release the follow_page_mask()
                 * pin. We cannot do it under lru_lock however. If it's
                 * the last pin, __page_cache_release() would deadlock.
                 */
+putback:
+               spin_unlock_irq(&zone->zone_pgdat->lru_lock);
                pagevec_add(&pvec_putback, pvec->pages[i]);
                pvec->pages[i] = NULL;
        }
+       /* temporarily disable irq; this will be removed later */
+       local_irq_disable();
        __mod_zone_page_state(zone, NR_MLOCK, delta_munlocked);
-       spin_unlock_irq(&zone->zone_pgdat->lru_lock);
+       local_irq_enable();
 
        /* Now we can release pins of pages that we are not munlocking */
        pagevec_release(&pvec_putback);
-- 
1.8.3.1

Reply via email to