From: Zi Yan <[email protected]>

For THP soft-offline support, we first try to migrate a THP without
splitting. If the migration fails, we split the THP and migrate the
raw error page.

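migrate_pages() therefore no longer splits a THP itself when the
migration reason is MR_MEMORY_FAILURE; it fails the migration instead
and leaves the split and the retry to the soft-offline code.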

Signed-off-by: Zi Yan <[email protected]>
---
 mm/memory-failure.c | 77 +++++++++++++++++++++++++++++++++++++----------------
 mm/migrate.c        | 16 +++++++++++
 2 files changed, 70 insertions(+), 23 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 8a9ac6f9e1b0..c05107548d72 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1598,10 +1598,11 @@ static int soft_offline_huge_page(struct page *page, int flags)
        return ret;
 }
 
-static int __soft_offline_page(struct page *page, int flags)
+static int __soft_offline_page(struct page *page, int flags, int *split)
 {
        int ret;
-       unsigned long pfn = page_to_pfn(page);
+       struct page *hpage = compound_head(page);
+       unsigned long pfn = page_to_pfn(hpage);
 
        /*
         * Check PageHWPoison again inside page lock because PageHWPoison
@@ -1609,11 +1610,11 @@ static int __soft_offline_page(struct page *page, int flags)
         * memory_failure() also double-checks PageHWPoison inside page lock,
         * so there's no race between soft_offline_page() and memory_failure().
         */
-       lock_page(page);
-       wait_on_page_writeback(page);
-       if (PageHWPoison(page)) {
-               unlock_page(page);
-               put_hwpoison_page(page);
+       lock_page(hpage);
+       wait_on_page_writeback(hpage);
+       if (PageHWPoison(hpage)) {
+               unlock_page(hpage);
+               put_hwpoison_page(hpage);
                pr_info("soft offline: %#lx page already poisoned\n", pfn);
                return -EBUSY;
        }
@@ -1621,14 +1622,14 @@ static int __soft_offline_page(struct page *page, int flags)
         * Try to invalidate first. This should work for
         * non dirty unmapped page cache pages.
         */
-       ret = invalidate_inode_page(page);
-       unlock_page(page);
+       ret = invalidate_inode_page(hpage);
+       unlock_page(hpage);
        /*
         * RED-PEN would be better to keep it isolated here, but we
         * would need to fix isolation locking first.
         */
        if (ret == 1) {
-               put_hwpoison_page(page);
+               put_hwpoison_page(hpage);
                pr_info("soft_offline: %#lx: invalidated\n", pfn);
                SetPageHWPoison(page);
                num_poisoned_pages_inc();
@@ -1640,15 +1641,15 @@ static int __soft_offline_page(struct page *page, int flags)
         * Try to migrate to a new page instead. migrate.c
         * handles a large number of cases for us.
         */
-       if (PageLRU(page))
-               ret = isolate_lru_page(page);
+       if (PageLRU(hpage))
+               ret = isolate_lru_page(hpage);
        else
-               ret = isolate_movable_page(page, ISOLATE_UNEVICTABLE);
+               ret = isolate_movable_page(hpage, ISOLATE_UNEVICTABLE);
        /*
         * Drop page reference which is came from get_any_page()
         * successful isolate_lru_page() already took another one.
         */
-       put_hwpoison_page(page);
+       put_hwpoison_page(hpage);
        if (!ret) {
                LIST_HEAD(pagelist);
                /*
@@ -1657,23 +1658,53 @@ static int __soft_offline_page(struct page *page, int flags)
                 * cannot have PAGE_MAPPING_MOVABLE.
                 */
                if (!__PageMovable(page))
-                       inc_node_page_state(page, NR_ISOLATED_ANON +
-                                               page_is_file_cache(page));
-               list_add(&page->lru, &pagelist);
+                       mod_node_page_state(page_pgdat(hpage), NR_ISOLATED_ANON +
+                                       page_is_file_cache(hpage), hpage_nr_pages(hpage));
+retry_subpage:
+               list_add(&hpage->lru, &pagelist);
                ret = migrate_pages(&pagelist, new_page, NULL, MPOL_MF_MOVE_ALL,
                                        MIGRATE_SYNC, MR_MEMORY_FAILURE);
                if (ret) {
-                       if (!list_empty(&pagelist))
-                               putback_movable_pages(&pagelist);
-
+                       if (!list_empty(&pagelist)) {
+                               if (!PageTransHuge(hpage))
+                                       putback_movable_pages(&pagelist);
+                               else {
+                                       lock_page(hpage);
+                                       if (split_huge_page_to_list(hpage, &pagelist)) {
+                                               unlock_page(hpage);
+                                               goto failed;
+                                       }
+                                       unlock_page(hpage);
+
+                                       if (split)
+                                               *split = 1;
+                                       /*
+                                        * Pull the raw error page out and put back other subpages.
+                                        * Then retry the raw error page.
+                                        */
+                                       list_del(&page->lru);
+                                       putback_movable_pages(&pagelist);
+                                       hpage = page;
+                                       goto retry_subpage;
+                               }
+                       }
+failed:
                        pr_info("soft offline: %#lx: migration failed %d, type %lx (%pGp)\n",
-                               pfn, ret, page->flags, &page->flags);
+                               pfn, ret, hpage->flags, &hpage->flags);
                        if (ret > 0)
                                ret = -EIO;
                }
+               /*
+                * Set PageHWPoison on the raw error page.
+                *
+                * If the page is a THP, PageHWPoison is set then cleared
+                * in its head page in migrate_pages(). So we need to set the raw error
+                * page here. Otherwise, setting PageHWPoison again is fine.
+                */
+               SetPageHWPoison(page);
        } else {
                pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n",
-                       pfn, ret, page_count(page), page->flags, &page->flags);
+                       pfn, ret, page_count(hpage), hpage->flags, &hpage->flags);
        }
        return ret;
 }
@@ -1704,7 +1735,7 @@ static int soft_offline_in_use_page(struct page *page, int flags, int *split)
        if (PageHuge(page))
                ret = soft_offline_huge_page(page, flags);
        else
-               ret = __soft_offline_page(page, flags);
+               ret = __soft_offline_page(page, flags, split);
 
        return ret;
 }
diff --git a/mm/migrate.c b/mm/migrate.c
index f7b69282d216..b44df9cf72fd 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1118,6 +1118,15 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
        }
 
        if (unlikely(PageTransHuge(page) && !PageTransHuge(newpage))) {
+               /*
+                * soft-offline wants to retry the raw error subpage, if the THP
+                * migration fails. So we do not split the THP here and exit directly.
+                */
+               if (reason == MR_MEMORY_FAILURE) {
+                       rc = -ENOMEM;
+                       goto put_new;
+               }
+
                lock_page(page);
                rc = split_huge_page(page);
                unlock_page(page);
@@ -1164,6 +1173,13 @@ static ICE_noinline int unmap_and_move(new_page_t get_new_page,
                         */
                        if (!test_set_page_hwpoison(page))
                                num_poisoned_pages_inc();
+
+                       /*
+                        * Clear PageHWPoison in the head page. The caller
+                        * is responsible for setting the raw error page.
+                        */
+                       if (PageTransHuge(page))
+                               ClearPageHWPoison(page);
                }
        } else {
                if (rc != -EAGAIN) {
-- 
2.13.2

Reply via email to