A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed in the course of soft-offline.
This is undesirable because soft-offline (which handles corrected errors)
is less aggressive than hard-offline (which handles uncorrected errors),
so we can let soft-offline fail and keep using the page when there is a
good reason to, such as "system is busy."

The two main changes in this patch are:

- setting the migrate type of the target page to MIGRATE_ISOLATE. As is
  done in free_unref_page_commit(), this makes the kernel bypass the
  pcplist when freeing the page, so we can assume that the page is on
  the freelist right after put_page() returns,

- setting PG_hwpoison on a free page under zone->lock, which protects
  the freelists. This lets us avoid setting PG_hwpoison on a page
  that has already been chosen for allocation.

Reported-by: Xishi Qiu <[email protected]>
Signed-off-by: Naoya Horiguchi <[email protected]>
---
 include/linux/page-flags.h |  5 +++++
 include/linux/swapops.h    | 10 ----------
 mm/memory-failure.c        | 26 +++++++++++++++++++++-----
 mm/migrate.c               |  2 +-
 mm/page_alloc.c            | 29 +++++++++++++++++++++++++++++
 5 files changed, 56 insertions(+), 16 deletions(-)

diff --git v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/page-flags.h 
v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/page-flags.h
index 901943e..74bee8c 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/page-flags.h
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/page-flags.h
@@ -369,8 +369,13 @@ PAGEFLAG_FALSE(Uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
+extern bool set_hwpoison_free_buddy_page(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison)
+static inline bool set_hwpoison_free_buddy_page(struct page *page)
+{
+       return 0;
+}
 #define __PG_HWPOISON 0
 #endif
 
diff --git v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/swapops.h 
v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/swapops.h
index 9c0eb4d..fe8e08b 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/swapops.h
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/swapops.h
@@ -335,11 +335,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
        return swp_type(entry) == SWP_HWPOISON;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-       return TestSetPageHWPoison(page);
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
        atomic_long_inc(&num_poisoned_pages);
@@ -362,11 +357,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
        return 0;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-       return false;
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 }
diff --git v4.18-rc4-mmotm-2018-07-10-16-50/mm/memory-failure.c 
v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/memory-failure.c
index c63d982..794687a 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/mm/memory-failure.c
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/memory-failure.c
@@ -57,6 +57,7 @@
 #include <linux/mm_inline.h>
 #include <linux/kfifo.h>
 #include <linux/ratelimit.h>
+#include <linux/page-isolation.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -1697,6 +1698,7 @@ static int __soft_offline_page(struct page *page, int 
flags)
 static int soft_offline_in_use_page(struct page *page, int flags)
 {
        int ret;
+       int mt;
        struct page *hpage = compound_head(page);
 
        if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1715,23 +1717,37 @@ static int soft_offline_in_use_page(struct page *page, 
int flags)
                put_hwpoison_page(hpage);
        }
 
+       /*
+        * Setting MIGRATE_ISOLATE here ensures that the page will be linked
+        * to free list immediately (not via pcplist) when released after
+        * successful page migration. Otherwise we can't guarantee that the
+        * page is really free after put_page() returns, so
+        * set_hwpoison_free_buddy_page() is highly likely to fail.
+        */
+       mt = get_pageblock_migratetype(page);
+       set_pageblock_migratetype(page, MIGRATE_ISOLATE);
        if (PageHuge(page))
                ret = soft_offline_huge_page(page, flags);
        else
                ret = __soft_offline_page(page, flags);
-
+       set_pageblock_migratetype(page, mt);
        return ret;
 }
 
-static void soft_offline_free_page(struct page *page)
+static int soft_offline_free_page(struct page *page)
 {
        int rc = 0;
        struct page *head = compound_head(page);
 
        if (PageHuge(head))
                rc = dissolve_free_huge_page(page);
-       if (!rc && !TestSetPageHWPoison(page))
-               num_poisoned_pages_inc();
+       if (!rc) {
+               if (set_hwpoison_free_buddy_page(page))
+                       num_poisoned_pages_inc();
+               else
+                       rc = -EBUSY;
+       }
+       return rc;
 }
 
 /**
@@ -1775,7 +1791,7 @@ int soft_offline_page(struct page *page, int flags)
        if (ret > 0)
                ret = soft_offline_in_use_page(page, flags);
        else if (ret == 0)
-               soft_offline_free_page(page);
+               ret = soft_offline_free_page(page);
 
        return ret;
 }
diff --git v4.18-rc4-mmotm-2018-07-10-16-50/mm/migrate.c 
v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/migrate.c
index 3ae213b..e772323 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/mm/migrate.c
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/migrate.c
@@ -1199,7 +1199,7 @@ static ICE_noinline int unmap_and_move(new_page_t 
get_new_page,
                         * intentionally. Although it's rather weird,
                         * it's how HWPoison flag works at the moment.
                         */
-                       if (!test_set_page_hwpoison(page))
+                       if (set_hwpoison_free_buddy_page(page))
                                num_poisoned_pages_inc();
                }
        } else {
diff --git v4.18-rc4-mmotm-2018-07-10-16-50/mm/page_alloc.c 
v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/page_alloc.c
index 607deff..3c76d40 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/mm/page_alloc.c
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/page_alloc.c
@@ -8027,3 +8027,32 @@ bool is_free_buddy_page(struct page *page)
 
        return order < MAX_ORDER;
 }
+
+#ifdef CONFIG_MEMORY_FAILURE
+/*
+ * Set PG_hwpoison flag if a given page is confirmed to be a free page
+ * within zone lock, which prevents the race against page allocation.
+ */
+bool set_hwpoison_free_buddy_page(struct page *page)
+{
+       struct zone *zone = page_zone(page);
+       unsigned long pfn = page_to_pfn(page);
+       unsigned long flags;
+       unsigned int order;
+       bool hwpoisoned = false;
+
+       spin_lock_irqsave(&zone->lock, flags);
+       for (order = 0; order < MAX_ORDER; order++) {
+               struct page *page_head = page - (pfn & ((1 << order) - 1));
+
+               if (PageBuddy(page_head) && page_order(page_head) >= order) {
+                       if (!TestSetPageHWPoison(page))
+                               hwpoisoned = true;
+                       break;
+               }
+       }
+       spin_unlock_irqrestore(&zone->lock, flags);
+
+       return hwpoisoned;
+}
+#endif
-- 
2.7.0

Reply via email to