[PATCH v2 2/2] mm: soft-offline: close the race against page allocation

2018-07-16 Thread Naoya Horiguchi
A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed in the course of soft-offline.
This is undesirable because soft-offline (which is about corrected error)
is less aggressive than hard-offline (which is about uncorrected error),
and we can make soft-offline fail and keep using the page for a good
reason such as "system is busy."

Two main changes of this patch are:

- setting the migrate type of the target page to MIGRATE_ISOLATE. As done
  in free_unref_page_commit(), this makes the kernel bypass the pcplist
  when freeing the page, so we can assume that the page is in the
  freelist just after put_page() returns,

- setting PG_hwpoison on a free page under zone->lock, which protects
  the freelists; this lets us avoid setting PG_hwpoison on a page
  that is about to be allocated.

Reported-by: Xishi Qiu 
Signed-off-by: Naoya Horiguchi 
---
changelog v1->v2:
- updated comment on set_hwpoison_free_buddy_page(),
- moved calling set_hwpoison_free_buddy_page() from mm/migrate.c to
  mm/memory-failure.c, which is necessary to check the return code of
  set_hwpoison_free_buddy_page().
---
 include/linux/page-flags.h |  5 +++++
 include/linux/swapops.h    | 10 ----------
 mm/memory-failure.c        | 35 +++++++++++++++++++++++++++++------
 mm/migrate.c               |  9 ---------
 mm/page_alloc.c            | 30 ++++++++++++++++++++++++++++++
 5 files changed, 64 insertions(+), 25 deletions(-)

diff --git v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/page-flags.h v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/page-flags.h
index 901943e..74bee8c 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/page-flags.h
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/page-flags.h
@@ -369,8 +369,13 @@ PAGEFLAG_FALSE(Uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
+extern bool set_hwpoison_free_buddy_page(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison)
+static inline bool set_hwpoison_free_buddy_page(struct page *page)
+{
+   return 0;
+}
 #define __PG_HWPOISON 0
 #endif
 
diff --git v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/swapops.h v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/swapops.h
index 9c0eb4d..fe8e08b 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/swapops.h
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/swapops.h
@@ -335,11 +335,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-   return TestSetPageHWPoison(page);
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
atomic_long_inc(&num_poisoned_pages);
@@ -362,11 +357,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
return 0;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-   return false;
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 }
diff --git v4.18-rc4-mmotm-2018-07-10-16-50/mm/memory-failure.c v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/memory-failure.c
index 9b77f85..936d0e7 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/mm/memory-failure.c
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/memory-failure.c
@@ -57,6 +57,7 @@
 #include 
 #include 
 #include 
+#include <linux/page-isolation.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -1609,8 +1610,10 @@ static int soft_offline_huge_page(struct page *page, int flags)
 */
ret = dissolve_free_huge_page(page);
if (!ret) {
-   if (!TestSetPageHWPoison(page))
+   if (set_hwpoison_free_buddy_page(page))
num_poisoned_pages_inc();
+   else
+   ret = -EBUSY;
}
}
return ret;
@@ -1688,6 +1691,11 @@ static int __soft_offline_page(struct page *page, int flags)
pfn, ret, page->flags, &page->flags);
if (ret > 0)
ret = -EIO;
+   } else {
+   if (set_hwpoison_free_buddy_page(page))
+   num_poisoned_pages_inc();
+   else
+   ret = -EBUSY;
}
} else {
 pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n",
@@ -1699,6 +1707,7 @@ static int __soft_offline_page(struct page *page, int flags)
 static int soft_offline_in_use_page(struct page *page, int flags)
 {
int ret;
+   int mt;
struct page *hpage = compound_head(page);
 
if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1717,23 +1726,37 @@ static int soft_offline_in_use_page(struct page *page, int flags)
put_hwpoison_page(hpage);
}
 
+   /*
+* Setting MIGRATE_ISOLATE here ensures that the 

[PATCH v2 2/2] mm: soft-offline: close the race against page allocation

2018-07-16 Thread Naoya Horiguchi
A process can be killed with SIGBUS(BUS_MCEERR_AR) when it tries to
allocate a page that was just freed in the course of soft-offline.
This is undesirable because soft-offline (which is about corrected error)
is less aggressive than hard-offline (which is about uncorrected error),
and we can make soft-offline fail and keep using the page for a good
reason such as "system is busy."

Two main changes of this patch are:

- setting the migrate type of the target page to MIGRATE_ISOLATE. As done
  in free_unref_page_commit(), this makes the kernel bypass the pcplist
  when freeing the page, so we can assume that the page is in the
  freelist just after put_page() returns,

- setting PG_hwpoison on a free page under zone->lock, which protects
  the freelists; this lets us avoid setting PG_hwpoison on a page
  that is about to be allocated.

Reported-by: Xishi Qiu 
Signed-off-by: Naoya Horiguchi 
---
changelog v1->v2:
- updated comment on set_hwpoison_free_buddy_page(),
- moved calling set_hwpoison_free_buddy_page() from mm/migrate.c to
  mm/memory-failure.c, which is necessary to check the return code of
  set_hwpoison_free_buddy_page().
---
 include/linux/page-flags.h |  5 +++++
 include/linux/swapops.h    | 10 ----------
 mm/memory-failure.c        | 35 +++++++++++++++++++++++++++++------
 mm/migrate.c               |  9 ---------
 mm/page_alloc.c            | 30 ++++++++++++++++++++++++++++++
 5 files changed, 64 insertions(+), 25 deletions(-)

diff --git v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/page-flags.h v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/page-flags.h
index 901943e..74bee8c 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/page-flags.h
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/page-flags.h
@@ -369,8 +369,13 @@ PAGEFLAG_FALSE(Uncached)
 PAGEFLAG(HWPoison, hwpoison, PF_ANY)
 TESTSCFLAG(HWPoison, hwpoison, PF_ANY)
 #define __PG_HWPOISON (1UL << PG_hwpoison)
+extern bool set_hwpoison_free_buddy_page(struct page *page);
 #else
 PAGEFLAG_FALSE(HWPoison)
+static inline bool set_hwpoison_free_buddy_page(struct page *page)
+{
+   return 0;
+}
 #define __PG_HWPOISON 0
 #endif
 
diff --git v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/swapops.h v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/swapops.h
index 9c0eb4d..fe8e08b 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/include/linux/swapops.h
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/include/linux/swapops.h
@@ -335,11 +335,6 @@ static inline int is_hwpoison_entry(swp_entry_t entry)
return swp_type(entry) == SWP_HWPOISON;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-   return TestSetPageHWPoison(page);
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
atomic_long_inc(&num_poisoned_pages);
@@ -362,11 +357,6 @@ static inline int is_hwpoison_entry(swp_entry_t swp)
return 0;
 }
 
-static inline bool test_set_page_hwpoison(struct page *page)
-{
-   return false;
-}
-
 static inline void num_poisoned_pages_inc(void)
 {
 }
diff --git v4.18-rc4-mmotm-2018-07-10-16-50/mm/memory-failure.c v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/memory-failure.c
index 9b77f85..936d0e7 100644
--- v4.18-rc4-mmotm-2018-07-10-16-50/mm/memory-failure.c
+++ v4.18-rc4-mmotm-2018-07-10-16-50_patched/mm/memory-failure.c
@@ -57,6 +57,7 @@
 #include 
 #include 
 #include 
+#include <linux/page-isolation.h>
 #include "internal.h"
 #include "ras/ras_event.h"
 
@@ -1609,8 +1610,10 @@ static int soft_offline_huge_page(struct page *page, int flags)
 */
ret = dissolve_free_huge_page(page);
if (!ret) {
-   if (!TestSetPageHWPoison(page))
+   if (set_hwpoison_free_buddy_page(page))
num_poisoned_pages_inc();
+   else
+   ret = -EBUSY;
}
}
return ret;
@@ -1688,6 +1691,11 @@ static int __soft_offline_page(struct page *page, int flags)
pfn, ret, page->flags, &page->flags);
if (ret > 0)
ret = -EIO;
+   } else {
+   if (set_hwpoison_free_buddy_page(page))
+   num_poisoned_pages_inc();
+   else
+   ret = -EBUSY;
}
} else {
 pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx (%pGp)\n",
@@ -1699,6 +1707,7 @@ static int __soft_offline_page(struct page *page, int flags)
 static int soft_offline_in_use_page(struct page *page, int flags)
 {
int ret;
+   int mt;
struct page *hpage = compound_head(page);
 
if (!PageHuge(page) && PageTransHuge(hpage)) {
@@ -1717,23 +1726,37 @@ static int soft_offline_in_use_page(struct page *page, int flags)
put_hwpoison_page(hpage);
}
 
+   /*
+* Setting MIGRATE_ISOLATE here ensures that the