On 2013/1/26 13:02, Naoya Horiguchi wrote:

> Currently soft_offline_page() is hard to maintain because it has many
> return points and goto statements. All of this mess comes from get_any_page().
> This function should only get page refcount as the name implies, but it does
> some page isolating actions like SetPageHWPoison() and dequeuing hugepage.
> This patch corrects it and introduces some internal subroutines to make
> soft offlining code more readable and maintainable.
> 
> ChangeLog v2:
>   - receive returned value from __soft_offline_page and soft_offline_huge_page
>   - place __soft_offline_page after soft_offline_page to reduce the diff
>   - rebased onto mmotm-2013-01-23-17-04
>   - add comment on double checks of PageHWPoison
> 
> Signed-off-by: Naoya Horiguchi <[email protected]>
> ---
>  mm/memory-failure.c | 154 ++++++++++++++++++++++++++++------------------------
>  1 file changed, 83 insertions(+), 71 deletions(-)
> 
> diff --git mmotm-2013-01-23-17-04.orig/mm/memory-failure.c mmotm-2013-01-23-17-04/mm/memory-failure.c
> index c95e19a..302625b 100644
> --- mmotm-2013-01-23-17-04.orig/mm/memory-failure.c
> +++ mmotm-2013-01-23-17-04/mm/memory-failure.c
> @@ -1368,7 +1368,7 @@ static struct page *new_page(struct page *p, unsigned long private, int **x)
>   * that is not free, and 1 for any other page type.
>   * For 1 the page is returned with increased page count, otherwise not.
>   */
> -static int get_any_page(struct page *p, unsigned long pfn, int flags)
> +static int __get_any_page(struct page *p, unsigned long pfn, int flags)
>  {
>       int ret;
>  
> @@ -1393,11 +1393,9 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
>       if (!get_page_unless_zero(compound_head(p))) {
>               if (PageHuge(p)) {
>                       pr_info("%s: %#lx free huge page\n", __func__, pfn);
> -                     ret = dequeue_hwpoisoned_huge_page(compound_head(p));
> +                     ret = 0;
>               } else if (is_free_buddy_page(p)) {
>                       pr_info("%s: %#lx free buddy page\n", __func__, pfn);
> -                     /* Set hwpoison bit while page is still isolated */
> -                     SetPageHWPoison(p);
>                       ret = 0;
>               } else {
>                       pr_info("%s: %#lx: unknown zero refcount page type %lx\n",
> @@ -1413,42 +1411,62 @@ static int get_any_page(struct page *p, unsigned long pfn, int flags)
>       return ret;
>  }
>  
> +static int get_any_page(struct page *page, unsigned long pfn, int flags)
> +{
> +     int ret = __get_any_page(page, pfn, flags);
> +
> +     if (ret == 1 && !PageHuge(page) && !PageLRU(page)) {
> +             /*
> +              * Try to free it.
> +              */
> +             put_page(page);
> +             shake_page(page, 1);
> +
> +             /*
> +              * Did it turn free?
> +              */
> +             ret = __get_any_page(page, pfn, 0);
> +             if (!PageLRU(page)) {
> +                     pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
> +                             pfn, page->flags);
> +                     return -EIO;
> +             }
> +     }
> +     return ret;
> +}
> +
>  static int soft_offline_huge_page(struct page *page, int flags)
>  {
>       int ret;
>       unsigned long pfn = page_to_pfn(page);
>       struct page *hpage = compound_head(page);
>  
> +     /*
> +      * This double-check of PageHWPoison is to avoid the race with
> +      * memory_failure(). See also comment in __soft_offline_page().
> +      */
> +     lock_page(hpage);
>       if (PageHWPoison(hpage)) {
> +             unlock_page(hpage);
> +             put_page(hpage);
>               pr_info("soft offline: %#lx hugepage already poisoned\n", pfn);
> -             ret = -EBUSY;
> -             goto out;
> +             return -EBUSY;
>       }
> -
> -     ret = get_any_page(page, pfn, flags);
> -     if (ret < 0)
> -             goto out;
> -     if (ret == 0)
> -             goto done;
> +     unlock_page(hpage);
>  
>       /* Keep page count to indicate a given hugepage is isolated. */
>       ret = migrate_huge_page(hpage, new_page, MPOL_MF_MOVE_ALL, false,
>                               MIGRATE_SYNC);
>       put_page(hpage);
> -     if (ret) {
> +     if (ret)
>               pr_info("soft offline: %#lx: migration failed %d, type %lx\n",
>                       pfn, ret, page->flags);
> -             goto out;
> -     }
> -done:
>       /* keep elevated page count for bad page */
> -     atomic_long_add(1 << compound_trans_order(hpage), &num_poisoned_pages);
> -     set_page_hwpoison_huge_page(hpage);
> -     dequeue_hwpoisoned_huge_page(hpage);

Hi Naoya,

Is num_poisoned_pages still incremented when soft_offline_huge_page() is
called? I mean for the in-use huge pages.

Thanks,
Xishi Qiu

> -out:
>       return ret;
>  }
>  
> +static int __soft_offline_page(struct page *page, int flags);
> +
>  /**
>   * soft_offline_page - Soft offline a page.
>   * @page: page to offline
> @@ -1477,62 +1495,60 @@ int soft_offline_page(struct page *page, int flags)
>       unsigned long pfn = page_to_pfn(page);
>       struct page *hpage = compound_trans_head(page);
>  
> -     if (PageHuge(page)) {
> -             ret = soft_offline_huge_page(page, flags);
> -             goto out;
> +     if (PageHWPoison(page)) {
> +             pr_info("soft offline: %#lx page already poisoned\n", pfn);
> +             return -EBUSY;
>       }
> -     if (PageTransHuge(hpage)) {
> +     if (!PageHuge(page) && PageTransHuge(hpage)) {
>               if (PageAnon(hpage) && unlikely(split_huge_page(hpage))) {
>                       pr_info("soft offline: %#lx: failed to split THP\n",
>                               pfn);
> -                     ret = -EBUSY;
> -                     goto out;
> +                     return -EBUSY;
>               }
>       }
>  
> -     if (PageHWPoison(page)) {
> -             pr_info("soft offline: %#lx page already poisoned\n", pfn);
> -             ret = -EBUSY;
> -             goto out;
> -     }
> -
>       ret = get_any_page(page, pfn, flags);
>       if (ret < 0)
> -             goto out;
> -     if (ret == 0)
> -             goto done;
> -
> -     /*
> -      * Page cache page we can handle?
> -      */
> -     if (!PageLRU(page)) {
> -             /*
> -              * Try to free it.
> -              */
> -             put_page(page);
> -             shake_page(page, 1);
> -
> -             /*
> -              * Did it turn free?
> -              */
> -             ret = get_any_page(page, pfn, 0);
> -             if (ret < 0)
> -                     goto out;
> -             if (ret == 0)
> -                     goto done;
> -     }
> -     if (!PageLRU(page)) {
> -             pr_info("soft_offline: %#lx: unknown non LRU page type %lx\n",
> -                     pfn, page->flags);
> -             ret = -EIO;
> -             goto out;
> +             return ret;
> +     if (ret) { /* for in-use pages */
> +             if (PageHuge(page))
> +                     ret = soft_offline_huge_page(page, flags);
> +             else
> +                     ret = __soft_offline_page(page, flags);
> +     } else { /* for free pages */
> +             if (PageHuge(page)) {
> +                     set_page_hwpoison_huge_page(hpage);
> +                     dequeue_hwpoisoned_huge_page(hpage);
> +                     atomic_long_add(1 << compound_trans_order(hpage),
> +                                     &num_poisoned_pages);
> +             } else {
> +                     SetPageHWPoison(page);
> +                     atomic_long_inc(&num_poisoned_pages);
> +             }
>       }
> +     /* keep elevated page count for bad page */
> +     return ret;
> +}
> +
> +static int __soft_offline_page(struct page *page, int flags)
> +{
> +     int ret;
> +     unsigned long pfn = page_to_pfn(page);
>  
>       /*
> -      * Synchronized using the page lock with memory_failure()
> +      * Check PageHWPoison again inside page lock because PageHWPoison
> +      * is set by memory_failure() outside page lock. Note that
> +      * memory_failure() also double-checks PageHWPoison inside page lock,
> +      * so there's no race between soft_offline_page() and memory_failure().
>        */
>       lock_page(page);
>       wait_on_page_writeback(page);
> +     if (PageHWPoison(page)) {
> +             unlock_page(page);
> +             put_page(page);
> +             pr_info("soft offline: %#lx page already poisoned\n", pfn);
> +             return -EBUSY;
> +     }
>       /*
>        * Try to invalidate first. This should work for
>        * non dirty unmapped page cache pages.
> @@ -1545,9 +1561,10 @@ int soft_offline_page(struct page *page, int flags)
>        */
>       if (ret == 1) {
>               put_page(page);
> -             ret = 0;
>               pr_info("soft_offline: %#lx: invalidated\n", pfn);
> -             goto done;
> +             SetPageHWPoison(page);
> +             atomic_long_inc(&num_poisoned_pages);
> +             return 0;
>       }
>  
>       /*
> @@ -1575,18 +1592,13 @@ int soft_offline_page(struct page *page, int flags)
>                               pfn, ret, page->flags);
>                       if (ret > 0)
>                               ret = -EIO;
> +             } else {
> +                     SetPageHWPoison(page);
> +                     atomic_long_inc(&num_poisoned_pages);
>               }
>       } else {
>               pr_info("soft offline: %#lx: isolation failed: %d, page count %d, type %lx\n",
>                       pfn, ret, page_count(page), page->flags);
>       }
> -     if (ret)
> -             goto out;
> -
> -done:
> -     /* keep elevated page count for bad page */
> -     atomic_long_inc(&num_poisoned_pages);
> -     SetPageHWPoison(page);
> -out:
>       return ret;
>  }



--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to