Re: [PATCH v7 3/8] mm: factor helpers for memory_failure_dev_pagemap

2021-10-14 Thread Christoph Hellwig
On Fri, Sep 24, 2021 at 09:09:54PM +0800, Shiyang Ruan wrote:
> The memory_failure_dev_pagemap code is a bit complex even before the
> RMAP feature for fsdax is introduced.  So factor out some helper
> functions to simplify the code.
> 
> Signed-off-by: Shiyang Ruan 
> ---
>  mm/memory-failure.c | 140 
>  1 file changed, 76 insertions(+), 64 deletions(-)
> 
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 54879c339024..8ff9b52823c0 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1430,6 +1430,79 @@ static int try_to_split_thp_page(struct page *page, 
> const char *msg)
>   return 0;
>  }
>  
> +static void unmap_and_kill(struct list_head *to_kill, unsigned long pfn,
> + struct address_space *mapping, pgoff_t index, int flags)
> +{
> + struct to_kill *tk;
> + unsigned long size = 0;
> +
> + list_for_each_entry(tk, to_kill, nd)
> + if (tk->size_shift)
> + size = max(size, 1UL << tk->size_shift);
> + if (size) {

Nit: an empty line here would be nice for readability.
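
I.e. something like this, just to show where I mean (whitespace only):

	list_for_each_entry(tk, to_kill, nd)
		if (tk->size_shift)
			size = max(size, 1UL << tk->size_shift);

	if (size) {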

> + if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
> + /*
> +  * TODO: Handle HMM pages which may need coordination
> +  * with device-side memory.
> +  */
> + return -EBUSY;

We've got rid of the HMM terminology for device private memory, so
I'd update the comment to match while you're at it.
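E.g. something along these lines (exact wording is just a suggestion):

	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
		/*
		 * TODO: Handle device-private pages, which may need
		 * coordination with the driver that owns the memory.
		 */
		return -EBUSY;
	}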

Otherwise looks good:

Reviewed-by: Christoph Hellwig 



Re: [PATCH v7 3/8] mm: factor helpers for memory_failure_dev_pagemap

2021-10-14 Thread Darrick J. Wong
On Fri, Sep 24, 2021 at 09:09:54PM +0800, Shiyang Ruan wrote:
> The memory_failure_dev_pagemap code is a bit complex even before the
> RMAP feature for fsdax is introduced.  So factor out some helper
> functions to simplify the code.
> 
> Signed-off-by: Shiyang Ruan 

This looks like a reasonable hoist...
Reviewed-by: Darrick J. Wong 

--D

> ---
>  mm/memory-failure.c | 140 
>  1 file changed, 76 insertions(+), 64 deletions(-)
> 
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 54879c339024..8ff9b52823c0 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1430,6 +1430,79 @@ static int try_to_split_thp_page(struct page *page, 
> const char *msg)
>   return 0;
>  }
>  
> +static void unmap_and_kill(struct list_head *to_kill, unsigned long pfn,
> + struct address_space *mapping, pgoff_t index, int flags)
> +{
> + struct to_kill *tk;
> + unsigned long size = 0;
> +
> + list_for_each_entry(tk, to_kill, nd)
> + if (tk->size_shift)
> + size = max(size, 1UL << tk->size_shift);
> + if (size) {
> + /*
> +  * Unmap the largest mapping to avoid breaking up device-dax
> +  * mappings which are constant size. The actual size of the
> +  * mapping being torn down is communicated in siginfo, see
> +  * kill_proc()
> +  */
> + loff_t start = (index << PAGE_SHIFT) & ~(size - 1);
> +
> + unmap_mapping_range(mapping, start, size, 0);
> + }
> +
> + kill_procs(to_kill, flags & MF_MUST_KILL, false, pfn, flags);
> +}
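
(Side note for other readers, not part of the patch: with a PMD-sized
device-dax mapping, size ends up as 2MiB, so assuming PAGE_SHIFT == 12,
for example:

	start = (0x203 << PAGE_SHIFT) & ~(0x200000UL - 1)
	      = 0x203000 & ~0x1fffff
	      = 0x200000	/* rounded down to the start of the 2MiB mapping */

i.e. the whole constant-size mapping is torn down, as the comment says.)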
> +
> +static int mf_generic_kill_procs(unsigned long long pfn, int flags,
> + struct dev_pagemap *pgmap)
> +{
> + struct page *page = pfn_to_page(pfn);
> + LIST_HEAD(to_kill);
> + dax_entry_t cookie;
> +
> + /*
> +  * Prevent the inode from being freed while we are interrogating
> +  * the address_space, typically this would be handled by
> +  * lock_page(), but dax pages do not use the page lock. This
> +  * also prevents changes to the mapping of this pfn until
> +  * poison signaling is complete.
> +  */
> + cookie = dax_lock_page(page);
> + if (!cookie)
> + return -EBUSY;
> +
> + if (hwpoison_filter(page))
> + return 0;
> +
> + if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
> + /*
> +  * TODO: Handle HMM pages which may need coordination
> +  * with device-side memory.
> +  */
> + return -EBUSY;
> + }
> +
> + /*
> +  * Use this flag as an indication that the dax page has been
> +  * remapped UC to prevent speculative consumption of poison.
> +  */
> + SetPageHWPoison(page);
> +
> + /*
> +  * Unlike System-RAM there is no possibility to swap in a
> +  * different physical page at a given virtual address, so all
> +  * userspace consumption of ZONE_DEVICE memory necessitates
> +  * SIGBUS (i.e. MF_MUST_KILL)
> +  */
> + flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
> + collect_procs(page, &to_kill, true);
> +
> + unmap_and_kill(&to_kill, pfn, page->mapping, page->index, flags);
> + dax_unlock_page(page, cookie);
> + return 0;
> +}
> +
>  static int memory_failure_hugetlb(unsigned long pfn, int flags)
>  {
>   struct page *p = pfn_to_page(pfn);
> @@ -1519,12 +1592,8 @@ static int memory_failure_dev_pagemap(unsigned long 
> pfn, int flags,
>   struct dev_pagemap *pgmap)
>  {
>   struct page *page = pfn_to_page(pfn);
> - unsigned long size = 0;
> - struct to_kill *tk;
>   LIST_HEAD(tokill);
> - int rc = -EBUSY;
> - loff_t start;
> - dax_entry_t cookie;
> + int rc = -ENXIO;
>  
>   if (flags & MF_COUNT_INCREASED)
>   /*
> @@ -1533,67 +1602,10 @@ static int memory_failure_dev_pagemap(unsigned long 
> pfn, int flags,
>   put_page(page);
>  
>   /* device metadata space is not recoverable */
> - if (!pgmap_pfn_valid(pgmap, pfn)) {
> - rc = -ENXIO;
> - goto out;
> - }
> -
> - /*
> -  * Prevent the inode from being freed while we are interrogating
> -  * the address_space, typically this would be handled by
> -  * lock_page(), but dax pages do not use the page lock. This
> -  * also prevents changes to the mapping of this pfn until
> -  * poison signaling is complete.
> -  */
> - cookie = dax_lock_page(page);
> - if (!cookie)
> + if (!pgmap_pfn_valid(pgmap, pfn))
>   goto out;
>  
> - if (hwpoison_filter(page)) {
> - rc = 0;
> - goto unlock;
> - }
> -
> - if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
> - /*
> -  * TODO: Handle HMM pages which may need coordination
> -  * with device-side memory.
> -  */
> - goto unlock;