Re: [PATCH v7 3/8] mm: factor helpers for memory_failure_dev_pagemap

2021-10-14 Thread Christoph Hellwig
On Fri, Sep 24, 2021 at 09:09:54PM +0800, Shiyang Ruan wrote:
> The memory_failure_dev_pagemap code is a bit complex before the RMAP
> feature for fsdax is introduced.  So factor out some helper functions to
> simplify the code.
> 
> Signed-off-by: Shiyang Ruan 
> ---
>  mm/memory-failure.c | 140 
>  1 file changed, 76 insertions(+), 64 deletions(-)
> 
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 54879c339024..8ff9b52823c0 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1430,6 +1430,79 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
>   return 0;
>  }
>  
> +static void unmap_and_kill(struct list_head *to_kill, unsigned long pfn,
> + struct address_space *mapping, pgoff_t index, int flags)
> +{
> + struct to_kill *tk;
> + unsigned long size = 0;
> +
> + list_for_each_entry(tk, to_kill, nd)
> + if (tk->size_shift)
> + size = max(size, 1UL << tk->size_shift);
> + if (size) {

Nit: an empty line here would be nice for readability.
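I.e. something like this (just a sketch of what I mean, untested):

	list_for_each_entry(tk, to_kill, nd)
		if (tk->size_shift)
			size = max(size, 1UL << tk->size_shift);

	if (size) {
		/* ... unchanged ... */
	}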

> + if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
> + /*
> +  * TODO: Handle HMM pages which may need coordination
> +  * with device-side memory.
> +  */
> + return -EBUSY;

We've got rid of the HMM terminology for device private memory, so
I'd reword the comment to follow that while you're at it.
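E.g. something like this (the wording is only a suggestion, untested):

	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
		/*
		 * TODO: Handle device private pages which may need
		 * coordination with device-side memory.
		 */
		return -EBUSY;
	}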

Otherwise looks good:

Reviewed-by: Christoph Hellwig 



Re: [PATCH v7 3/8] mm: factor helpers for memory_failure_dev_pagemap

2021-10-14 Thread Darrick J. Wong
On Fri, Sep 24, 2021 at 09:09:54PM +0800, Shiyang Ruan wrote:
> The memory_failure_dev_pagemap code is a bit complex before the RMAP
> feature for fsdax is introduced.  So factor out some helper functions to
> simplify the code.
> 
> Signed-off-by: Shiyang Ruan 

This looks like a reasonable hoist...
Reviewed-by: Darrick J. Wong 

--D

> ---
>  mm/memory-failure.c | 140 
>  1 file changed, 76 insertions(+), 64 deletions(-)
> 
> diff --git a/mm/memory-failure.c b/mm/memory-failure.c
> index 54879c339024..8ff9b52823c0 100644
> --- a/mm/memory-failure.c
> +++ b/mm/memory-failure.c
> @@ -1430,6 +1430,79 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
>   return 0;
>  }
>  
> +static void unmap_and_kill(struct list_head *to_kill, unsigned long pfn,
> + struct address_space *mapping, pgoff_t index, int flags)
> +{
> + struct to_kill *tk;
> + unsigned long size = 0;
> +
> + list_for_each_entry(tk, to_kill, nd)
> + if (tk->size_shift)
> + size = max(size, 1UL << tk->size_shift);
> + if (size) {
> + /*
> +  * Unmap the largest mapping to avoid breaking up device-dax
> +  * mappings which are constant size. The actual size of the
> +  * mapping being torn down is communicated in siginfo, see
> +  * kill_proc()
> +  */
> + loff_t start = (index << PAGE_SHIFT) & ~(size - 1);
> +
> + unmap_mapping_range(mapping, start, size, 0);
> + }
> +
> + kill_procs(to_kill, flags & MF_MUST_KILL, false, pfn, flags);
> +}
> +
> +static int mf_generic_kill_procs(unsigned long long pfn, int flags,
> + struct dev_pagemap *pgmap)
> +{
> + struct page *page = pfn_to_page(pfn);
> + LIST_HEAD(to_kill);
> + dax_entry_t cookie;
> +
> + /*
> +  * Prevent the inode from being freed while we are interrogating
> +  * the address_space, typically this would be handled by
> +  * lock_page(), but dax pages do not use the page lock. This
> +  * also prevents changes to the mapping of this pfn until
> +  * poison signaling is complete.
> +  */
> + cookie = dax_lock_page(page);
> + if (!cookie)
> + return -EBUSY;
> +
> + if (hwpoison_filter(page))
> + return 0;
> +
> + if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
> + /*
> +  * TODO: Handle HMM pages which may need coordination
> +  * with device-side memory.
> +  */
> + return -EBUSY;
> + }
> +
> + /*
> +  * Use this flag as an indication that the dax page has been
> +  * remapped UC to prevent speculative consumption of poison.
> +  */
> + SetPageHWPoison(page);
> +
> + /*
> +  * Unlike System-RAM there is no possibility to swap in a
> +  * different physical page at a given virtual address, so all
> +  * userspace consumption of ZONE_DEVICE memory necessitates
> +  * SIGBUS (i.e. MF_MUST_KILL)
> +  */
> + flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
> + collect_procs(page, &to_kill, true);
> +
> + unmap_and_kill(&to_kill, pfn, page->mapping, page->index, flags);
> + dax_unlock_page(page, cookie);
> + return 0;
> +}
> +
>  static int memory_failure_hugetlb(unsigned long pfn, int flags)
>  {
>   struct page *p = pfn_to_page(pfn);
> @@ -1519,12 +1592,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
>   struct dev_pagemap *pgmap)
>  {
>   struct page *page = pfn_to_page(pfn);
> - unsigned long size = 0;
> - struct to_kill *tk;
>   LIST_HEAD(tokill);
> - int rc = -EBUSY;
> - loff_t start;
> - dax_entry_t cookie;
> + int rc = -ENXIO;
>  
>   if (flags & MF_COUNT_INCREASED)
>   /*
> @@ -1533,67 +1602,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
>   put_page(page);
>  
>   /* device metadata space is not recoverable */
> - if (!pgmap_pfn_valid(pgmap, pfn)) {
> - rc = -ENXIO;
> - goto out;
> - }
> -
> - /*
> -  * Prevent the inode from being freed while we are interrogating
> -  * the address_space, typically this would be handled by
> -  * lock_page(), but dax pages do not use the page lock. This
> -  * also prevents changes to the mapping of this pfn until
> -  * poison signaling is complete.
> -  */
> - cookie = dax_lock_page(page);
> - if (!cookie)
> + if (!pgmap_pfn_valid(pgmap, pfn))
>   goto out;
>  
> - if (hwpoison_filter(page)) {
> - rc = 0;
> - goto unlock;
> - }
> -
> - if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
> - /*
> -  * TODO: Handle HMM pages which may need coordination
> -  * with device-side memory.
> -  */
> - goto unlock;

[PATCH v7 3/8] mm: factor helpers for memory_failure_dev_pagemap

2021-09-24 Thread Shiyang Ruan
The memory_failure_dev_pagemap code is a bit complex before the RMAP
feature for fsdax is introduced.  So factor out some helper functions to
simplify the code.

Signed-off-by: Shiyang Ruan 
---
 mm/memory-failure.c | 140 
 1 file changed, 76 insertions(+), 64 deletions(-)

diff --git a/mm/memory-failure.c b/mm/memory-failure.c
index 54879c339024..8ff9b52823c0 100644
--- a/mm/memory-failure.c
+++ b/mm/memory-failure.c
@@ -1430,6 +1430,79 @@ static int try_to_split_thp_page(struct page *page, const char *msg)
return 0;
 }
 
+static void unmap_and_kill(struct list_head *to_kill, unsigned long pfn,
+   struct address_space *mapping, pgoff_t index, int flags)
+{
+   struct to_kill *tk;
+   unsigned long size = 0;
+
+   list_for_each_entry(tk, to_kill, nd)
+   if (tk->size_shift)
+   size = max(size, 1UL << tk->size_shift);
+   if (size) {
+   /*
+* Unmap the largest mapping to avoid breaking up device-dax
+* mappings which are constant size. The actual size of the
+* mapping being torn down is communicated in siginfo, see
+* kill_proc()
+*/
+   loff_t start = (index << PAGE_SHIFT) & ~(size - 1);
+
+   unmap_mapping_range(mapping, start, size, 0);
+   }
+
+   kill_procs(to_kill, flags & MF_MUST_KILL, false, pfn, flags);
+}
+
+static int mf_generic_kill_procs(unsigned long long pfn, int flags,
+   struct dev_pagemap *pgmap)
+{
+   struct page *page = pfn_to_page(pfn);
+   LIST_HEAD(to_kill);
+   dax_entry_t cookie;
+
+   /*
+* Prevent the inode from being freed while we are interrogating
+* the address_space, typically this would be handled by
+* lock_page(), but dax pages do not use the page lock. This
+* also prevents changes to the mapping of this pfn until
+* poison signaling is complete.
+*/
+   cookie = dax_lock_page(page);
+   if (!cookie)
+   return -EBUSY;
+
+   if (hwpoison_filter(page))
+   return 0;
+
+   if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+   /*
+* TODO: Handle HMM pages which may need coordination
+* with device-side memory.
+*/
+   return -EBUSY;
+   }
+
+   /*
+* Use this flag as an indication that the dax page has been
+* remapped UC to prevent speculative consumption of poison.
+*/
+   SetPageHWPoison(page);
+
+   /*
+* Unlike System-RAM there is no possibility to swap in a
+* different physical page at a given virtual address, so all
+* userspace consumption of ZONE_DEVICE memory necessitates
+* SIGBUS (i.e. MF_MUST_KILL)
+*/
+   flags |= MF_ACTION_REQUIRED | MF_MUST_KILL;
+   collect_procs(page, &to_kill, true);
+
+   unmap_and_kill(&to_kill, pfn, page->mapping, page->index, flags);
+   dax_unlock_page(page, cookie);
+   return 0;
+}
+
 static int memory_failure_hugetlb(unsigned long pfn, int flags)
 {
struct page *p = pfn_to_page(pfn);
@@ -1519,12 +1592,8 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
struct dev_pagemap *pgmap)
 {
struct page *page = pfn_to_page(pfn);
-   unsigned long size = 0;
-   struct to_kill *tk;
LIST_HEAD(tokill);
-   int rc = -EBUSY;
-   loff_t start;
-   dax_entry_t cookie;
+   int rc = -ENXIO;
 
if (flags & MF_COUNT_INCREASED)
/*
@@ -1533,67 +1602,10 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
put_page(page);
 
/* device metadata space is not recoverable */
-   if (!pgmap_pfn_valid(pgmap, pfn)) {
-   rc = -ENXIO;
-   goto out;
-   }
-
-   /*
-* Prevent the inode from being freed while we are interrogating
-* the address_space, typically this would be handled by
-* lock_page(), but dax pages do not use the page lock. This
-* also prevents changes to the mapping of this pfn until
-* poison signaling is complete.
-*/
-   cookie = dax_lock_page(page);
-   if (!cookie)
+   if (!pgmap_pfn_valid(pgmap, pfn))
goto out;
 
-   if (hwpoison_filter(page)) {
-   rc = 0;
-   goto unlock;
-   }
-
-   if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-   /*
-* TODO: Handle HMM pages which may need coordination
-* with device-side memory.
-*/
-   goto unlock;
-   }
-
-   /*
-* Use this flag as an indication that the dax page has been
-* remapped UC to prevent speculative consumption of poison.
-*/
-   SetPageHWPoison(page);
-
-   /*
-* U