Re: [PATCH] mm: fix missing wake-up event for FSDAX pages

2022-07-04 Thread Muchun Song
On Mon, Jul 04, 2022 at 11:38:16AM +0100, Matthew Wilcox wrote:
> On Mon, Jul 04, 2022 at 03:40:54PM +0800, Muchun Song wrote:
> > FSDAX page refcounts are 1-based, rather than 0-based: if refcount is
> > 1, then the page is freed.  The FSDAX pages can be pinned through GUP,
> > then they will be unpinned via unpin_user_page() using a folio variant
> > to put the page, however, folio variants did not consider this special
> > case, the result will be to miss a wakeup event (like the user of
> > __fuse_dax_break_layouts()).
> 
> Argh, no.  The 1-based refcounts are a blight on the entire kernel.
> They need to go away, not be pushed into folios as well.  I think

I would be happy if this could go away.

> we're close to having that fixed, but until then, this should do
> the trick?
> 

The following fix looks good to me since it lowers the overhead as
much as possible

Thanks.

> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index cc98ab012a9b..4cef5e0f78b6 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -1129,18 +1129,18 @@ static inline bool is_zone_movable_page(const struct 
> page *page)
>  #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
>  DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
>  
> -bool __put_devmap_managed_page(struct page *page);
> -static inline bool put_devmap_managed_page(struct page *page)
> +bool __put_devmap_managed_page(struct page *page, int refs);
> +static inline bool put_devmap_managed_page(struct page *page, int refs)
>  {
>   if (!static_branch_unlikely(_managed_key))
>   return false;
>   if (!is_zone_device_page(page))
>   return false;
> - return __put_devmap_managed_page(page);
> + return __put_devmap_managed_page(page, refs);
>  }
>  
>  #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
> -static inline bool put_devmap_managed_page(struct page *page)
> +static inline bool put_devmap_managed_page(struct page *page, int refs)
>  {
>   return false;
>  }
> @@ -1246,7 +1246,7 @@ static inline void put_page(struct page *page)
>* For some devmap managed pages we need to catch refcount transition
>* from 2 to 1:
>*/
> - if (put_devmap_managed_page(>page))
> + if (put_devmap_managed_page(>page, 1))
>   return;
>   folio_put(folio);
>  }
> diff --git a/mm/gup.c b/mm/gup.c
> index d1132b39aa8f..28df02121c78 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -88,7 +88,8 @@ static inline struct folio *try_get_folio(struct page 
> *page, int refs)
>* belongs to this folio.
>*/
>   if (unlikely(page_folio(page) != folio)) {
> - folio_put_refs(folio, refs);
> + if (!put_devmap_managed_page(>page, refs))
> + folio_put_refs(folio, refs);
>   goto retry;
>   }
>  
> @@ -177,6 +178,8 @@ static void gup_put_folio(struct folio *folio, int refs, 
> unsigned int flags)
>   refs *= GUP_PIN_COUNTING_BIAS;
>   }
>  
> + if (put_devmap_managed_page(>page, refs))
> + return;
>   folio_put_refs(folio, refs);
>  }
>  
> diff --git a/mm/memremap.c b/mm/memremap.c
> index b870a659eee6..b25e40e3a11e 100644
> --- a/mm/memremap.c
> +++ b/mm/memremap.c
> @@ -499,7 +499,7 @@ void free_zone_device_page(struct page *page)
>  }
>  
>  #ifdef CONFIG_FS_DAX
> -bool __put_devmap_managed_page(struct page *page)
> +bool __put_devmap_managed_page(struct page *page, int refs)
>  {
>   if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
>   return false;
> @@ -509,7 +509,7 @@ bool __put_devmap_managed_page(struct page *page)
>* refcount is 1, then the page is free and the refcount is
>* stable because nobody holds a reference on the page.
>*/
> - if (page_ref_dec_return(page) == 1)
> + if (page_ref_sub_return(page, refs) == 1)
>   wake_up_var(>_refcount);
>   return true;
>  }
> diff --git a/mm/swap.c b/mm/swap.c
> index c6194cfa2af6..94e42a9bab92 100644
> --- a/mm/swap.c
> +++ b/mm/swap.c
> @@ -960,7 +960,7 @@ void release_pages(struct page **pages, int nr)
>   unlock_page_lruvec_irqrestore(lruvec, flags);
>   lruvec = NULL;
>   }
> - if (put_devmap_managed_page(>page))
> + if (put_devmap_managed_page(>page, 1))
>   continue;
>   if (folio_put_testzero(folio))
>   free_zone_device_page(>page);
> 



Re: [PATCH] mm: fix missing wake-up event for FSDAX pages

2022-07-04 Thread Matthew Wilcox
On Mon, Jul 04, 2022 at 03:40:54PM +0800, Muchun Song wrote:
> FSDAX page refcounts are 1-based, rather than 0-based: if refcount is
> 1, then the page is freed.  The FSDAX pages can be pinned through GUP,
> then they will be unpinned via unpin_user_page() using a folio variant
> to put the page, however, folio variants did not consider this special
> case, the result will be to miss a wakeup event (like the user of
> __fuse_dax_break_layouts()).

Argh, no.  The 1-based refcounts are a blight on the entire kernel.
They need to go away, not be pushed into folios as well.  I think
we're close to having that fixed, but until then, this should do
the trick?

diff --git a/include/linux/mm.h b/include/linux/mm.h
index cc98ab012a9b..4cef5e0f78b6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1129,18 +1129,18 @@ static inline bool is_zone_movable_page(const struct 
page *page)
 #if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_FS_DAX)
 DECLARE_STATIC_KEY_FALSE(devmap_managed_key);
 
-bool __put_devmap_managed_page(struct page *page);
-static inline bool put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page(struct page *page, int refs);
+static inline bool put_devmap_managed_page(struct page *page, int refs)
 {
if (!static_branch_unlikely(_managed_key))
return false;
if (!is_zone_device_page(page))
return false;
-   return __put_devmap_managed_page(page);
+   return __put_devmap_managed_page(page, refs);
 }
 
 #else /* CONFIG_ZONE_DEVICE && CONFIG_FS_DAX */
-static inline bool put_devmap_managed_page(struct page *page)
+static inline bool put_devmap_managed_page(struct page *page, int refs)
 {
return false;
 }
@@ -1246,7 +1246,7 @@ static inline void put_page(struct page *page)
 * For some devmap managed pages we need to catch refcount transition
 * from 2 to 1:
 */
-   if (put_devmap_managed_page(>page))
+   if (put_devmap_managed_page(>page, 1))
return;
folio_put(folio);
 }
diff --git a/mm/gup.c b/mm/gup.c
index d1132b39aa8f..28df02121c78 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -88,7 +88,8 @@ static inline struct folio *try_get_folio(struct page *page, 
int refs)
 * belongs to this folio.
 */
if (unlikely(page_folio(page) != folio)) {
-   folio_put_refs(folio, refs);
+   if (!put_devmap_managed_page(>page, refs))
+   folio_put_refs(folio, refs);
goto retry;
}
 
@@ -177,6 +178,8 @@ static void gup_put_folio(struct folio *folio, int refs, 
unsigned int flags)
refs *= GUP_PIN_COUNTING_BIAS;
}
 
+   if (put_devmap_managed_page(>page, refs))
+   return;
folio_put_refs(folio, refs);
 }
 
diff --git a/mm/memremap.c b/mm/memremap.c
index b870a659eee6..b25e40e3a11e 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -499,7 +499,7 @@ void free_zone_device_page(struct page *page)
 }
 
 #ifdef CONFIG_FS_DAX
-bool __put_devmap_managed_page(struct page *page)
+bool __put_devmap_managed_page(struct page *page, int refs)
 {
if (page->pgmap->type != MEMORY_DEVICE_FS_DAX)
return false;
@@ -509,7 +509,7 @@ bool __put_devmap_managed_page(struct page *page)
 * refcount is 1, then the page is free and the refcount is
 * stable because nobody holds a reference on the page.
 */
-   if (page_ref_dec_return(page) == 1)
+   if (page_ref_sub_return(page, refs) == 1)
wake_up_var(>_refcount);
return true;
 }
diff --git a/mm/swap.c b/mm/swap.c
index c6194cfa2af6..94e42a9bab92 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -960,7 +960,7 @@ void release_pages(struct page **pages, int nr)
unlock_page_lruvec_irqrestore(lruvec, flags);
lruvec = NULL;
}
-   if (put_devmap_managed_page(>page))
+   if (put_devmap_managed_page(>page, 1))
continue;
if (folio_put_testzero(folio))
free_zone_device_page(>page);



[PATCH] mm: fix missing wake-up event for FSDAX pages

2022-07-04 Thread Muchun Song
FSDAX page refcounts are 1-based, rather than 0-based: if refcount is
1, then the page is freed.  The FSDAX pages can be pinned through GUP,
then they will be unpinned via unpin_user_page() using a folio variant
to put the page, however, folio variants did not consider this special
case, the result will be to miss a wakeup event (like the user of
__fuse_dax_break_layouts()).

Fixes: d8ddc099c6b3 ("mm/gup: Add gup_put_folio()")
Signed-off-by: Muchun Song 
---
 include/linux/mm.h | 22 +++---
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index 517f9deba56f..32aaa7b06f5a 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1223,6 +1223,9 @@ static inline __must_check bool try_get_page(struct page 
*page)
  */
 static inline void folio_put(struct folio *folio)
 {
+   if (put_devmap_managed_page(>page))
+   return;
+
if (folio_put_testzero(folio))
__folio_put(folio);
 }
@@ -1243,8 +1246,13 @@ static inline void folio_put(struct folio *folio)
  */
 static inline void folio_put_refs(struct folio *folio, int refs)
 {
-   if (folio_ref_sub_and_test(folio, refs))
-   __folio_put(folio);
+   /*
+* For fsdax managed pages we need to catch refcount transition
+* from 2 to 1:
+*/
+   if (refs > 1)
+   folio_ref_sub(folio, refs - 1);
+   folio_put(folio);
 }
 
 void release_pages(struct page **pages, int nr);
@@ -1268,15 +1276,7 @@ static inline void folios_put(struct folio **folios, 
unsigned int nr)
 
 static inline void put_page(struct page *page)
 {
-   struct folio *folio = page_folio(page);
-
-   /*
-* For some devmap managed pages we need to catch refcount transition
-* from 2 to 1:
-*/
-   if (put_devmap_managed_page(>page))
-   return;
-   folio_put(folio);
+   folio_put(page_folio(page));
 }
 
 /*
-- 
2.11.0