On Sat, Sep 06, 2025 at 08:56:48AM +0200, David Hildenbrand wrote:
> On 06.09.25 03:05, John Hubbard wrote:
> >
> > Probably a similar sentiment as Lorenzo here...the above diffs make the code
> > *worse* to read. In fact, I recall adding record_subpages() here long ago,
> > specifically to help clarify what was going on.
>
> Well, there is too much I dislike about record_subpages() to go back there.
> Starting with "as Willy keeps explaining, the concept of subpages does
> not exist" and ending with "why do we fill out the array even on failure".

Yes

>
> :)
>
> >
> > Now it's been returned to its original, cryptic form.
> >
>
> The code in the caller was so uncryptic that both me and Lorenzo missed
> that magical addition. :P

:'(

>
> > Just my take on it, for whatever that's worth. :)
>
> As always, appreciated.
>
> I could of course keep the simple loop in some "record_folio_pages"
> function and clean up what I dislike about record_subpages().
>
> But I would much rather have the call chain cleaned up instead, if possible.
>
>
> Roughly, what I am thinking (limiting it to pte+pmd case) about is the 
> following:

I cannot get the below to apply even with the original patch here applied + fix.

It looks like (in mm-new :) commit e73f43a66d5f ("mm/gup: remove dead pgmap
refcounting code") by Alastair has conflicted here, but even then I can't make
it apply, with/without your fix...!



>
>
> From d6d6d21dbf435d8030782a627175e36e6c7b2dfb Mon Sep 17 00:00:00 2001
> From: David Hildenbrand <da...@redhat.com>
> Date: Sat, 6 Sep 2025 08:33:42 +0200
> Subject: [PATCH] tmp
>
> Signed-off-by: David Hildenbrand <da...@redhat.com>
> ---
>  mm/gup.c | 79 ++++++++++++++++++++++++++------------------------------
>  1 file changed, 36 insertions(+), 43 deletions(-)
>
> diff --git a/mm/gup.c b/mm/gup.c
> index 22420f2069ee1..98907ead749c0 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2845,12 +2845,11 @@ static void __maybe_unused 
> gup_fast_undo_dev_pagemap(int *nr, int nr_start,
>   * also check pmd here to make sure pmd doesn't change (corresponds to
>   * pmdp_collapse_flush() in the THP collapse code path).
>   */
> -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
> -             unsigned long end, unsigned int flags, struct page **pages,
> -             int *nr)
> +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned 
> long addr,
> +             unsigned long end, unsigned int flags, struct page **pages)
>  {
>       struct dev_pagemap *pgmap = NULL;
> -     int ret = 0;
> +     unsigned long nr_pages = 0;
>       pte_t *ptep, *ptem;
>       ptem = ptep = pte_offset_map(&pmd, addr);
> @@ -2908,24 +2907,20 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, 
> unsigned long addr,
>                * details.
>                */
>               if (flags & FOLL_PIN) {
> -                     ret = arch_make_folio_accessible(folio);
> -                     if (ret) {
> +                     if (arch_make_folio_accessible(folio)) {
>                               gup_put_folio(folio, 1, flags);
>                               goto pte_unmap;
>                       }
>               }
>               folio_set_referenced(folio);
> -             pages[*nr] = page;
> -             (*nr)++;
> +             pages[nr_pages++] = page;
>       } while (ptep++, addr += PAGE_SIZE, addr != end);
> -     ret = 1;
> -
>  pte_unmap:
>       if (pgmap)
>               put_dev_pagemap(pgmap);
>       pte_unmap(ptem);
> -     return ret;
> +     return nr_pages;
>  }
>  #else
> @@ -2938,21 +2933,24 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, 
> unsigned long addr,
>   * get_user_pages_fast_only implementation that can pin pages. Thus it's 
> still
>   * useful to have gup_fast_pmd_leaf even if we can't operate on ptes.
>   */
> -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr,
> -             unsigned long end, unsigned int flags, struct page **pages,
> -             int *nr)
> +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned 
> long addr,
> +             unsigned long end, unsigned int flags, struct page **pages)
>  {
>       return 0;
>  }
>  #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */
> -static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr,
> -             unsigned long end, unsigned int flags, struct page **pages,
> -             int *nr)
> +static unsigned long gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned 
> long addr,
> +             unsigned long end, unsigned int flags, struct page **pages)
>  {
> +     const unsigned long nr_pages = (end - addr) >> PAGE_SHIFT;
>       struct page *page;
>       struct folio *folio;
> -     int refs;
> +     unsigned long i;
> +
> +     /* See gup_fast_pte_range() */
> +     if (pmd_protnone(orig))
> +             return 0;
>       if (!pmd_access_permitted(orig, flags & FOLL_WRITE))
>               return 0;
> @@ -2960,33 +2958,30 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, 
> unsigned long addr,
>       if (pmd_special(orig))
>               return 0;
> -     refs = (end - addr) >> PAGE_SHIFT;
>       page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT);
> -     folio = try_grab_folio_fast(page, refs, flags);
> +     folio = try_grab_folio_fast(page, nr_pages, flags);
>       if (!folio)
>               return 0;
>       if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) {
> -             gup_put_folio(folio, refs, flags);
> +             gup_put_folio(folio, nr_pages, flags);
>               return 0;
>       }
>       if (!gup_fast_folio_allowed(folio, flags)) {
> -             gup_put_folio(folio, refs, flags);
> +             gup_put_folio(folio, nr_pages, flags);
>               return 0;
>       }
>       if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) {
> -             gup_put_folio(folio, refs, flags);
> +             gup_put_folio(folio, nr_pages, flags);
>               return 0;
>       }
> -     pages += *nr;
> -     *nr += refs;
> -     for (; refs; refs--)
> +     for (i = 0; i < nr_pages; i++)
>               *(pages++) = page++;
>       folio_set_referenced(folio);
> -     return 1;
> +     return nr_pages;
>  }
>  static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr,
> @@ -3033,11 +3028,11 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, 
> unsigned long addr,
>       return 1;
>  }
> -static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr,
> -             unsigned long end, unsigned int flags, struct page **pages,
> -             int *nr)
> +static unsigned long gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned 
> long addr,
> +             unsigned long end, unsigned int flags, struct page **pages)
>  {
> -     unsigned long next;
> +     unsigned long cur_nr_pages, next;
> +     unsigned long nr_pages = 0;
>       pmd_t *pmdp;
>       pmdp = pmd_offset_lockless(pudp, pud, addr);
> @@ -3046,23 +3041,21 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, 
> unsigned long addr,
>               next = pmd_addr_end(addr, end);
>               if (!pmd_present(pmd))
> -                     return 0;
> +                     break;
> -             if (unlikely(pmd_leaf(pmd))) {
> -                     /* See gup_fast_pte_range() */
> -                     if (pmd_protnone(pmd))
> -                             return 0;
> +             if (unlikely(pmd_leaf(pmd)))
> +                     cur_nr_pages = gup_fast_pmd_leaf(pmd, pmdp, addr, next, 
> flags, pages);
> +             else
> +                     cur_nr_pages = gup_fast_pte_range(pmd, pmdp, addr, 
> next, flags, pages);
> -                     if (!gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags,
> -                             pages, nr))
> -                             return 0;
> +             nr_pages += cur_nr_pages;
> +             pages += cur_nr_pages;
> -             } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags,
> -                                            pages, nr))
> -                     return 0;
> +             if (nr_pages != (next - addr) >> PAGE_SIZE)
> +                     break;
>       } while (pmdp++, addr = next, addr != end);
> -     return 1;
> +     return nr_pages;
>  }
>  static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr,

OK I guess you intentionally left the rest as a TODO :)

So I'll wait for you to post it before reviewing in-depth.

This generally LGTM as an approach; getting rid of *nr is important, as that's
really horrible.

> --
> 2.50.1
>
>
>
> Oh, I might even have found a bug moving away from that questionable
> "ret==1 means success" handling in gup_fast_pte_range()? Will
> have to double-check, but likely the following is the right thing to do.
>
>
>
> From 8f48b25ef93e7ef98611fd58ec89384ad5171782 Mon Sep 17 00:00:00 2001
> From: David Hildenbrand <da...@redhat.com>
> Date: Sat, 6 Sep 2025 08:46:45 +0200
> Subject: [PATCH] mm/gup: fix handling of errors from
>  arch_make_folio_accessible() in gup_fast_pte_range()
>
> In case we call arch_make_folio_accessible() and it fails, we would
> incorrectly return a value that is "!= 0" to the caller, indicating that
> we pinned all requested pages and that the caller can keep going.
>
> gup_fast_pte_range() is not supposed to return error values, but instead
> 0 on failure and 1 on success.
>
> That is of course wrong, because the caller will just keep going pinning
> more pages. If we happen to pin a page afterwards, we're in trouble,
> because we essentially skipped some pages.
>
> Fixes: f28d43636d6f ("mm/gup/writeback: add callbacks for inaccessible pages")
> Signed-off-by: David Hildenbrand <da...@redhat.com>
> ---
>  mm/gup.c | 3 +--
>  1 file changed, 1 insertion(+), 2 deletions(-)
>
> diff --git a/mm/gup.c b/mm/gup.c
> index 22420f2069ee1..cff226ec0ee7d 100644
> --- a/mm/gup.c
> +++ b/mm/gup.c
> @@ -2908,8 +2908,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, 
> unsigned long addr,
>                * details.
>                */
>               if (flags & FOLL_PIN) {
> -                     ret = arch_make_folio_accessible(folio);
> -                     if (ret) {
> +                     if (arch_make_folio_accessible(folio)) {

Oh Lord above. Lol. Yikes.

Yeah I think your fix is valid...

>                               gup_put_folio(folio, 1, flags);
>                               goto pte_unmap;
>                       }
> --
> 2.50.1
>
>
> --
> Cheers
>
> David / dhildenb
>

Reply via email to