On Sat, Sep 06, 2025 at 08:56:48AM +0200, David Hildenbrand wrote: > On 06.09.25 03:05, John Hubbard wrote: > > > > Probably a similar sentiment as Lorenzo here...the above diffs make the code > > *worse* to read. In fact, I recall adding record_subpages() here long ago, > > specifically to help clarify what was going on. > > Well, there is a lot I dislike about record_subpages() to go back there. > Starting with "as Willy keeps explaining, the concept of subpages does > not exist" and ending with "why do we fill out the array even on failure".
Yes > > :) > > > > > Now it's been returned to its original, cryptic form. > > > > The code in the caller was so uncryptic that both me and Lorenzo missed > that magical addition. :P :'( > > > Just my take on it, for whatever that's worth. :) > > As always, appreciated. > > I could of course keep the simple loop in some "record_folio_pages" > function and clean up what I dislike about record_subpages(). > > But I would much rather have the call chain cleaned up instead, if possible. > > > Roughly, what I am thinking about (limiting it to the pte+pmd case) is the > following: I cannot get the below to apply even with the original patch here applied + fix. It looks like (in mm-new :) commit e73f43a66d5f ("mm/gup: remove dead pgmap refcounting code") by Alastair has conflicted here, but even then I can't make it apply, with/without your fix...! > > > From d6d6d21dbf435d8030782a627175e36e6c7b2dfb Mon Sep 17 00:00:00 2001 > From: David Hildenbrand <da...@redhat.com> > Date: Sat, 6 Sep 2025 08:33:42 +0200 > Subject: [PATCH] tmp > > Signed-off-by: David Hildenbrand <da...@redhat.com> > --- > mm/gup.c | 79 ++++++++++++++++++++++++++------------------------------ > 1 file changed, 36 insertions(+), 43 deletions(-) > > diff --git a/mm/gup.c b/mm/gup.c > index 22420f2069ee1..98907ead749c0 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2845,12 +2845,11 @@ static void __maybe_unused > gup_fast_undo_dev_pagemap(int *nr, int nr_start, > * also check pmd here to make sure pmd doesn't change (corresponds to > * pmdp_collapse_flush() in the THP collapse code path). 
> */ > -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned > long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > struct dev_pagemap *pgmap = NULL; > - int ret = 0; > + unsigned long nr_pages = 0; > pte_t *ptep, *ptem; > ptem = ptep = pte_offset_map(&pmd, addr); > @@ -2908,24 +2907,20 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, > unsigned long addr, > * details. > */ > if (flags & FOLL_PIN) { > - ret = arch_make_folio_accessible(folio); > - if (ret) { > + if (arch_make_folio_accessible(folio)) { > gup_put_folio(folio, 1, flags); > goto pte_unmap; > } > } > folio_set_referenced(folio); > - pages[*nr] = page; > - (*nr)++; > + pages[nr_pages++] = page; > } while (ptep++, addr += PAGE_SIZE, addr != end); > - ret = 1; > - > pte_unmap: > if (pgmap) > put_dev_pagemap(pgmap); > pte_unmap(ptem); > - return ret; > + return nr_pages; > } > #else > @@ -2938,21 +2933,24 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, > unsigned long addr, > * get_user_pages_fast_only implementation that can pin pages. Thus it's > still > * useful to have gup_fast_pmd_leaf even if we can't operate on ptes. 
> */ > -static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned > long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > return 0; > } > #endif /* CONFIG_ARCH_HAS_PTE_SPECIAL */ > -static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned > long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > + const unsigned long nr_pages = (end - addr) >> PAGE_SHIFT; > struct page *page; > struct folio *folio; > - int refs; > + unsigned long i; > + > + /* See gup_fast_pte_range() */ > + if (pmd_protnone(orig)) > + return 0; > if (!pmd_access_permitted(orig, flags & FOLL_WRITE)) > return 0; > @@ -2960,33 +2958,30 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, > unsigned long addr, > if (pmd_special(orig)) > return 0; > - refs = (end - addr) >> PAGE_SHIFT; > page = pmd_page(orig) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); > - folio = try_grab_folio_fast(page, refs, flags); > + folio = try_grab_folio_fast(page, nr_pages, flags); > if (!folio) > return 0; > if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > if (!gup_fast_folio_allowed(folio, flags)) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > if (!pmd_write(orig) && gup_must_unshare(NULL, flags, &folio->page)) { > - gup_put_folio(folio, refs, flags); > + gup_put_folio(folio, nr_pages, flags); > return 0; > } > - pages += *nr; > - *nr += refs; > - for (; refs; refs--) > + for (i = 0; i < nr_pages; i++) > *(pages++) = page++; > folio_set_referenced(folio); > - return 1; > + return nr_pages; 
> } > static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, > @@ -3033,11 +3028,11 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, > unsigned long addr, > return 1; > } > -static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned long addr, > - unsigned long end, unsigned int flags, struct page **pages, > - int *nr) > +static unsigned long gup_fast_pmd_range(pud_t *pudp, pud_t pud, unsigned > long addr, > + unsigned long end, unsigned int flags, struct page **pages) > { > - unsigned long next; > + unsigned long cur_nr_pages, next; > + unsigned long nr_pages = 0; > pmd_t *pmdp; > pmdp = pmd_offset_lockless(pudp, pud, addr); > @@ -3046,23 +3041,21 @@ static int gup_fast_pmd_range(pud_t *pudp, pud_t pud, > unsigned long addr, > next = pmd_addr_end(addr, end); > if (!pmd_present(pmd)) > - return 0; > + break; > - if (unlikely(pmd_leaf(pmd))) { > - /* See gup_fast_pte_range() */ > - if (pmd_protnone(pmd)) > - return 0; > + if (unlikely(pmd_leaf(pmd))) > + cur_nr_pages = gup_fast_pmd_leaf(pmd, pmdp, addr, next, > flags, pages); > + else > + cur_nr_pages = gup_fast_pte_range(pmd, pmdp, addr, > next, flags, pages); > - if (!gup_fast_pmd_leaf(pmd, pmdp, addr, next, flags, > - pages, nr)) > - return 0; > + nr_pages += cur_nr_pages; > + pages += cur_nr_pages; > - } else if (!gup_fast_pte_range(pmd, pmdp, addr, next, flags, > - pages, nr)) > - return 0; > + if (nr_pages != (next - addr) >> PAGE_SIZE) > + break; > } while (pmdp++, addr = next, addr != end); > - return 1; > + return nr_pages; > } > static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, OK I guess you intentionally left the rest as a TODO :) So I'll wait for you to post it before reviewing in-depth. This generally LGTM as an approach, getting rid of *nr is important that's really horrible. > -- > 2.50.1 > > > > Oh, I might even have found a bug moving away from that questionable > "ret==1 means success" handling in gup_fast_pte_range()? 
Will > have to double-check, but likely the following is the right thing to do. > > > > From 8f48b25ef93e7ef98611fd58ec89384ad5171782 Mon Sep 17 00:00:00 2001 > From: David Hildenbrand <da...@redhat.com> > Date: Sat, 6 Sep 2025 08:46:45 +0200 > Subject: [PATCH] mm/gup: fix handling of errors from > arch_make_folio_accessible() in follow_page_pte() > > In case we call arch_make_folio_accessible() and it fails, we would > incorrectly return a value that is "!= 0" to the caller, indicating that > we pinned all requested pages and that the caller can keep going. > > follow_page_pte() is not supposed to return error values, but instead > 0 on failure and 1 on success. > > That is of course wrong, because the caller will just keep going pinning > more pages. If we happen to pin a page afterwards, we're in trouble, > because we essentially skipped some pages. > > Fixes: f28d43636d6f ("mm/gup/writeback: add callbacks for inaccessible pages") > Signed-off-by: David Hildenbrand <da...@redhat.com> > --- > mm/gup.c | 3 +-- > 1 file changed, 1 insertion(+), 2 deletions(-) > > diff --git a/mm/gup.c b/mm/gup.c > index 22420f2069ee1..cff226ec0ee7d 100644 > --- a/mm/gup.c > +++ b/mm/gup.c > @@ -2908,8 +2908,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, > unsigned long addr, > * details. > */ > if (flags & FOLL_PIN) { > - ret = arch_make_folio_accessible(folio); > - if (ret) { > + if (arch_make_folio_accessible(folio)) { Oh Lord above. Lol. Yikes. Yeah I think your fix is valid... > gup_put_folio(folio, 1, flags); > goto pte_unmap; > } > -- > 2.50.1 > > > -- > Cheers > > David / dhildenb >