On Fri, Aug 29, 2025 at 04:34:54PM +0200, David Hildenbrand wrote:
> On 28.08.25 19:28, Lorenzo Stoakes wrote:
> > On Thu, Aug 28, 2025 at 12:01:25AM +0200, David Hildenbrand wrote:
> > > Let's disallow handing out PFN ranges with non-contiguous pages, so we
> > > can remove the nth-page usage in __cma_alloc(), and so any callers don't
> > > have to worry about that either when wanting to blindly iterate pages.
> > >
> > > This is really only a problem in configs with SPARSEMEM but without
> > > SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
> > > cases.
> >
> > I'm guessing this is something that we don't need to worry about in
> > reality?
>
> That my theory yes.

Let's hope correct haha, but seems reasonable.

>
> >
> > >
> > > Will this cause harm? Probably not, because it's mostly 32bit that does
> > > not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could
> > > look into allocating the memmap for the memory sections spanned by a
> > > single CMA region in one go from memblock.
> > >
> > > Reviewed-by: Alexandru Elisei <alexandru.eli...@arm.com>
> > > Signed-off-by: David Hildenbrand <da...@redhat.com>
> >
> > LGTM other than refactoring point below.
> >
> > CMA stuff looks fine afaict after staring at it for a while, on proviso
> > that handing out ranges within the same section is always going to be the
> > case.
> >
> > Anyway overall,
> >
> > LGTM, so:
> >
> > Reviewed-by: Lorenzo Stoakes <lorenzo.stoa...@oracle.com>
> >
> >
> > > ---
> > >   include/linux/mm.h |  6 ++++++
> > >   mm/cma.c           | 39 ++++++++++++++++++++++++---------------
> > >   mm/util.c          | 33 +++++++++++++++++++++++++++++++++
> > >   3 files changed, 63 insertions(+), 15 deletions(-)
> > >
> > > diff --git a/include/linux/mm.h b/include/linux/mm.h
> > > index f6880e3225c5c..2ca1eb2db63ec 100644
> > > --- a/include/linux/mm.h
> > > +++ b/include/linux/mm.h
> > > @@ -209,9 +209,15 @@ extern unsigned long sysctl_user_reserve_kbytes;
> > >   extern unsigned long sysctl_admin_reserve_kbytes;
> > >
> > >   #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
> > > +bool page_range_contiguous(const struct page *page, unsigned long 
> > > nr_pages);
> > >   #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
> > >   #else
> > >   #define nth_page(page,n) ((page) + (n))
> > > +static inline bool page_range_contiguous(const struct page *page,
> > > +         unsigned long nr_pages)
> > > +{
> > > + return true;
> > > +}
> > >   #endif
> > >
> > >   /* to align the pointer to the (next) page boundary */
> > > diff --git a/mm/cma.c b/mm/cma.c
> > > index e56ec64d0567e..813e6dc7b0954 100644
> > > --- a/mm/cma.c
> > > +++ b/mm/cma.c
> > > @@ -780,10 +780,8 @@ static int cma_range_alloc(struct cma *cma, struct 
> > > cma_memrange *cmr,
> > >                                   unsigned long count, unsigned int align,
> > >                                   struct page **pagep, gfp_t gfp)
> > >   {
> > > - unsigned long mask, offset;
> > > - unsigned long pfn = -1;
> > > - unsigned long start = 0;
> > >           unsigned long bitmap_maxno, bitmap_no, bitmap_count;
> > > + unsigned long start, pfn, mask, offset;
> > >           int ret = -EBUSY;
> > >           struct page *page = NULL;
> > >
> > > @@ -795,7 +793,7 @@ static int cma_range_alloc(struct cma *cma, struct 
> > > cma_memrange *cmr,
> > >           if (bitmap_count > bitmap_maxno)
> > >                   goto out;
> > >
> > > - for (;;) {
> > > + for (start = 0; ; start = bitmap_no + mask + 1) {
> > >                   spin_lock_irq(&cma->lock);
> > >                   /*
> > >                    * If the request is larger than the available number
> > > @@ -812,6 +810,22 @@ static int cma_range_alloc(struct cma *cma, struct 
> > > cma_memrange *cmr,
> > >                           spin_unlock_irq(&cma->lock);
> > >                           break;
> > >                   }
> > > +
> > > +         pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
> > > +         page = pfn_to_page(pfn);
> > > +
> > > +         /*
> > > +          * Do not hand out page ranges that are not contiguous, so
> > > +          * callers can just iterate the pages without having to worry
> > > +          * about these corner cases.
> > > +          */
> > > +         if (!page_range_contiguous(page, count)) {
> > > +                 spin_unlock_irq(&cma->lock);
> > > +                 pr_warn_ratelimited("%s: %s: skipping incompatible area 
> > > [0x%lx-0x%lx]",
> > > +                                     __func__, cma->name, pfn, pfn + 
> > > count - 1);
> > > +                 continue;
> > > +         }
> > > +
> > >                   bitmap_set(cmr->bitmap, bitmap_no, bitmap_count);
> > >                   cma->available_count -= count;
> > >                   /*
> > > @@ -821,29 +835,24 @@ static int cma_range_alloc(struct cma *cma, struct 
> > > cma_memrange *cmr,
> > >                    */
> > >                   spin_unlock_irq(&cma->lock);
> > >
> > > -         pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
> > >                   mutex_lock(&cma->alloc_mutex);
> > >                   ret = alloc_contig_range(pfn, pfn + count, 
> > > ACR_FLAGS_CMA, gfp);
> > >                   mutex_unlock(&cma->alloc_mutex);
> > > -         if (ret == 0) {
> > > -                 page = pfn_to_page(pfn);
> > > +         if (!ret)
> > >                           break;
> > > -         }
> > >
> > >                   cma_clear_bitmap(cma, cmr, pfn, count);
> > >                   if (ret != -EBUSY)
> > >                           break;
> > >
> > >                   pr_debug("%s(): memory range at pfn 0x%lx %p is busy, 
> > > retrying\n",
> > > -                  __func__, pfn, pfn_to_page(pfn));
> > > +                  __func__, pfn, page);
> > >
> > > -         trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
> > > -                                    count, align);
> > > -         /* try again with a bit different memory target */
> > > -         start = bitmap_no + mask + 1;
> > > +         trace_cma_alloc_busy_retry(cma->name, pfn, page, count, align);
> > >           }
> > >   out:
> > > - *pagep = page;
> > > + if (!ret)
> > > +         *pagep = page;
> > >           return ret;
> > >   }
> > >
> > > @@ -882,7 +891,7 @@ static struct page *__cma_alloc(struct cma *cma, 
> > > unsigned long count,
> > >            */
> > >           if (page) {
> > >                   for (i = 0; i < count; i++)
> > > -                 page_kasan_tag_reset(nth_page(page, i));
> > > +                 page_kasan_tag_reset(page + i);
> > >           }
> > >
> > >           if (ret && !(gfp & __GFP_NOWARN)) {
> > > diff --git a/mm/util.c b/mm/util.c
> > > index d235b74f7aff7..0bf349b19b652 100644
> > > --- a/mm/util.c
> > > +++ b/mm/util.c
> > > @@ -1280,4 +1280,37 @@ unsigned int folio_pte_batch(struct folio *folio, 
> > > pte_t *ptep, pte_t pte,
> > >   {
> > >           return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, 
> > > 0);
> > >   }
> > > +
> > > +#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
> > > +/**
> > > + * page_range_contiguous - test whether the page range is contiguous
> > > + * @page: the start of the page range.
> > > + * @nr_pages: the number of pages in the range.
> > > + *
> > > + * Test whether the page range is contiguous, such that they can be 
> > > iterated
> > > + * naively, corresponding to iterating a contiguous PFN range.
> > > + *
> > > + * This function should primarily only be used for debug checks, or when
> > > + * working with page ranges that are not naturally contiguous (e.g., 
> > > pages
> > > + * within a folio are).
> > > + *
> > > + * Returns true if contiguous, otherwise false.
> > > + */
> > > +bool page_range_contiguous(const struct page *page, unsigned long 
> > > nr_pages)
> > > +{
> > > + const unsigned long start_pfn = page_to_pfn(page);
> > > + const unsigned long end_pfn = start_pfn + nr_pages;
> > > + unsigned long pfn;
> > > +
> > > + /*
> > > +  * The memmap is allocated per memory section. We need to check
> > > +  * each involved memory section once.
> > > +  */
> > > + for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
> > > +      pfn < end_pfn; pfn += PAGES_PER_SECTION)
> > > +         if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
> > > +                 return false;
> >
> > I find this pretty confusing, my test for this is how many times I have to 
> > read
> > the code to understand what it's doing :)
> >
> > So we have something like:
> >
> >    (pfn of page)
> >     start_pfn        pfn = align UP
> >          |                 |
> >          v                 v
> >   |         section        |
> >          <----------------->
> >            pfn - start_pfn
> >
> > Then check page + (pfn - start_pfn) == pfn_to_page(pfn)
> >
> > And loop such that:
> >
> >    (pfn of page)
> >     start_pfn                                      pfn
> >          |                                          |
> >          v                                          v
> >   |         section        |         section        |
> >          <------------------------------------------>
> >                          pfn - start_pfn
> >
> > Again check page + (pfn - start_pfn) == pfn_to_page(pfn)
> >
> > And so on.
> >
> > So the logic looks good, but it's just... that took me a hot second to
> > parse :)
> >
> > I think a few simple fixups
> >
> > bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
> > {
> >     const unsigned long start_pfn = page_to_pfn(page);
> >     const unsigned long end_pfn = start_pfn + nr_pages;
> >     /* The PFN of the start of the next section. */
> >     unsigned long pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
> >     /* The page we'd expected to see if the range were contiguous. */
> >     struct page *expected = page + (pfn - start_pfn);
> >
> >     /*
> >      * The memmap is allocated per memory section. We need to check
> >      * each involved memory section once.
> >      */
> >     for (; pfn < end_pfn; pfn += PAGES_PER_SECTION, expected += 
> > PAGES_PER_SECTION)
> >             if (unlikely(expected != pfn_to_page(pfn)))
> >                     return false;
> >     return true;
> > }
> >
>
> Hm, I prefer my variant, especially where the pfn is calculated in the for 
> loop. Likely a
> matter of personal taste.

Sure this is always a factor in code :)

>
> But I can see why skipping the first section might be a surprise when not
> having the semantics of ALIGN() in the cache.

Yup!

>
> So I'll add the following on top:
>
> diff --git a/mm/util.c b/mm/util.c
> index 0bf349b19b652..fbdb73aaf35fe 100644
> --- a/mm/util.c
> +++ b/mm/util.c
> @@ -1303,8 +1303,10 @@ bool page_range_contiguous(const struct page *page, 
> unsigned long nr_pages)
>         unsigned long pfn;
>         /*
> -        * The memmap is allocated per memory section. We need to check
> -        * each involved memory section once.
> +        * The memmap is allocated per memory section, so no need to check
> +        * within the first section. However, we need to check each other
> +        * spanned memory section once, making sure the first page in a
> +        * section could similarly be reached by just iterating pages.
>          */
>         for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
>              pfn < end_pfn; pfn += PAGES_PER_SECTION)

Cool this helps clarify things, that'll do fine!

>
> Thanks!
>
> --
> Cheers
>
> David / dhildenb
>
>

Cheers, Lorenzo

Reply via email to