On 28.08.25 19:28, Lorenzo Stoakes wrote:
On Thu, Aug 28, 2025 at 12:01:25AM +0200, David Hildenbrand wrote:
Let's disallow handing out PFN ranges with non-contiguous pages, so we
can remove the nth-page usage in __cma_alloc(), and so any callers don't
have to worry about that either when wanting to blindly iterate pages.

This is really only a problem in configs with SPARSEMEM but without
SPARSEMEM_VMEMMAP, and only when we would cross memory sections in some
cases.

I'm guessing this is something that we don't need to worry about in
reality?

That's my theory, yes.
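To spell out the corner case (an illustrative sketch only; the helper below is
hypothetical and not part of the patch): with SPARSEMEM but without
SPARSEMEM_VMEMMAP the memmap is allocated per memory section, so "page + i"
and nth_page(page, i) can disagree once a PFN range crosses a section
boundary.

/* Hypothetical helper, assumes kernel context (<linux/mm.h>, <linux/mmzone.h>). */
static inline bool range_stays_in_one_section(const struct page *page,
					      unsigned long nr_pages)
{
	unsigned long start_pfn = page_to_pfn(page);
	unsigned long last_pfn = start_pfn + nr_pages - 1;

	/*
	 * Same section implies one memmap allocation, so plain "page + i"
	 * iteration is fine. Crossing a section is not automatically broken,
	 * though -- the memmaps may still happen to be adjacent, which is
	 * what page_range_contiguous() below checks properly.
	 */
	return (start_pfn >> PFN_SECTION_SHIFT) == (last_pfn >> PFN_SECTION_SHIFT);
}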



Will this cause harm? Probably not, because it's mostly 32bit that does
not support SPARSEMEM_VMEMMAP. If this ever becomes a problem we could
look into allocating the memmap for the memory sections spanned by a
single CMA region in one go from memblock.

Reviewed-by: Alexandru Elisei <alexandru.eli...@arm.com>
Signed-off-by: David Hildenbrand <da...@redhat.com>

LGTM other than the refactoring point below.

CMA stuff looks fine afaict after staring at it for a while, on the proviso
that handing out ranges within the same section is always going to be the
case.

Anyway overall,

LGTM, so:

Reviewed-by: Lorenzo Stoakes <lorenzo.stoa...@oracle.com>


---
  include/linux/mm.h |  6 ++++++
  mm/cma.c           | 39 ++++++++++++++++++++++++---------------
  mm/util.c          | 33 +++++++++++++++++++++++++++++++++
  3 files changed, 63 insertions(+), 15 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index f6880e3225c5c..2ca1eb2db63ec 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -209,9 +209,15 @@ extern unsigned long sysctl_user_reserve_kbytes;
  extern unsigned long sysctl_admin_reserve_kbytes;

  #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+bool page_range_contiguous(const struct page *page, unsigned long nr_pages);
  #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
  #else
  #define nth_page(page,n) ((page) + (n))
+static inline bool page_range_contiguous(const struct page *page,
+               unsigned long nr_pages)
+{
+       return true;
+}
  #endif

  /* to align the pointer to the (next) page boundary */
diff --git a/mm/cma.c b/mm/cma.c
index e56ec64d0567e..813e6dc7b0954 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -780,10 +780,8 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
                                unsigned long count, unsigned int align,
                                struct page **pagep, gfp_t gfp)
  {
-       unsigned long mask, offset;
-       unsigned long pfn = -1;
-       unsigned long start = 0;
        unsigned long bitmap_maxno, bitmap_no, bitmap_count;
+       unsigned long start, pfn, mask, offset;
        int ret = -EBUSY;
        struct page *page = NULL;

@@ -795,7 +793,7 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
        if (bitmap_count > bitmap_maxno)
                goto out;

-       for (;;) {
+       for (start = 0; ; start = bitmap_no + mask + 1) {
                spin_lock_irq(&cma->lock);
                /*
                 * If the request is larger than the available number
@@ -812,6 +810,22 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
                        spin_unlock_irq(&cma->lock);
                        break;
                }
+
+               pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
+               page = pfn_to_page(pfn);
+
+               /*
+                * Do not hand out page ranges that are not contiguous, so
+                * callers can just iterate the pages without having to worry
+                * about these corner cases.
+                */
+               if (!page_range_contiguous(page, count)) {
+                       spin_unlock_irq(&cma->lock);
+                       pr_warn_ratelimited("%s: %s: skipping incompatible area [0x%lx-0x%lx]",
+                                           __func__, cma->name, pfn, pfn + count - 1);
+                       continue;
+               }
+
                bitmap_set(cmr->bitmap, bitmap_no, bitmap_count);
                cma->available_count -= count;
                /*
@@ -821,29 +835,24 @@ static int cma_range_alloc(struct cma *cma, struct cma_memrange *cmr,
                 */
                spin_unlock_irq(&cma->lock);

-               pfn = cmr->base_pfn + (bitmap_no << cma->order_per_bit);
                mutex_lock(&cma->alloc_mutex);
                ret = alloc_contig_range(pfn, pfn + count, ACR_FLAGS_CMA, gfp);
                mutex_unlock(&cma->alloc_mutex);
-               if (ret == 0) {
-                       page = pfn_to_page(pfn);
+               if (!ret)
                        break;
-               }

                cma_clear_bitmap(cma, cmr, pfn, count);
                if (ret != -EBUSY)
                        break;

                pr_debug("%s(): memory range at pfn 0x%lx %p is busy, retrying\n",
-                        __func__, pfn, pfn_to_page(pfn));
+                        __func__, pfn, page);

-               trace_cma_alloc_busy_retry(cma->name, pfn, pfn_to_page(pfn),
-                                          count, align);
-               /* try again with a bit different memory target */
-               start = bitmap_no + mask + 1;
+               trace_cma_alloc_busy_retry(cma->name, pfn, page, count, align);
        }
  out:
-       *pagep = page;
+       if (!ret)
+               *pagep = page;
        return ret;
  }

@@ -882,7 +891,7 @@ static struct page *__cma_alloc(struct cma *cma, unsigned long count,
         */
        if (page) {
                for (i = 0; i < count; i++)
-                       page_kasan_tag_reset(nth_page(page, i));
+                       page_kasan_tag_reset(page + i);
        }

        if (ret && !(gfp & __GFP_NOWARN)) {
diff --git a/mm/util.c b/mm/util.c
index d235b74f7aff7..0bf349b19b652 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1280,4 +1280,37 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte,
  {
        return folio_pte_batch_flags(folio, NULL, ptep, &pte, max_nr, 0);
  }
+
+#if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
+/**
+ * page_range_contiguous - test whether the page range is contiguous
+ * @page: the start of the page range.
+ * @nr_pages: the number of pages in the range.
+ *
+ * Test whether the page range is contiguous, such that the pages can be
+ * iterated naively, corresponding to iterating a contiguous PFN range.
+ *
+ * This function should primarily only be used for debug checks, or when
+ * working with page ranges that are not naturally contiguous (unlike, e.g.,
+ * pages within a folio, which are).
+ *
+ * Returns true if contiguous, otherwise false.
+ */
+bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
+{
+       const unsigned long start_pfn = page_to_pfn(page);
+       const unsigned long end_pfn = start_pfn + nr_pages;
+       unsigned long pfn;
+
+       /*
+        * The memmap is allocated per memory section. We need to check
+        * each involved memory section once.
+        */
+       for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
+            pfn < end_pfn; pfn += PAGES_PER_SECTION)
+               if (unlikely(page + (pfn - start_pfn) != pfn_to_page(pfn)))
+                       return false;

I find this pretty confusing; my test for this is how many times I have to read
the code to understand what it's doing :)

So we have something like:

   (pfn of page)
    start_pfn        pfn = align UP
         |                 |
         v                 v
  |         section        |
         <----------------->
           pfn - start_pfn

Then check page + (pfn - start_pfn) == pfn_to_page(pfn)

And loop such that:

   (pfn of page)
    start_pfn                                      pfn
         |                                          |
         v                                          v
  |         section        |         section        |
         <------------------------------------------>
                         pfn - start_pfn

Again check page + (pfn - start_pfn) == pfn_to_page(pfn)

And so on.

So the logic looks good, but it's just... that took me a hot second to
parse :)

I think a few simple fixups would help, something like:

bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
{
        const unsigned long start_pfn = page_to_pfn(page);
        const unsigned long end_pfn = start_pfn + nr_pages;
        /* The PFN of the start of the next section. */
        unsigned long pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
        /* The page we'd expected to see if the range were contiguous. */
        struct page *expected = page + (pfn - start_pfn);

        /*
         * The memmap is allocated per memory section. We need to check
         * each involved memory section once.
         */
        for (; pfn < end_pfn; pfn += PAGES_PER_SECTION, expected += PAGES_PER_SECTION)
                if (unlikely(expected != pfn_to_page(pfn)))
                        return false;
        return true;
}


Hm, I prefer my variant, especially where the pfn is calculated in the for
loop. Likely a matter of personal taste.

But I can see why skipping the first section might be a surprise when not
having the semantics of ALIGN() in the cache.
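To make the ALIGN() semantics concrete (purely illustrative, hypothetical
numbers, not from the patch):

/* Sketch only, assuming PAGES_PER_SECTION == 0x8000 on this config. */
unsigned long start_pfn = 0x8100;	/* somewhere inside section 1 */
unsigned long pfn = ALIGN(start_pfn, PAGES_PER_SECTION);	/* 0x10000, start of section 2 */

/*
 * So the loop never tests the section the range starts in; that's fine,
 * because the memmap within a single section is contiguous by construction.
 */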

So I'll add the following on top:

diff --git a/mm/util.c b/mm/util.c
index 0bf349b19b652..fbdb73aaf35fe 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -1303,8 +1303,10 @@ bool page_range_contiguous(const struct page *page, unsigned long nr_pages)
        unsigned long pfn;
        /*
-        * The memmap is allocated per memory section. We need to check
-        * each involved memory section once.
+        * The memmap is allocated per memory section, so no need to check
+        * within the first section. However, we need to check each other
+        * spanned memory section once, making sure the first page in a
+        * section could similarly be reached by just iterating pages.
         */
        for (pfn = ALIGN(start_pfn, PAGES_PER_SECTION);
             pfn < end_pfn; pfn += PAGES_PER_SECTION)

Thanks!

--
Cheers

David / dhildenb
