The early-boot gigantic hugepage allocation helpers currently mix
allocation with huge_bootmem_page setup, and leave part of that setup
(list insertion and field initialization) in architecture code.

Refactor the interface to return the allocated huge page pointer and
move the huge_bootmem_page setup into the generic hugetlb code. This
makes the architecture-specific paths focus only on finding memory,
while the common code handles node placement and early page metadata
setup in one place.

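This also lets powerpc benefit from memblock_reserved_mark_noinit().
That call used to live in __alloc_bootmem_huge_page(), so pages handed
out by pseries_alloc_bootmem_huge_page() never went through it.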

In addition, the upcoming cross-zone validation for boot-time gigantic
hugetlb reservations is architecture-independent. With this refactoring,
that logic can live in the generic code instead of being duplicated in
each architecture-specific path.

Signed-off-by: Muchun Song <[email protected]>
Reviewed-by: Mike Rapoport (Microsoft) <[email protected]>
Reviewed-by: Oscar Salvador (SUSE) <[email protected]>
---
v2->v3:
- keep powerpc code independent of struct huge_bootmem_page by switching
  it to void * (per Mike Rapoport)
- move huge_bootmem_page internals out of include/linux/hugetlb.h and keep
  them in mm-private scope so the arch code does not need to see the type
  (per Mike Rapoport, echoed by Oscar Salvador)
---
 arch/powerpc/mm/hugetlbpage.c | 13 ++---
 include/linux/hugetlb.h       | 18 ++-----
 mm/hugetlb.c                  | 95 ++++++++++++++---------------------
 mm/hugetlb_cma.c              | 13 ++---
 mm/hugetlb_cma.h              |  8 ++-
 mm/internal.h                 |  9 ++++
 6 files changed, 64 insertions(+), 92 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index 558fafb82b8a..a298746dc143 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -104,17 +104,14 @@ void __init pseries_add_gpage(u64 addr, u64 page_size, unsigned long number_of_p
        }
 }
 
-static int __init pseries_alloc_bootmem_huge_page(struct hstate *hstate)
+static __init void *pseries_alloc_bootmem_huge_page(struct hstate *hstate)
 {
-       struct huge_bootmem_page *m;
+       void *m;
        if (nr_gpages == 0)
-               return 0;
+               return NULL;
        m = phys_to_virt(gpage_freearray[--nr_gpages]);
        gpage_freearray[nr_gpages] = 0;
-       list_add(&m->list, &huge_boot_pages[0]);
-       m->hstate = hstate;
-       m->flags = 0;
-       return 1;
+       return m;
 }
 
 bool __init hugetlb_node_alloc_supported(void)
@@ -124,7 +121,7 @@ bool __init hugetlb_node_alloc_supported(void)
 #endif
 
 
-int __init alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid)
 {
 
 #ifdef CONFIG_PPC_BOOK3S_64
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 3700c0a1f6ff..09f28dd773b7 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -674,19 +674,11 @@ struct hstate {
        char name[HSTATE_NAME_LEN];
 };
 
-struct cma;
-
-struct huge_bootmem_page {
-       struct list_head list;
-       struct hstate *hstate;
-       unsigned long flags;
-       struct cma *cma;
-};
-
 #define HUGE_BOOTMEM_HVO               0x0001
 #define HUGE_BOOTMEM_ZONES_VALID       0x0002
 #define HUGE_BOOTMEM_CMA               0x0004
 
+struct huge_bootmem_page;
 bool hugetlb_bootmem_page_zones_valid(int nid, struct huge_bootmem_page *m);
 
 int isolate_or_dissolve_huge_folio(struct folio *folio, struct list_head *list);
@@ -706,8 +698,8 @@ void restore_reserve_on_error(struct hstate *h, struct vm_area_struct *vma,
                                unsigned long address, struct folio *folio);
 
 /* arch callback */
-int __init __alloc_bootmem_huge_page(struct hstate *h, int nid);
-int __init alloc_bootmem_huge_page(struct hstate *h, int nid);
+void *__init __alloc_bootmem_huge_page(struct hstate *h, int nid);
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid);
 bool __init hugetlb_node_alloc_supported(void);
 
 void __init hugetlb_add_hstate(unsigned order);
@@ -1138,9 +1130,9 @@ alloc_hugetlb_folio_nodemask(struct hstate *h, int preferred_nid,
        return NULL;
 }
 
-static inline int __alloc_bootmem_huge_page(struct hstate *h)
+static inline void *__alloc_bootmem_huge_page(struct hstate *h, int nid)
 {
-       return 0;
+       return NULL;
 }
 
 static inline struct hstate *hstate_file(struct file *f)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 2bf9fe16abb9..5e557c05d80a 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3027,79 +3027,58 @@ struct folio *alloc_hugetlb_folio(struct vm_area_struct *vma,
 
 static __init void *alloc_bootmem(struct hstate *h, int nid, bool node_exact)
 {
-       struct huge_bootmem_page *m;
-       int listnode = nid;
-
        if (hugetlb_early_cma(h))
-               m = hugetlb_cma_alloc_bootmem(h, &listnode, node_exact);
-       else {
-               if (node_exact)
-                       m = memblock_alloc_exact_nid_raw(huge_page_size(h),
+               return hugetlb_cma_alloc_bootmem(h, nid, node_exact);
+
+       if (node_exact)
+               return memblock_alloc_exact_nid_raw(huge_page_size(h),
                                huge_page_size(h), 0,
                                MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-               else {
-                       m = memblock_alloc_try_nid_raw(huge_page_size(h),
+
+       return memblock_alloc_try_nid_raw(huge_page_size(h),
                                huge_page_size(h), 0,
                                MEMBLOCK_ALLOC_ACCESSIBLE, nid);
-                       /*
-                        * For pre-HVO to work correctly, pages need to be on
-                        * the list for the node they were actually allocated
-                        * from. That node may be different in the case of
-                        * fallback by memblock_alloc_try_nid_raw. So,
-                        * extract the actual node first.
-                        */
-                       if (m)
-                               listnode = early_pfn_to_nid(PHYS_PFN(__pa(m)));
-               }
-
-               if (m) {
-                       m->flags = 0;
-                       m->cma = NULL;
-               }
-       }
-
-       if (m) {
-               /*
-                * Use the beginning of the huge page to store the
-                * huge_bootmem_page struct (until gather_bootmem
-                * puts them into the mem_map).
-                *
-                * Put them into a private list first because mem_map
-                * is not up yet.
-                */
-               INIT_LIST_HEAD(&m->list);
-               list_add(&m->list, &huge_boot_pages[listnode]);
-               m->hstate = h;
-       }
-
-       return m;
 }
 
-int alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init arch_alloc_bootmem_huge_page(struct hstate *h, int nid)
        __attribute__ ((weak, alias("__alloc_bootmem_huge_page")));
-int __alloc_bootmem_huge_page(struct hstate *h, int nid)
+void *__init __alloc_bootmem_huge_page(struct hstate *h, int nid)
 {
-       struct huge_bootmem_page *m = NULL; /* initialize for clang */
        int nr_nodes, node = nid;
 
        /* do node specific alloc */
-       if (nid != NUMA_NO_NODE) {
-               m = alloc_bootmem(h, node, true);
-               if (!m)
-                       return 0;
-               goto found;
-       }
+       if (nid != NUMA_NO_NODE)
+               return alloc_bootmem(h, node, true);
 
        /* allocate from next node when distributing huge pages */
        for_each_node_mask_to_alloc(&h->next_nid_to_alloc, nr_nodes, node,
-                                   &hugetlb_bootmem_nodes) {
-               m = alloc_bootmem(h, node, false);
-               if (!m)
-                       return 0;
-               goto found;
-       }
+                                   &hugetlb_bootmem_nodes)
+               return alloc_bootmem(h, node, false);
 
-found:
+       return NULL;
+}
+
+static bool __init alloc_bootmem_huge_page(struct hstate *h, int nid)
+{
+       struct huge_bootmem_page *m = arch_alloc_bootmem_huge_page(h, nid);
+
+       if (!m)
+               return false;
+
+       nid = early_pfn_to_nid(PHYS_PFN(__pa(m)));
+       /*
+        * Use the beginning of the huge page to store the huge_bootmem_page
+        * struct (until gather_bootmem puts them into the mem_map).
+        *
+        * Put them into a private list first because mem_map is not up yet.
+        */
+       INIT_LIST_HEAD(&m->list);
+       list_add(&m->list, &huge_boot_pages[nid]);
+       m->hstate = h;
+       if (!hugetlb_early_cma(h)) {
+               m->cma = NULL;
+               m->flags = 0;
+       }
 
        /*
         * Only initialize the head struct page in memmap_init_reserved_pages,
@@ -3111,7 +3090,7 @@ int __alloc_bootmem_huge_page(struct hstate *h, int nid)
        memblock_reserved_mark_noinit(__pa((void *)m + PAGE_SIZE),
                huge_page_size(h) - PAGE_SIZE);
 
-       return 1;
+       return true;
 }
 
 /* Initialize [start_page:end_page_number] tail struct pages of a hugepage */
diff --git a/mm/hugetlb_cma.c b/mm/hugetlb_cma.c
index ce999391cc14..e487d0ffffc0 100644
--- a/mm/hugetlb_cma.c
+++ b/mm/hugetlb_cma.c
@@ -56,14 +56,13 @@ struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
        return folio;
 }
 
-struct huge_bootmem_page * __init
-hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, bool node_exact)
+void * __init hugetlb_cma_alloc_bootmem(struct hstate *h, int nid, bool node_exact)
 {
        struct cma *cma;
        struct huge_bootmem_page *m;
-       int node = *nid;
+       int node;
 
-       cma = hugetlb_cma[*nid];
+       cma = hugetlb_cma[nid];
        m = cma_reserve_early(cma, huge_page_size(h));
        if (!m) {
                if (node_exact)
@@ -71,13 +70,11 @@ hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid, bool node_exact)
 
                for_each_node_mask(node, hugetlb_bootmem_nodes) {
                        cma = hugetlb_cma[node];
-                       if (!cma || node == *nid)
+                       if (!cma || node == nid)
                                continue;
                        m = cma_reserve_early(cma, huge_page_size(h));
-                       if (m) {
-                               *nid = node;
+                       if (m)
                                break;
-                       }
                }
        }
 
diff --git a/mm/hugetlb_cma.h b/mm/hugetlb_cma.h
index c619c394b1ae..3aa483573d17 100644
--- a/mm/hugetlb_cma.h
+++ b/mm/hugetlb_cma.h
@@ -6,8 +6,7 @@
 void hugetlb_cma_free_frozen_folio(struct folio *folio);
 struct folio *hugetlb_cma_alloc_frozen_folio(int order, gfp_t gfp_mask,
                                      int nid, nodemask_t *nodemask);
-struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
-                                                   bool node_exact);
+void *hugetlb_cma_alloc_bootmem(struct hstate *h, int nid, bool node_exact);
 bool hugetlb_cma_exclusive_alloc(void);
 unsigned long hugetlb_cma_total_size(void);
 void hugetlb_cma_validate_params(void);
@@ -23,9 +22,8 @@ static inline struct folio *hugetlb_cma_alloc_frozen_folio(int order,
        return NULL;
 }
 
-static inline
-struct huge_bootmem_page *hugetlb_cma_alloc_bootmem(struct hstate *h, int *nid,
-                                                   bool node_exact)
+static inline void *hugetlb_cma_alloc_bootmem(struct hstate *h, int nid,
+                                             bool node_exact)
 {
        return NULL;
 }
diff --git a/mm/internal.h b/mm/internal.h
index 004a3f1d5006..6b9802460a7c 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -23,6 +23,15 @@
 #include "vma.h"
 
 struct folio_batch;
+struct hstate;
+struct cma;
+
+struct huge_bootmem_page {
+       struct list_head list;
+       struct hstate *hstate;
+       unsigned long flags;
+       struct cma *cma;
+};
 
 /*
  * Maintains state across a page table move. The operation assumes both source
-- 
2.54.0


Reply via email to