When the kpkeys_hardened_pgtables feature is enabled, make sure that
vmemmap page tables are protected by using:

* The standard pagetable_alloc() if the buddy allocator is
  available, as it already allocates protected memory.

* The memblock-based kpkeys allocator for early allocations.

These allocators are not NUMA-aware, so the page tables may be
allocated on any node. This could potentially incur some overhead on
large NUMA systems.

The arm64 hotplug code is also amended to use a matching
pagetable_free(), ensuring that the pkey is reset when the page
tables are freed. x86 already uses pagetable_free() on that path.

Unlike in vmemmap_alloc_block(), __GFP_RETRY_MAYFAIL is not used as
it isn't justified for allocating page tables - this disables the
OOM killer and we do not have a fallback if we fail to allocate page
tables. See previous discussion linked below.

Link: https://lore.kernel.org/all/[email protected]/
Signed-off-by: Kevin Brodsky <[email protected]>
---

This is a minimal patch to protect vmemmap page tables. More work
may be needed here:

* Restoring NUMA awareness

* Moving the arm64 change to a separate commit?

* General refactoring of how these page tables are allocated: since
  we are not using the standard per-level functions (e.g.
  pmd_alloc()), we are not calling pagetable_*_ctor() or
  ptdesc_set_kernel(). [Maybe that doesn't matter because these page
  tables can only be freed via vmemmap_free()?]
---
 arch/arm64/mm/mmu.c |  2 +-
 mm/sparse-vmemmap.c | 33 +++++++++++++++++++++++++--------
 2 files changed, 26 insertions(+), 9 deletions(-)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 493310cf0486..dc69553d6326 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -1441,7 +1441,7 @@ static void free_hotplug_page_range(struct page *page, size_t size,
 
 static void free_hotplug_pgtable_page(struct page *page)
 {
-       free_hotplug_page_range(page, PAGE_SIZE, NULL);
+       pagetable_free(page_ptdesc(page));
 }
 
 static bool pgtable_range_aligned(unsigned long start, unsigned long end,
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 6eadb9d116e4..c93f5b9f4a26 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -184,13 +184,29 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node,
        return pte;
 }
 
-static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node)
+static void * __meminit vmemmap_alloc_pgtable(int node)
 {
-       void *p = vmemmap_alloc_block(size, node);
+       void *p;
+
+       if (slab_is_available()) {
+               gfp_t gfp = GFP_KERNEL | __GFP_ZERO;
+               struct ptdesc *ptdesc = pagetable_alloc(gfp, 0);
+
+               return ptdesc ? ptdesc_address(ptdesc) : NULL;
+       }
+
+       if (kpkeys_hardened_pgtables_early_enabled()) {
+               phys_addr_t phys = kpkeys_physmem_pgtable_alloc();
+
+               p = phys ? phys_to_virt(phys) : NULL;
+       } else {
+               p = __earlyonly_bootmem_alloc(node, PAGE_SIZE, PAGE_SIZE,
+                                             __pa(MAX_DMA_ADDRESS));
+       }
 
        if (!p)
                return NULL;
-       memset(p, 0, size);
+       memset(p, 0, PAGE_SIZE);
 
        return p;
 }
@@ -199,7 +215,7 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node)
 {
        pmd_t *pmd = pmd_offset(pud, addr);
        if (pmd_none(*pmd)) {
-               void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
+               void *p = vmemmap_alloc_pgtable(node);
                if (!p)
                        return NULL;
                kernel_pte_init(p);
@@ -212,7 +228,7 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node)
 {
        pud_t *pud = pud_offset(p4d, addr);
        if (pud_none(*pud)) {
-               void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
+               void *p = vmemmap_alloc_pgtable(node);
                if (!p)
                        return NULL;
                pmd_init(p);
@@ -225,7 +241,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node)
 {
        p4d_t *p4d = p4d_offset(pgd, addr);
        if (p4d_none(*p4d)) {
-               void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
+               void *p = vmemmap_alloc_pgtable(node);
                if (!p)
                        return NULL;
                pud_init(p);
@@ -238,7 +254,7 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node)
 {
        pgd_t *pgd = pgd_offset_k(addr);
        if (pgd_none(*pgd)) {
-               void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
+               void *p = vmemmap_alloc_pgtable(node);
                if (!p)
                        return NULL;
                pgd_populate_kernel(addr, pgd, p);
@@ -351,10 +367,11 @@ static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone *
         * memmap_init().
         */
 
-       p = vmemmap_alloc_block_zero(PAGE_SIZE, node);
+       p = vmemmap_alloc_block(PAGE_SIZE, node);
        if (!p)
                return NULL;
 
+       memset(p, 0, PAGE_SIZE);
        tail = virt_to_page(p);
        zone->vmemmap_tails[idx] = tail;
 

-- 
2.51.2


Reply via email to