When the kpkeys_hardened_pgtables feature is enabled, make sure that vmemmap page tables are protected by using:
* The standard pagetable_alloc() if the buddy allocator is available, as it already allocates protected memory. * The memblock-based kpkeys allocator for early allocations. These allocators are not NUMA-aware, so the page tables may be allocated on any node. This could potentially incur some overhead on large NUMA systems. The arm64 hotplug code is also amended to use a matching pagetable_free(), ensuring that the pkey is reset when the page tables are freed. x86 already uses pagetable_free() on that path. Unlike in vmemmap_alloc_block(), __GFP_RETRY_MAYFAIL is not used as it isn't justified for allocating page tables - that flag prevents the OOM killer from being invoked, and we do not have a fallback if we fail to allocate page tables. See previous discussion linked below. Link: https://lore.kernel.org/all/[email protected]/ Signed-off-by: Kevin Brodsky <[email protected]> --- This is a minimal patch to protect vmemmap page tables. More work may be needed here: * Restoring NUMA awareness * Moving the arm64 change to a separate commit? * General refactoring of how these page tables are allocated: since we are not using the standard per-level functions (e.g. pmd_alloc()), we are not calling pagetable_*_ctor() or ptdesc_set_kernel(). [Maybe that doesn't matter because these page tables can only be freed via vmemmap_free()?] 
--- arch/arm64/mm/mmu.c | 2 +- mm/sparse-vmemmap.c | 33 +++++++++++++++++++++++++-------- 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 493310cf0486..dc69553d6326 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -1441,7 +1441,7 @@ static void free_hotplug_page_range(struct page *page, size_t size, static void free_hotplug_pgtable_page(struct page *page) { - free_hotplug_page_range(page, PAGE_SIZE, NULL); + pagetable_free(page_ptdesc(page)); } static bool pgtable_range_aligned(unsigned long start, unsigned long end, diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 6eadb9d116e4..c93f5b9f4a26 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -184,13 +184,29 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node, return pte; } -static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node) +static void * __meminit vmemmap_alloc_pgtable(int node) { - void *p = vmemmap_alloc_block(size, node); + void *p; + + if (slab_is_available()) { + gfp_t gfp = GFP_KERNEL | __GFP_ZERO; + struct ptdesc *ptdesc = pagetable_alloc(gfp, 0); + + return ptdesc ? ptdesc_address(ptdesc) : NULL; + } + + if (kpkeys_hardened_pgtables_early_enabled()) { + phys_addr_t phys = kpkeys_physmem_pgtable_alloc(); + + p = phys ? 
phys_to_virt(phys) : NULL; + } else { + p = __earlyonly_bootmem_alloc(node, PAGE_SIZE, PAGE_SIZE, + __pa(MAX_DMA_ADDRESS)); + } if (!p) return NULL; - memset(p, 0, size); + memset(p, 0, PAGE_SIZE); return p; } @@ -199,7 +215,7 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) { pmd_t *pmd = pmd_offset(pud, addr); if (pmd_none(*pmd)) { - void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); + void *p = vmemmap_alloc_pgtable(node); if (!p) return NULL; kernel_pte_init(p); @@ -212,7 +228,7 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) { pud_t *pud = pud_offset(p4d, addr); if (pud_none(*pud)) { - void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); + void *p = vmemmap_alloc_pgtable(node); if (!p) return NULL; pmd_init(p); @@ -225,7 +241,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) { p4d_t *p4d = p4d_offset(pgd, addr); if (p4d_none(*p4d)) { - void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); + void *p = vmemmap_alloc_pgtable(node); if (!p) return NULL; pud_init(p); @@ -238,7 +254,7 @@ pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) { pgd_t *pgd = pgd_offset_k(addr); if (pgd_none(*pgd)) { - void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); + void *p = vmemmap_alloc_pgtable(node); if (!p) return NULL; pgd_populate_kernel(addr, pgd, p); @@ -351,10 +367,11 @@ static __meminit struct page *vmemmap_get_tail(unsigned int order, struct zone * * memmap_init(). */ - p = vmemmap_alloc_block_zero(PAGE_SIZE, node); + p = vmemmap_alloc_block(PAGE_SIZE, node); if (!p) return NULL; + memset(p, 0, PAGE_SIZE); tail = virt_to_page(p); zone->vmemmap_tails[idx] = tail; -- 2.51.2
