Try to allocate kernel page tables according to the node of the memory they will map.
Signed-off-by: Nicholas Piggin <npig...@gmail.com> --- arch/powerpc/include/asm/book3s/64/hash.h | 2 +- arch/powerpc/include/asm/book3s/64/radix.h | 2 +- arch/powerpc/include/asm/sparsemem.h | 2 +- arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/mem.c | 4 +- arch/powerpc/mm/pgtable-book3s64.c | 6 +- arch/powerpc/mm/pgtable-radix.c | 178 +++++++++++++++++++---------- 7 files changed, 128 insertions(+), 68 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 0920eff731b3..b1ace9619e94 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -201,7 +201,7 @@ extern int __meminit hash__vmemmap_create_mapping(unsigned long start, extern void hash__vmemmap_remove_mapping(unsigned long start, unsigned long page_size); -int hash__create_section_mapping(unsigned long start, unsigned long end); +int hash__create_section_mapping(unsigned long start, unsigned long end, int nid); int hash__remove_section_mapping(unsigned long start, unsigned long end); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 19c44e1495ae..4edcc797cf43 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -319,7 +319,7 @@ static inline unsigned long radix__get_tree_size(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int radix__create_section_mapping(unsigned long start, unsigned long end); +int radix__create_section_mapping(unsigned long start, unsigned long end, int nid); int radix__remove_section_mapping(unsigned long start, unsigned long end); #endif /* CONFIG_MEMORY_HOTPLUG */ #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/include/asm/sparsemem.h b/arch/powerpc/include/asm/sparsemem.h index a7916ee6dfb6..bc66712bdc3c 100644 --- a/arch/powerpc/include/asm/sparsemem.h +++ b/arch/powerpc/include/asm/sparsemem.h @@ -17,7 +17,7 @@ #endif /* CONFIG_SPARSEMEM */ #ifdef CONFIG_MEMORY_HOTPLUG -extern int create_section_mapping(unsigned long start, unsigned long end); +extern int create_section_mapping(unsigned long start, unsigned long end, int nid); extern int remove_section_mapping(unsigned long start, unsigned long end); #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 7d07c7e17db6..ceb5494804b2 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -781,7 +781,7 @@ void resize_hpt_for_hotplug(unsigned long new_mem_size) } } -int hash__create_section_mapping(unsigned long start, unsigned long end) +int hash__create_section_mapping(unsigned long start, unsigned long end, int nid) { int rc = htab_bolt_mapping(start, end, __pa(start), pgprot_val(PAGE_KERNEL), mmu_linear_psize, diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 70f7b6426a15..3f75fbb10c87 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -117,7 +117,7 @@ int memory_add_physaddr_to_nid(u64 start) } #endif -int __weak create_section_mapping(unsigned long start, unsigned long end) +int __weak create_section_mapping(unsigned long start, unsigned long end, int nid) { return -ENODEV; } @@ -136,7 +136,7 @@ int arch_add_memory(int nid, u64 start, u64 size, bool want_memblock) resize_hpt_for_hotplug(memblock_phys_mem_size()); start = (unsigned long)__va(start); - rc = create_section_mapping(start, start + size); + rc = create_section_mapping(start, start + size, nid); if (rc) { pr_warn("Unable to create mapping for hot added memory 0x%llx..0x%llx: %d\n", start, start + size, rc); diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index 3b65917785a5..2b7375f05408 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -152,12 +152,12 @@ void mmu_cleanup_all(void) } #ifdef CONFIG_MEMORY_HOTPLUG -int create_section_mapping(unsigned long start, unsigned long end) +int create_section_mapping(unsigned long start, unsigned long end, int nid) { if (radix_enabled()) - return radix__create_section_mapping(start, end); + return radix__create_section_mapping(start, end, nid); - return hash__create_section_mapping(start, end); + return hash__create_section_mapping(start, end, nid); } int remove_section_mapping(unsigned long start, unsigned long end) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 573a9a2ee455..716a68baf137 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -46,79 +46,127 @@ static int native_register_process_table(unsigned long base, unsigned long pg_sz return 0; } -static __ref void *early_alloc_pgtable(unsigned long size) +static __ref void *early_alloc_pgtable(unsigned long size, int nid) { + unsigned long pa = 0; void *pt; - pt = __va(memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE)); + if (nid != -1) { + pa = memblock_alloc_base_nid(size, size, + MEMBLOCK_ALLOC_ANYWHERE, + nid, MEMBLOCK_NONE); + } + + if (!pa) + pa = memblock_alloc_base(size, size, MEMBLOCK_ALLOC_ANYWHERE); + + pt = __va(pa); memset(pt, 0, size); return pt; } +static int early_map_kernel_page(unsigned long ea, unsigned long pa, + pgprot_t flags, + unsigned int map_page_size, + int nid); + int radix__map_kernel_page(unsigned long ea, unsigned long pa, pgprot_t flags, unsigned int map_page_size) { + unsigned long pfn = pa >> PAGE_SHIFT; pgd_t *pgdp; pud_t *pudp; pmd_t *pmdp; pte_t *ptep; + /* * Make sure task size is correct as per the max adddr */ BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE); - if (slab_is_available()) { - pgdp = pgd_offset_k(ea); - pudp = pud_alloc(&init_mm, pgdp, ea); - if (!pudp) - return -ENOMEM; - if (map_page_size == PUD_SIZE) { - ptep = (pte_t *)pudp; - goto set_the_pte; - } - pmdp = pmd_alloc(&init_mm, pudp, ea); - if (!pmdp) - return -ENOMEM; - if (map_page_size == PMD_SIZE) { - ptep = pmdp_ptep(pmdp); - goto set_the_pte; - } - ptep = pte_alloc_kernel(pmdp, ea); - if (!ptep) - return -ENOMEM; - } else { - pgdp = pgd_offset_k(ea); - if (pgd_none(*pgdp)) { - pudp = early_alloc_pgtable(PUD_TABLE_SIZE); - BUG_ON(pudp == NULL); - pgd_populate(&init_mm, pgdp, pudp); - } - pudp = pud_offset(pgdp, ea); - if (map_page_size == PUD_SIZE) { - ptep = (pte_t *)pudp; - goto set_the_pte; - } - if (pud_none(*pudp)) { - pmdp = early_alloc_pgtable(PMD_TABLE_SIZE); - BUG_ON(pmdp == NULL); - pud_populate(&init_mm, pudp, pmdp); - } - pmdp = pmd_offset(pudp, ea); - if (map_page_size == PMD_SIZE) { - ptep = pmdp_ptep(pmdp); - goto set_the_pte; - } - if (!pmd_present(*pmdp)) { - ptep = early_alloc_pgtable(PAGE_SIZE); - BUG_ON(ptep == NULL); - pmd_populate_kernel(&init_mm, pmdp, ptep); - } - ptep = pte_offset_kernel(pmdp, ea); + if (unlikely(!slab_is_available())) + return early_map_kernel_page(ea, pa, flags, map_page_size, + early_pfn_to_nid(pfn)); + + /* + * Should make page table allocation functions be able to take a + * node, so we can place kernel page tables on the right nodes after + * boot. + */ + pgdp = pgd_offset_k(ea); + pudp = pud_alloc(&init_mm, pgdp, ea); + if (!pudp) + return -ENOMEM; + if (map_page_size == PUD_SIZE) { + ptep = (pte_t *)pudp; + goto set_the_pte; + } + pmdp = pmd_alloc(&init_mm, pudp, ea); + if (!pmdp) + return -ENOMEM; + if (map_page_size == PMD_SIZE) { + ptep = pmdp_ptep(pmdp); + goto set_the_pte; } + ptep = pte_alloc_kernel(pmdp, ea); + if (!ptep) + return -ENOMEM; set_the_pte: - set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, flags)); + set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); + smp_wmb(); + return 0; +} + +static int early_map_kernel_page(unsigned long ea, unsigned long pa, + pgprot_t flags, + unsigned int map_page_size, + int nid) +{ + unsigned long pfn = pa >> PAGE_SHIFT; + pgd_t *pgdp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep; + + /* + * Make sure task size is correct as per the max adddr + */ + BUILD_BUG_ON(TASK_SIZE_USER64 > RADIX_PGTABLE_RANGE); + if (slab_is_available()) + return radix__map_kernel_page(ea, pa, flags, map_page_size); + + pgdp = pgd_offset_k(ea); + if (pgd_none(*pgdp)) { + pudp = early_alloc_pgtable(PUD_TABLE_SIZE, nid); + BUG_ON(pudp == NULL); + pgd_populate(&init_mm, pgdp, pudp); + } + pudp = pud_offset(pgdp, ea); + if (map_page_size == PUD_SIZE) { + ptep = (pte_t *)pudp; + goto set_the_pte; + } + if (pud_none(*pudp)) { + pmdp = early_alloc_pgtable(PMD_TABLE_SIZE, nid); + BUG_ON(pmdp == NULL); + pud_populate(&init_mm, pudp, pmdp); + } + pmdp = pmd_offset(pudp, ea); + if (map_page_size == PMD_SIZE) { + ptep = pmdp_ptep(pmdp); + goto set_the_pte; + } + if (!pmd_present(*pmdp)) { + ptep = early_alloc_pgtable(PAGE_SIZE, nid); + BUG_ON(ptep == NULL); + pmd_populate_kernel(&init_mm, pmdp, ptep); + } + ptep = pte_offset_kernel(pmdp, ea); + +set_the_pte: + set_pte_at(&init_mm, ea, ptep, pfn_pte(pfn, flags)); smp_wmb(); return 0; } @@ -209,7 +257,8 @@ static inline void __meminit print_mapping(unsigned long start, } static int __meminit create_physical_mapping(unsigned long start, - unsigned long end) + unsigned long end, + int nid) { unsigned long vaddr, addr, mapping_size = 0; pgprot_t prot; @@ -265,7 +314,7 @@ static int __meminit create_physical_mapping(unsigned long start, else prot = PAGE_KERNEL; - rc = radix__map_kernel_page(vaddr, addr, prot, mapping_size); + rc = early_map_kernel_page(vaddr, addr, prot, mapping_size, nid); if (rc) return rc; } @@ -274,7 +323,7 @@ static int __meminit create_physical_mapping(unsigned long start, return 0; } -static void __init radix_init_pgtable(void) +void __init radix_init_pgtable(void) { unsigned long rts_field; struct memblock_region *reg; @@ -284,9 +333,11 @@ static void __init radix_init_pgtable(void) /* * Create the linear mapping, using standard page size for now */ - for_each_memblock(memory, reg) + for_each_memblock(memory, reg) { WARN_ON(create_physical_mapping(reg->base, - reg->base + reg->size)); + reg->base + reg->size, + reg->nid)); + } /* Find out how many PID bits are supported */ if (cpu_has_feature(CPU_FTR_HVMODE)) { @@ -315,7 +366,7 @@ static void __init radix_init_pgtable(void) * host. */ BUG_ON(PRTB_SIZE_SHIFT > 36); - process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); + process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT, -1); /* * Fill in the process table. */ @@ -790,9 +841,9 @@ static void remove_pagetable(unsigned long start, unsigned long end) radix__flush_tlb_kernel_range(start, end); } -int __ref radix__create_section_mapping(unsigned long start, unsigned long end) +int __ref radix__create_section_mapping(unsigned long start, unsigned long end, int nid) { - return create_physical_mapping(start, end); + return create_physical_mapping(start, end, nid); } int radix__remove_section_mapping(unsigned long start, unsigned long end) @@ -809,8 +860,17 @@ int __meminit radix__vmemmap_create_mapping(unsigned long start, { /* Create a PTE encoding */ unsigned long flags = _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_KERNEL_RW; + int nid = early_pfn_to_nid(phys >> PAGE_SHIFT); + int ret; + + if (!slab_is_available()) + ret = early_map_kernel_page(start, phys, __pgprot(flags), + page_size, nid); + else + ret = radix__map_kernel_page(start, phys, __pgprot(flags), + page_size); + BUG_ON(ret); - BUG_ON(radix__map_kernel_page(start, phys, __pgprot(flags), page_size)); return 0; } -- 2.16.1