[PATCH v3 08/17] memblock: make for_each_memblock_type() iterator private
From: Mike Rapoport for_each_memblock_type() is not used outside mm/memblock.c; move it there from include/linux/memblock.h Signed-off-by: Mike Rapoport Reviewed-by: Baoquan He --- include/linux/memblock.h | 5 - mm/memblock.c| 5 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9d925db0d355..550faf69fc1c 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -552,11 +552,6 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo region < (memblock.memblock_type.regions + memblock.memblock_type.cnt);\ region++) -#define for_each_memblock_type(i, memblock_type, rgn) \ - for (i = 0, rgn = &memblock_type->regions[0]; \ -i < memblock_type->cnt;\ -i++, rgn = &memblock_type->regions[i]) - extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, diff --git a/mm/memblock.c b/mm/memblock.c index 45f198750be9..59f3998ae5db 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -132,6 +132,11 @@ struct memblock_type physmem = { }; #endif +#define for_each_memblock_type(i, memblock_type, rgn) \ + for (i = 0, rgn = &memblock_type->regions[0]; \ +i < memblock_type->cnt;\ +i++, rgn = &memblock_type->regions[i]) + int memblock_debug __initdata_memblock; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; -- 2.26.2
[PATCH v3 07/17] microblaze: drop unneeded NUMA and sparsemem initializations
From: Mike Rapoport microblaze supports neither NUMA nor SPARSEMEM, so there is no point in calling the memblock_set_node() and sparse_memory_present_with_active_regions() functions during microblaze memory initialization. Remove these calls and the surrounding code. Signed-off-by: Mike Rapoport --- arch/microblaze/mm/init.c | 14 +- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 0880a003573d..49e0c241f9b1 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -105,9 +105,8 @@ static void __init paging_init(void) void __init setup_memory(void) { - struct memblock_region *reg; - #ifndef CONFIG_MMU + struct memblock_region *reg; u32 kernel_align_start, kernel_align_size; /* Find main memory where is the kernel */ @@ -161,17 +160,6 @@ void __init setup_memory(void) pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); - /* Add active regions with valid PFNs */ - for_each_memblock(memory, reg) { - unsigned long start_pfn, end_pfn; - - start_pfn = memblock_region_memory_base_pfn(reg); - end_pfn = memblock_region_memory_end_pfn(reg); - memblock_set_node(start_pfn << PAGE_SHIFT, - (end_pfn - start_pfn) << PAGE_SHIFT, - &memblock.memory, 0); - } - paging_init(); } -- 2.26.2
[PATCH v3 06/17] riscv: drop unneeded node initialization
From: Mike Rapoport RISC-V does not (yet) support NUMA and for UMA architectures node 0 is used implicitly during early memory initialization. There is no need to call memblock_set_node(); remove this call and the surrounding code. Signed-off-by: Mike Rapoport --- arch/riscv/mm/init.c | 9 - 1 file changed, 9 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 787c75f751a5..0485cfaacc72 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -191,15 +191,6 @@ void __init setup_bootmem(void) early_init_fdt_scan_reserved_mem(); memblock_allow_resize(); memblock_dump_all(); - - for_each_memblock(memory, reg) { - unsigned long start_pfn = memblock_region_memory_base_pfn(reg); - unsigned long end_pfn = memblock_region_memory_end_pfn(reg); - - memblock_set_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), - &memblock.memory, 0); - } } #ifdef CONFIG_MMU -- 2.26.2
[PATCH v3 05/17] h8300, nds32, openrisc: simplify detection of memory extents
From: Mike Rapoport Instead of traversing memblock.memory regions to find memory_start and memory_end, simply query memblock_{start,end}_of_DRAM(). Signed-off-by: Mike Rapoport Acked-by: Stafford Horne --- arch/h8300/kernel/setup.c| 8 +++- arch/nds32/kernel/setup.c| 8 ++-- arch/openrisc/kernel/setup.c | 9 ++--- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 28ac88358a89..0281f92eea3d 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -74,17 +74,15 @@ static void __init bootmem_init(void) memory_end = memory_start = 0; /* Find main memory where is the kernel */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) panic("No memory!"); /* setup bootmem globals (we use no_bootmem, but mm still depends on this) */ min_low_pfn = PFN_UP(memory_start); - max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = PFN_DOWN(memory_end); max_pfn = max_low_pfn; memblock_reserve(__pa(_stext), _end - _stext); diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index a066efbe53c0..c356e484dcab 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -249,12 +249,8 @@ static void __init setup_memory(void) memory_end = memory_start = 0; /* Find main memory where is the kernel */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - pr_info("%s: Memory: 0x%x-0x%x\n", __func__, - memory_start, memory_end); - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) { panic("No memory!"); diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index b18e775f8be3..5a5940f7ebb1 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -48,17 +48,12 @@ 
static void __init setup_memory(void) unsigned long ram_start_pfn; unsigned long ram_end_pfn; phys_addr_t memory_start, memory_end; - struct memblock_region *region; memory_end = memory_start = 0; /* Find main memory where is the kernel, we assume its the only one */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__, - memory_start, memory_end); - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) { panic("No memory!"); -- 2.26.2
[PATCH v3 04/17] arm64: numa: simplify dummy_numa_init()
From: Mike Rapoport dummy_numa_init() loops over memblock.memory and passes nid=0 to numa_add_memblk() which essentially wraps memblock_set_node(). However, memblock_set_node() can cope with entire memory span itself, so the loop over memblock.memory regions is redundant. Using a single call to memblock_set_node() rather than a loop also fixes an issue with a buggy ACPI firmware in which the SRAT table covers some but not all of the memory in the EFI memory map. Jonathan Cameron says: This issue can be easily triggered by having an SRAT table which fails to cover all elements of the EFI memory map. This firmware error is detected and a warning printed. e.g. "NUMA: Warning: invalid memblk node 64 [mem 0x24000-0x27fff]" At that point we fall back to dummy_numa_init(). However, the failed ACPI init has left us with our memblocks all broken up as we split them when trying to assign them to NUMA nodes. We then iterate over the memblocks and add them to node 0. numa_add_memblk() calls memblock_set_node() which merges regions that were previously split up during the earlier attempt to add them to different nodes during parsing of SRAT. This means elements are moved in the memblock array and we can end up in a different memblock after the call to numa_add_memblk(). Result is: Unable to handle kernel paging request at virtual address 3a40 Mem abort info: ESR = 0x9604 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x0004 CM = 0, WnR = 0 [3a40] user address but active_mm is swapper Internal error: Oops: 9604 [#1] PREEMPT SMP ... Call trace: sparse_init_nid+0x5c/0x2b0 sparse_init+0x138/0x170 bootmem_init+0x80/0xe0 setup_arch+0x2a0/0x5fc start_kernel+0x8c/0x648 Replace the loop with a single call to memblock_set_node() to the entire memory. 
Signed-off-by: Mike Rapoport Acked-by: Jonathan Cameron Acked-by: Catalin Marinas --- arch/arm64/mm/numa.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 73f8b49d485c..8a97cd3d2dfe 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -423,19 +423,16 @@ static int __init numa_init(int (*init_func)(void)) */ static int __init dummy_numa_init(void) { + phys_addr_t start = memblock_start_of_DRAM(); + phys_addr_t end = memblock_end_of_DRAM(); int ret; - struct memblock_region *mblk; if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ - pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); - - for_each_memblock(memory, mblk) { - ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); - if (!ret) - continue; + pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1); + ret = numa_add_memblk(0, start, end); + if (ret) { pr_err("NUMA init failed\n"); return ret; } -- 2.26.2
[PATCH v3 03/17] arm, xtensa: simplify initialization of high memory pages
From: Mike Rapoport The function free_highpages() in both arm and xtensa essentially open-code for_each_free_mem_range() loop to detect high memory pages that were not reserved and that should be initialized and passed to the buddy allocator. Replace open-coded implementation of for_each_free_mem_range() with usage of memblock API to simplify the code. Signed-off-by: Mike Rapoport Reviewed-by: Max Filippov # xtensa Tested-by: Max Filippov # xtensa --- arch/arm/mm/init.c| 48 +++-- arch/xtensa/mm/init.c | 55 --- 2 files changed, 18 insertions(+), 85 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 000c1b48e973..50a5a30a78ff 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -347,61 +347,29 @@ static void __init free_unused_memmap(void) #endif } -#ifdef CONFIG_HIGHMEM -static inline void free_area_high(unsigned long pfn, unsigned long end) -{ - for (; pfn < end; pfn++) - free_highmem_page(pfn_to_page(pfn)); -} -#endif - static void __init free_highpages(void) { #ifdef CONFIG_HIGHMEM unsigned long max_low = max_low_pfn; - struct memblock_region *mem, *res; + phys_addr_t range_start, range_end; + u64 i; /* set highmem page free */ - for_each_memblock(memory, mem) { - unsigned long start = memblock_region_memory_base_pfn(mem); - unsigned long end = memblock_region_memory_end_pfn(mem); + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + _start, _end, NULL) { + unsigned long start = PHYS_PFN(range_start); + unsigned long end = PHYS_PFN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) continue; - if (memblock_is_nomap(mem)) - continue; - /* Truncate partial highmem entries */ if (start < max_low) start = max_low; - /* Find and exclude any reserved regions */ - for_each_memblock(reserved, res) { - unsigned long res_start, res_end; - - res_start = memblock_region_reserved_base_pfn(res); - res_end = memblock_region_reserved_end_pfn(res); - - if (res_end < start) - continue; - if (res_start < start) - res_start = 
start; - if (res_start > end) - res_start = end; - if (res_end > end) - res_end = end; - if (res_start != start) - free_area_high(start, res_start); - start = res_end; - if (start == end) - break; - } - - /* And now free anything which remains */ - if (start < end) - free_area_high(start, end); + for (; start < end; start++) + free_highmem_page(pfn_to_page(start)); } #endif } diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index a05b306cf371..ad9d59d93f39 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -79,67 +79,32 @@ void __init zones_init(void) free_area_init(max_zone_pfn); } -#ifdef CONFIG_HIGHMEM -static void __init free_area_high(unsigned long pfn, unsigned long end) -{ - for (; pfn < end; pfn++) - free_highmem_page(pfn_to_page(pfn)); -} - static void __init free_highpages(void) { +#ifdef CONFIG_HIGHMEM unsigned long max_low = max_low_pfn; - struct memblock_region *mem, *res; + phys_addr_t range_start, range_end; + u64 i; - reset_all_zones_managed_pages(); /* set highmem page free */ - for_each_memblock(memory, mem) { - unsigned long start = memblock_region_memory_base_pfn(mem); - unsigned long end = memblock_region_memory_end_pfn(mem); + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + _start, _end, NULL) { + unsigned long start = PHYS_PFN(range_start); + unsigned long end = PHYS_PFN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) continue; - if (memblock_is_nomap(mem)) - continue; - /* Truncate partial highmem entries */ if (start < max_low) start = max_low; - /* Find and exclude any reserved regions */ - for_each_memblock(reserved, res) { -
[PATCH v3 02/17] dma-contiguous: simplify cma_early_percent_memory()
From: Mike Rapoport The memory size calculation in cma_early_percent_memory() traverses memblock.memory rather than simply calling memblock_phys_mem_size(). The comment in that function suggests that at some point there should have been a call to memblock_analyze() before memblock_phys_mem_size() could be used. As of now, there is no memblock_analyze() at all and memblock_phys_mem_size() can be used as soon as cold-plug memory is registered with memblock. Replace the loop over memblock.memory with a call to memblock_phys_mem_size(). Signed-off-by: Mike Rapoport Reviewed-by: Christoph Hellwig Reviewed-by: Baoquan He --- kernel/dma/contiguous.c | 11 +-- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index cff7e60968b9..0369fd5fda8f 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -73,16 +73,7 @@ early_param("cma", early_cma); static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) { - struct memblock_region *reg; - unsigned long total_pages = 0; - - /* -* We cannot use memblock_phys_mem_size() here, because -* memblock_analyze() has not been called yet. -*/ - for_each_memblock(memory, reg) - total_pages += memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); + unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size()); return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; } -- 2.26.2
[PATCH v3 01/17] KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
From: Mike Rapoport The memory size calculation in kvm_cma_reserve() traverses memblock.memory rather than simply calling memblock_phys_mem_size(). The comment in that function suggests that at some point there should have been a call to memblock_analyze() before memblock_phys_mem_size() could be used. As of now, there is no memblock_analyze() at all and memblock_phys_mem_size() can be used as soon as cold-plug memory is registered with memblock. Replace the loop over memblock.memory with a call to memblock_phys_mem_size(). Signed-off-by: Mike Rapoport --- arch/powerpc/kvm/book3s_hv_builtin.c | 12 ++-- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 073617ce83e0..8f58dd20b362 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -95,23 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma); void __init kvm_cma_reserve(void) { unsigned long align_size; - struct memblock_region *reg; - phys_addr_t selected_size = 0; + phys_addr_t selected_size; /* * We need CMA reservation only when we are in HV mode */ if (!cpu_has_feature(CPU_FTR_HVMODE)) return; - /* -* We cannot use memblock_phys_mem_size() here, because -* memblock_analyze() has not been called yet. -*/ - for_each_memblock(memory, reg) - selected_size += memblock_region_memory_end_pfn(reg) - -memblock_region_memory_base_pfn(reg); - selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; + selected_size = PAGE_ALIGN(memblock_phys_mem_size() * kvm_cma_resv_ratio / 100); if (selected_size) { pr_info("%s: reserving %ld MiB for global area\n", __func__, (unsigned long)selected_size / SZ_1M); -- 2.26.2
[PATCH v3 00/17] memblock: seasonal cleaning^w cleanup
From: Mike Rapoport Hi, These patches simplify several uses of memblock iterators and hide some of the memblock implementation details from the rest of the system. The patches are on top of v5.9-rc1. v3 changes: * rebase on v5.9-rc1, as a result this required some non-trivial changes in patches 10 and 16. I didn't add Baoquan's Reviewed-by to these patches, but I kept Thomas and Miguel * Add Acked-by from Thomas and Miguel as there were changes in MIPS and only trivial changes in .clang-format * Added Reviewed-by from Baoquan except for the patches 10 and 16 * Fixed misc build errors and warnings reported by kbuild bot * Updated PowerPC KVM reservation size (patch 2), as per Daniel's comment v2 changes: * replace for_each_memblock() with two versions, one for memblock.memory and another one for memblock.reserved * fix overzealous cleanup of powerpc fadump: keep the traversal over the memblocks, but use better suited iterators * don't remove traversal over memblock.reserved in x86 numa cleanup but replace for_each_memblock() with new for_each_reserved_mem_region() * simplify ramdisk and crash kernel allocations on x86 * drop more redundant and unused code: __next_reserved_mem_region() and memblock_mem_size() * add description of numa initialization fix on arm64 (thanks Jonathan) * add Acked and Reviewed tags Mike Rapoport (17): KVM: PPC: Book3S HV: simplify kvm_cma_reserve() dma-contiguous: simplify cma_early_percent_memory() arm, xtensa: simplify initialization of high memory pages arm64: numa: simplify dummy_numa_init() h8300, nds32, openrisc: simplify detection of memory extents riscv: drop unneeded node initialization microblaze: drop unneeded NUMA and sparsemem initializations memblock: make for_each_memblock_type() iterator private memblock: make memblock_debug and related functionality private memblock: reduce number of parameters in for_each_mem_range() arch, mm: replace for_each_memblock() with for_each_mem_pfn_range() arch, drivers: replace 
for_each_membock() with for_each_mem_range() x86/setup: simplify initrd relocation and reservation x86/setup: simplify reserve_crashkernel() memblock: remove unused memblock_mem_size() memblock: implement for_each_reserved_mem_region() using __next_mem_region() memblock: use separate iterators for memory and reserved regions .clang-format| 5 +- arch/arm/kernel/setup.c | 18 +++-- arch/arm/mm/init.c | 59 +++ arch/arm/mm/mmu.c| 39 -- arch/arm/mm/pmsa-v7.c| 23 +++--- arch/arm/mm/pmsa-v8.c| 17 ++--- arch/arm/xen/mm.c| 7 +- arch/arm64/kernel/machine_kexec_file.c | 6 +- arch/arm64/kernel/setup.c| 4 +- arch/arm64/mm/init.c | 11 +-- arch/arm64/mm/kasan_init.c | 10 +-- arch/arm64/mm/mmu.c | 11 +-- arch/arm64/mm/numa.c | 15 ++-- arch/c6x/kernel/setup.c | 9 ++- arch/h8300/kernel/setup.c| 8 +- arch/microblaze/mm/init.c| 21 ++ arch/mips/cavium-octeon/dma-octeon.c | 12 +-- arch/mips/kernel/setup.c | 31 arch/mips/netlogic/xlp/setup.c | 2 +- arch/nds32/kernel/setup.c| 8 +- arch/openrisc/kernel/setup.c | 9 +-- arch/openrisc/mm/init.c | 8 +- arch/powerpc/kernel/fadump.c | 57 +++--- arch/powerpc/kexec/file_load_64.c| 16 ++-- arch/powerpc/kvm/book3s_hv_builtin.c | 12 +-- arch/powerpc/mm/book3s64/hash_utils.c| 16 ++-- arch/powerpc/mm/book3s64/radix_pgtable.c | 10 +-- arch/powerpc/mm/kasan/kasan_init_32.c| 8 +- arch/powerpc/mm/mem.c| 33 arch/powerpc/mm/numa.c | 7 +- arch/powerpc/mm/pgtable_32.c | 8 +- arch/riscv/mm/init.c | 36 +++-- arch/riscv/mm/kasan_init.c | 10 +-- arch/s390/kernel/setup.c | 27 --- arch/s390/mm/page-states.c | 6 +- arch/s390/mm/vmem.c | 7 +- arch/sh/mm/init.c| 9 +-- arch/sparc/mm/init_64.c | 12 +-- arch/x86/kernel/setup.c | 56 +- arch/x86/mm/numa.c | 2 +- arch/xtensa/mm/init.c| 55 +++--- drivers/bus/mvebu-mbus.c | 12 +-- drivers/irqchip/irq-gic-v3-its.c | 2 +- include/linux/memblock.h | 88 +- kernel/dma/contiguous.c | 11 +-- mm/memblock.c| 95 ++-- mm/page_alloc.c | 11 ++- mm/sparse.c | 10 +-- 48 files changed, 387 insertions(+), 562 deletions(-) -- 2.26.2
Re: [PATCH v2 13/17] x86/setup: simplify initrd relocation and reservation
On Wed, Aug 05, 2020 at 12:20:24PM +0800, Baoquan He wrote: > On 08/02/20 at 07:35pm, Mike Rapoport wrote: > > From: Mike Rapoport > > > > Currently, initrd image is reserved very early during setup and then it > > might be relocated and re-reserved after the initial physical memory > > mapping is created. The "late" reservation of memblock verifies that mapped > > memory size exceeds the size of initrd, the checks whether the relocation > ~ then? Right, thanks! > > required and, if yes, relocates inirtd to a new memory allocated from > > memblock and frees the old location.
[PATCH v2 17/17] memblock: use separate iterators for memory and reserved regions
From: Mike Rapoport for_each_memblock() is used to iterate over memblock.memory in a few places that use data from memblock_region rather than the memory ranges. Introduce separate for_each_mem_region() and for_each_reserved_mem_region() to improve encapsulation of memblock internals from its users. Signed-off-by: Mike Rapoport --- .clang-format | 3 ++- arch/arm64/kernel/setup.c | 2 +- arch/arm64/mm/numa.c | 2 +- arch/mips/netlogic/xlp/setup.c | 2 +- arch/x86/mm/numa.c | 2 +- include/linux/memblock.h | 19 --- mm/memblock.c | 4 ++-- mm/page_alloc.c| 8 8 files changed, 28 insertions(+), 14 deletions(-) diff --git a/.clang-format b/.clang-format index e28a849a1c58..cff71d345456 100644 --- a/.clang-format +++ b/.clang-format @@ -201,7 +201,7 @@ ForEachMacros: - 'for_each_matching_node' - 'for_each_matching_node_and_match' - 'for_each_member' - - 'for_each_memblock' + - 'for_each_mem_region' - 'for_each_memblock_type' - 'for_each_memcg_cache_index' - 'for_each_mem_pfn_range' @@ -267,6 +267,7 @@ ForEachMacros: - 'for_each_property_of_node' - 'for_each_registered_fb' - 'for_each_reserved_mem_range' + - 'for_each_reserved_mem_region' - 'for_each_rtd_codec_dais' - 'for_each_rtd_codec_dais_rollback' - 'for_each_rtd_components' diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f3aec7244aab..52ea2f1a7184 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -217,7 +217,7 @@ static void __init request_standard_resources(void) if (!standard_resources) panic("%s: Failed to allocate %zu bytes\n", __func__, res_size); - for_each_memblock(memory, region) { + for_each_mem_region(region) { res = _resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 0cbdbcc885fb..f121e42246a6 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -350,7 +350,7 @@ static int __init numa_register_nodes(void) struct memblock_region *mblk; /* Check that valid nid is set to 
memblks */ - for_each_memblock(memory, mblk) { + for_each_mem_region(mblk) { int mblk_nid = memblock_get_region_node(mblk); if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) { diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c index 1a0fc5b62ba4..6e3102bcd2f1 100644 --- a/arch/mips/netlogic/xlp/setup.c +++ b/arch/mips/netlogic/xlp/setup.c @@ -70,7 +70,7 @@ static void nlm_fixup_mem(void) const int pref_backup = 512; struct memblock_region *mem; - for_each_memblock(memory, mem) { + for_each_mem_region(mem) { memblock_remove(mem->base + mem->size - pref_backup, pref_backup); } diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8ee952038c80..fe6ea18d6923 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -516,7 +516,7 @@ static void __init numa_clear_kernel_node_hotplug(void) * memory ranges, because quirks such as trim_snb_memory() * reserve specific pages for Sandy Bridge graphics. ] */ - for_each_memblock(reserved, mb_region) { + for_each_reserved_mem_region(mb_region) { int nid = memblock_get_region_node(mb_region); if (nid != MAX_NUMNODES) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9e51b3fd4134..a6970e058bd7 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -522,9 +522,22 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo return PFN_UP(reg->base + reg->size); } -#define for_each_memblock(memblock_type, region) \ - for (region = memblock.memblock_type.regions; \ -region < (memblock.memblock_type.regions + memblock.memblock_type.cnt);\ +/** + * for_each_mem_region - itereate over registered memory regions + * @region: loop variable + */ +#define for_each_mem_region(region)\ + for (region = memblock.memory.regions; \ +region < (memblock.memory.regions + memblock.memory.cnt); \ +region++) + +/** + * for_each_reserved_mem_region - itereate over reserved memory regions + * @region: loop variable + */ +#define 
for_each_reserved_mem_region(region) \ + for (region = memblock.reserved.regions;\ +region < (memblock.reserved.regions + memblock.reserved.cnt); \ region++) extern void *alloc_large_system_hash(const char *tablen
[PATCH v2 16/17] memblock: implement for_each_reserved_mem_region() using __next_mem_region()
From: Mike Rapoport Iteration over memblock.reserved with for_each_reserved_mem_region() used __next_reserved_mem_region() that implemented a subset of __next_mem_region(). Use __for_each_mem_range() and, essentially, __next_mem_region() with appropriate parameters to reduce code duplication. While on it, rename for_each_reserved_mem_region() to for_each_reserved_mem_range() for consistency. Signed-off-by: Mike Rapoport --- .clang-format| 2 +- arch/arm64/kernel/setup.c| 2 +- drivers/irqchip/irq-gic-v3-its.c | 2 +- include/linux/memblock.h | 12 +++-- mm/memblock.c| 46 +++- 5 files changed, 17 insertions(+), 47 deletions(-) diff --git a/.clang-format b/.clang-format index 52ededab25ce..e28a849a1c58 100644 --- a/.clang-format +++ b/.clang-format @@ -266,7 +266,7 @@ ForEachMacros: - 'for_each_process_thread' - 'for_each_property_of_node' - 'for_each_registered_fb' - - 'for_each_reserved_mem_region' + - 'for_each_reserved_mem_range' - 'for_each_rtd_codec_dais' - 'for_each_rtd_codec_dais_rollback' - 'for_each_rtd_components' diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 93b3844cf442..f3aec7244aab 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -257,7 +257,7 @@ static int __init reserve_memblock_reserved_regions(void) if (!memblock_is_region_reserved(mem->start, mem_size)) continue; - for_each_reserved_mem_region(j, _start, _end) { + for_each_reserved_mem_range(j, _start, _end) { resource_size_t start, end; start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start); diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index beac4caefad9..9971fd8cf6b6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -2192,7 +2192,7 @@ static bool gic_check_reserved_range(phys_addr_t addr, unsigned long size) addr_end = addr + size - 1; - for_each_reserved_mem_region(i, , ) { + for_each_reserved_mem_range(i, , ) { if (addr >= start && addr_end <= end) return true; } diff --git 
a/include/linux/memblock.h b/include/linux/memblock.h index ec2fd8f32a19..9e51b3fd4134 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -136,9 +136,6 @@ void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); -void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, - phys_addr_t *out_end); - void __memblock_free_late(phys_addr_t base, phys_addr_t size); /** @@ -193,7 +190,7 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t size); MEMBLOCK_NONE, p_start, p_end, NULL) /** - * for_each_reserved_mem_region - iterate over all reserved memblock areas + * for_each_reserved_mem_range - iterate over all reserved memblock areas * @i: u64 used as loop variable * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL @@ -201,10 +198,9 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t size); * Walks over reserved areas of memblock. Available as soon as memblock * is initialized. 
*/ -#define for_each_reserved_mem_region(i, p_start, p_end) \ - for (i = 0UL, __next_reserved_mem_region(, p_start, p_end); \ -i != (u64)ULLONG_MAX; \ -__next_reserved_mem_region(, p_start, p_end)) +#define for_each_reserved_mem_range(i, p_start, p_end) \ + __for_each_mem_range(i, , NULL, NUMA_NO_NODE, \ +MEMBLOCK_NONE, p_start, p_end, NULL) static inline bool memblock_is_hotpluggable(struct memblock_region *m) { diff --git a/mm/memblock.c b/mm/memblock.c index 48d614352b25..dadf579f7c53 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -946,42 +946,16 @@ int __init_memblock memblock_clear_nomap(phys_addr_t base, phys_addr_t size) return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP); } -/** - * __next_reserved_mem_region - next function for for_each_reserved_region() - * @idx: pointer to u64 loop variable - * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL - * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL - * - * Iterate over all reserved memory regions. - */ -void __init_memblock __next_reserved_mem_region(u64 *idx, - phys_addr_t *out_start, - phys_addr_t *out_end) -{ -
[PATCH v2 15/17] memblock: remove unused memblock_mem_size()
From: Mike Rapoport The only user of memblock_mem_size() was the x86 setup code; it is gone now and the memblock_mem_size() function can be removed. Signed-off-by: Mike Rapoport --- include/linux/memblock.h | 1 - mm/memblock.c| 15 --- 2 files changed, 16 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index d70c2835e913..ec2fd8f32a19 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -450,7 +450,6 @@ static inline bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); -phys_addr_t memblock_mem_size(unsigned long limit_pfn); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); diff --git a/mm/memblock.c b/mm/memblock.c index c1a4c8798973..48d614352b25 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1656,21 +1656,6 @@ phys_addr_t __init_memblock memblock_reserved_size(void) return memblock.reserved.total_size; } -phys_addr_t __init memblock_mem_size(unsigned long limit_pfn) -{ - unsigned long pages = 0; - unsigned long start_pfn, end_pfn; - int i; - - for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, NULL) { - start_pfn = min_t(unsigned long, start_pfn, limit_pfn); - end_pfn = min_t(unsigned long, end_pfn, limit_pfn); - pages += end_pfn - start_pfn; - } - - return PFN_PHYS(pages); -} - /* lowest address */ phys_addr_t __init_memblock memblock_start_of_DRAM(void) { -- 2.26.2
[PATCH v2 14/17] x86/setup: simplify reserve_crashkernel()
From: Mike Rapoport * Replace magic numbers with defines * Replace memblock_find_in_range() + memblock_reserve() with memblock_phys_alloc_range() * Stop checking for low memory size in reserve_crashkernel_low(). The allocation from limited range will anyway fail if there is no enough memory, so there is no need for extra traversal of memblock.memory Signed-off-by: Mike Rapoport --- arch/x86/kernel/setup.c | 40 ++-- 1 file changed, 14 insertions(+), 26 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d8de4053c5e8..d7ced6982524 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -419,13 +419,13 @@ static int __init reserve_crashkernel_low(void) { #ifdef CONFIG_X86_64 unsigned long long base, low_base = 0, low_size = 0; - unsigned long total_low_mem; + unsigned long low_mem_limit; int ret; - total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT)); + low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX); /* crashkernel=Y,low */ - ret = parse_crashkernel_low(boot_command_line, total_low_mem, _size, ); + ret = parse_crashkernel_low(boot_command_line, low_mem_limit, _size, ); if (ret) { /* * two parts from kernel/dma/swiotlb.c: @@ -443,23 +443,17 @@ static int __init reserve_crashkernel_low(void) return 0; } - low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN); + low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, CRASH_ADDR_LOW_MAX); if (!low_base) { pr_err("Cannot reserve %ldMB crashkernel low memory, please try smaller size.\n", (unsigned long)(low_size >> 20)); return -ENOMEM; } - ret = memblock_reserve(low_base, low_size); - if (ret) { - pr_err("%s: Error reserving crashkernel low memblock.\n", __func__); - return ret; - } - - pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System low RAM: %ldMB)\n", + pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low RAM limit: %ldMB)\n", (unsigned long)(low_size >> 20), (unsigned long)(low_base >> 
20), - (unsigned long)(total_low_mem >> 20)); + (unsigned long)(low_mem_limit >> 20)); crashk_low_res.start = low_base; crashk_low_res.end = low_base + low_size - 1; @@ -503,13 +497,13 @@ static void __init reserve_crashkernel(void) * unless "crashkernel=size[KMG],high" is specified. */ if (!high) - crash_base = memblock_find_in_range(CRASH_ALIGN, - CRASH_ADDR_LOW_MAX, - crash_size, CRASH_ALIGN); + crash_base = memblock_phys_alloc_range(crash_size, + CRASH_ALIGN, CRASH_ALIGN, + CRASH_ADDR_LOW_MAX); if (!crash_base) - crash_base = memblock_find_in_range(CRASH_ALIGN, - CRASH_ADDR_HIGH_MAX, - crash_size, CRASH_ALIGN); + crash_base = memblock_phys_alloc_range(crash_size, + CRASH_ALIGN, CRASH_ALIGN, + CRASH_ADDR_HIGH_MAX); if (!crash_base) { pr_info("crashkernel reservation failed - No suitable area found.\n"); return; @@ -517,19 +511,13 @@ static void __init reserve_crashkernel(void) } else { unsigned long long start; - start = memblock_find_in_range(crash_base, - crash_base + crash_size, - crash_size, 1 << 20); + start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base, + crash_base + crash_size); if (start != crash_base) { pr_info("crashkernel reservation failed - memory is in use.\n"); return; } } - ret = memblock_reserve(crash_base, crash_size); - if (ret) { - pr_err("%s: Error reserving crashkernel memblock.\n", __func__); - return; - } if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) { memblock_free(crash_base, crash_size); -- 2.26.2
[PATCH v2 13/17] x86/setup: simplify initrd relocation and reservation
From: Mike Rapoport Currently, the initrd image is reserved very early during setup and then it might be relocated and re-reserved after the initial physical memory mapping is created. The "late" reservation of memblock verifies that mapped memory size exceeds the size of initrd, then checks whether relocation is required and, if yes, relocates initrd to a new memory area allocated from memblock and frees the old location. The check for memory size is excessive as memblock allocation will fail anyway if there is not enough memory. Besides, there is no point in allocating memory from memblock using memblock_find_in_range() + memblock_reserve() when there exists memblock_phys_alloc_range() with the required functionality. Remove the redundant check and simplify memblock allocation. Signed-off-by: Mike Rapoport --- arch/x86/kernel/setup.c | 16 +++- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index a3767e74c758..d8de4053c5e8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -262,16 +262,12 @@ static void __init relocate_initrd(void) u64 area_size = PAGE_ALIGN(ramdisk_size); /* We need to move the initrd down into directly mapped mem */ - relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped), - area_size, PAGE_SIZE); - + relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0, + PFN_PHYS(max_pfn_mapped)); if (!relocated_ramdisk) panic("Cannot find place for new RAMDISK of size %lld\n", ramdisk_size); - /* Note: this includes all the mem currently occupied by - the initrd, we rely on that fact to keep the data intact. 
*/ - memblock_reserve(relocated_ramdisk, area_size); initrd_start = relocated_ramdisk + PAGE_OFFSET; initrd_end = initrd_start + ramdisk_size; printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n", @@ -298,13 +294,13 @@ static void __init early_reserve_initrd(void) memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image); } + static void __init reserve_initrd(void) { /* Assume only end is not page aligned */ u64 ramdisk_image = get_ramdisk_image(); u64 ramdisk_size = get_ramdisk_size(); u64 ramdisk_end = PAGE_ALIGN(ramdisk_image + ramdisk_size); - u64 mapped_size; if (!boot_params.hdr.type_of_loader || !ramdisk_image || !ramdisk_size) @@ -312,12 +308,6 @@ static void __init reserve_initrd(void) initrd_start = 0; - mapped_size = memblock_mem_size(max_pfn_mapped); - if (ramdisk_size >= (mapped_size>>1)) - panic("initrd too large to handle, " - "disabling initrd (%lld needed, %lld available)\n", - ramdisk_size, mapped_size>>1); - printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image, ramdisk_end - 1); -- 2.26.2
[PATCH v2 12/17] arch, drivers: replace for_each_memblock() with for_each_mem_range()
From: Mike Rapoport There are several occurrences of the following pattern: for_each_memblock(memory, reg) { start = __pfn_to_phys(memblock_region_memory_base_pfn(reg)); end = __pfn_to_phys(memblock_region_memory_end_pfn(reg)); /* do something with start and end */ } Using the for_each_mem_range() iterator is more appropriate in such cases and allows simpler and cleaner code. Signed-off-by: Mike Rapoport --- arch/arm/kernel/setup.c | 18 ++--- arch/arm/mm/mmu.c| 39 ++ arch/arm/mm/pmsa-v7.c| 20 +- arch/arm/mm/pmsa-v8.c| 17 arch/arm/xen/mm.c| 7 ++-- arch/arm64/mm/kasan_init.c | 10 ++--- arch/arm64/mm/mmu.c | 11 ++ arch/c6x/kernel/setup.c | 9 +++-- arch/microblaze/mm/init.c| 9 +++-- arch/mips/cavium-octeon/dma-octeon.c | 12 +++--- arch/mips/kernel/setup.c | 31 +++ arch/openrisc/mm/init.c | 8 ++-- arch/powerpc/kernel/fadump.c | 50 +++- arch/powerpc/mm/book3s64/hash_utils.c| 16 arch/powerpc/mm/book3s64/radix_pgtable.c | 11 +++--- arch/powerpc/mm/kasan/kasan_init_32.c| 8 ++-- arch/powerpc/mm/mem.c| 16 +--- arch/powerpc/mm/pgtable_32.c | 8 ++-- arch/riscv/mm/init.c | 25 +--- arch/riscv/mm/kasan_init.c | 10 ++--- arch/s390/kernel/setup.c | 27 - arch/s390/mm/vmem.c | 16 arch/sparc/mm/init_64.c | 12 ++ drivers/bus/mvebu-mbus.c | 12 +++--- drivers/s390/char/zcore.c| 9 +++-- 25 files changed, 200 insertions(+), 211 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d8e18cdd96d3..3f65d0ac9f63 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -843,19 +843,25 @@ early_param("mem", early_mem); static void __init request_standard_resources(const struct machine_desc *mdesc) { - struct memblock_region *region; + phys_addr_t start, end, res_end; struct resource *res; + u64 i; kernel_code.start = virt_to_phys(_text); kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); - for_each_memblock(memory, region) { - phys_addr_t start = 
__pfn_to_phys(memblock_region_memory_base_pfn(region)); - phys_addr_t end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + for_each_mem_range(i, , ) { unsigned long boot_alias_start; + /* +* In memblock, end points to the first byte after the +* range while in resourses, end points to the last byte in +* the range. +*/ + res_end = end - 1; + /* * Some systems have a special memory alias which is only * used for booting. We need to advertise this region to @@ -869,7 +875,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) __func__, sizeof(*res)); res->name = "System RAM (boot alias)"; res->start = boot_alias_start; - res->end = phys_to_idmap(end); + res->end = phys_to_idmap(res_end); res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(_resource, res); } @@ -880,7 +886,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) sizeof(*res)); res->name = "System RAM"; res->start = start; - res->end = end; + res->end = res_end; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(_resource, res); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 628028bfbb92..a149d9cb4fdb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1155,9 +1155,8 @@ phys_addr_t arm_lowmem_limit __initdata = 0; void __init adjust_lowmem_bounds(void) { - phys_addr_t memblock_limit = 0; - u64 vmalloc_limit; - struct memblock_region *reg; + phys_addr_t block_start, block_end, memblock_limit = 0; + u64 vmalloc_limit, i; phys_addr_t lowmem_limit = 0; /* @@ -1173,26 +1172,18 @@ void __init adjust_lowmem_bounds(void) * The first usable region must be PMD aligned. Mark its start * as MEMB
[PATCH v2 11/17] arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()
From: Mike Rapoport There are several occurrences of the following pattern: for_each_memblock(memory, reg) { start_pfn = memblock_region_memory_base_pfn(reg); end_pfn = memblock_region_memory_end_pfn(reg); /* do something with start_pfn and end_pfn */ } Rather than iterate over all memblock.memory regions and each time query for their start and end PFNs, use for_each_mem_pfn_range() iterator to get simpler and clearer code. Signed-off-by: Mike Rapoport --- arch/arm/mm/init.c | 11 --- arch/arm64/mm/init.c | 11 --- arch/powerpc/kernel/fadump.c | 11 ++- arch/powerpc/mm/mem.c| 15 --- arch/powerpc/mm/numa.c | 7 ++- arch/s390/mm/page-states.c | 6 ++ arch/sh/mm/init.c| 9 +++-- mm/memblock.c| 6 ++ mm/sparse.c | 10 -- 9 files changed, 35 insertions(+), 51 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 626af348eb8f..d630573277d1 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -304,16 +304,14 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) */ static void __init free_unused_memmap(void) { - unsigned long start, prev_end = 0; - struct memblock_region *reg; + unsigned long start, end, prev_end = 0; + int i; /* * This relies on each bank being in address order. * The banks are sorted previously in bootmem_init(). */ - for_each_memblock(memory, reg) { - start = memblock_region_memory_base_pfn(reg); - + for_each_mem_pfn_range(i, MAX_NUMNODES, , , NULL) { #ifdef CONFIG_SPARSEMEM /* * Take care not to free memmap entries that don't exist @@ -341,8 +339,7 @@ static void __init free_unused_memmap(void) * memmap entries are valid from the bank end aligned to * MAX_ORDER_NR_PAGES. 
*/ - prev_end = ALIGN(memblock_region_memory_end_pfn(reg), -MAX_ORDER_NR_PAGES); + prev_end = ALIGN(end, MAX_ORDER_NR_PAGES); } #ifdef CONFIG_SPARSEMEM diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e93cfc7c47a..291b5805457d 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -473,12 +473,10 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) */ static void __init free_unused_memmap(void) { - unsigned long start, prev_end = 0; - struct memblock_region *reg; - - for_each_memblock(memory, reg) { - start = __phys_to_pfn(reg->base); + unsigned long start, end, prev_end = 0; + int i; + for_each_mem_pfn_range(i, MAX_NUMNODES, , , NULL) { #ifdef CONFIG_SPARSEMEM /* * Take care not to free memmap entries that don't exist due @@ -498,8 +496,7 @@ static void __init free_unused_memmap(void) * memmap entries are valid from the bank end aligned to * MAX_ORDER_NR_PAGES. */ - prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size), -MAX_ORDER_NR_PAGES); + prev_end = ALIGN(end, MAX_ORDER_NR_PAGES); } #ifdef CONFIG_SPARSEMEM diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 78ab9a6ee6ac..fc85cbc66839 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1216,14 +1216,15 @@ static void fadump_free_reserved_memory(unsigned long start_pfn, */ static void fadump_release_reserved_area(u64 start, u64 end) { - u64 tstart, tend, spfn, epfn; - struct memblock_region *reg; + u64 tstart, tend, spfn, epfn, reg_spfn, reg_epfn, i; spfn = PHYS_PFN(start); epfn = PHYS_PFN(end); - for_each_memblock(memory, reg) { - tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg)); - tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg)); + + for_each_mem_pfn_range(i, MAX_NUMNODES, _spfn, _epfn, NULL) { + tstart = max_t(u64, spfn, reg_spfn); + tend = min_t(u64, epfn, reg_epfn); + if (tstart < tend) { fadump_free_reserved_memory(tstart, tend); diff --git a/arch/powerpc/mm/mem.c 
b/arch/powerpc/mm/mem.c index c2c11eb8dcfc..1364dd532107 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -192,15 +192,16 @@ void __init initmem_init(void) /* mark pages that don't exist as nosave */ static int __init mark_nonram_nosave(void) { - struct memblock_region *reg, *prev = NULL; + unsigned long spfn, epfn, prev = 0; + int i; - for_each_memblock(memory, reg) { - if (prev && - memblock_region
[PATCH v2 10/17] memblock: reduce number of parameters in for_each_mem_range()
From: Mike Rapoport Currently for_each_mem_range() iterator is the most generic way to traverse memblock regions. As such, it has 8 parameters and it is hardly convenient to users. Most users choose to utilize one of its wrappers and the only user that actually needs most of the parameters outside memblock is s390 crash dump implementation. To avoid yet another naming for memblock iterators, rename the existing for_each_mem_range() to __for_each_mem_range() and add a new for_each_mem_range() wrapper with only index, start and end parameters. The new wrapper nicely fits into init_unavailable_mem() and will be used in upcoming changes to simplify memblock traversals. Signed-off-by: Mike Rapoport Reviewed-by: Baoquan He --- .clang-format | 1 + arch/arm64/kernel/machine_kexec_file.c | 6 ++ arch/s390/kernel/crash_dump.c | 8 include/linux/memblock.h | 18 ++ mm/page_alloc.c| 3 +-- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/.clang-format b/.clang-format index a0a96088c74f..52ededab25ce 100644 --- a/.clang-format +++ b/.clang-format @@ -205,6 +205,7 @@ ForEachMacros: - 'for_each_memblock_type' - 'for_each_memcg_cache_index' - 'for_each_mem_pfn_range' + - '__for_each_mem_range' - 'for_each_mem_range' - 'for_each_mem_range_rev' - 'for_each_migratetype_order' diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 361a1143e09e..5b0e67b93cdc 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -215,8 +215,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) phys_addr_t start, end; nr_ranges = 1; /* for exclusion of crashkernel region */ - for_each_mem_range(i, , NULL, NUMA_NO_NODE, - MEMBLOCK_NONE, , , NULL) + for_each_mem_range(i, , ) nr_ranges++; cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); @@ -225,8 +224,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) cmem->max_nr_ranges = nr_ranges; cmem->nr_ranges = 0; - 
for_each_mem_range(i, , NULL, NUMA_NO_NODE, - MEMBLOCK_NONE, , , NULL) { + for_each_mem_range(i, , ) { cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f96a5857bbfd..e28085c725ff 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -549,8 +549,8 @@ static int get_mem_chunk_cnt(void) int cnt = 0; u64 idx; - for_each_mem_range(idx, , _type, NUMA_NO_NODE, - MEMBLOCK_NONE, NULL, NULL, NULL) + __for_each_mem_range(idx, , _type, NUMA_NO_NODE, +MEMBLOCK_NONE, NULL, NULL, NULL) cnt++; return cnt; } @@ -563,8 +563,8 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) phys_addr_t start, end; u64 idx; - for_each_mem_range(idx, , _type, NUMA_NO_NODE, - MEMBLOCK_NONE, , , NULL) { + __for_each_mem_range(idx, , _type, NUMA_NO_NODE, +MEMBLOCK_NONE, , , NULL) { phdr->p_filesz = end - start; phdr->p_type = PT_LOAD; phdr->p_offset = start; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e6a23b3db696..d70c2835e913 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -142,7 +142,7 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, void __memblock_free_late(phys_addr_t base, phys_addr_t size); /** - * for_each_mem_range - iterate through memblock areas from type_a and not + * __for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. 
* @i: u64 used as loop variable * @type_a: ptr to memblock_type to iterate @@ -153,7 +153,7 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t size); * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ -#define for_each_mem_range(i, type_a, type_b, nid, flags, \ +#define __for_each_mem_range(i, type_a, type_b, nid, flags,\ p_start, p_end, p_nid) \ for (i = 0, __next_mem_range(, nid, flags, type_a, type_b,\ p_start, p_end, p_nid);\ @@ -182,6 +182,16 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t size); __next_mem_range_re
[PATCH v2 09/17] memblock: make memblock_debug and related functionality private
From: Mike Rapoport The only user of memblock_dbg() outside memblock was s390 setup code and it is converted to use pr_debug() instead. This allows to stop exposing memblock_debug and memblock_dbg() to the rest of the kernel. Signed-off-by: Mike Rapoport Reviewed-by: Baoquan He --- arch/s390/kernel/setup.c | 4 ++-- include/linux/memblock.h | 12 +--- mm/memblock.c| 13 +++-- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 07aa15ba43b3..8b284cf6e199 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -776,8 +776,8 @@ static void __init memblock_add_mem_detect_info(void) unsigned long start, end; int i; - memblock_dbg("physmem info source: %s (%hhd)\n", -get_mem_info_source(), mem_detect.info_source); + pr_debug("physmem info source: %s (%hhd)\n", +get_mem_info_source(), mem_detect.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); for_each_mem_detect_block(i, , ) { diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 220b5f0dad42..e6a23b3db696 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -90,7 +90,6 @@ struct memblock { }; extern struct memblock memblock; -extern int memblock_debug; #ifndef CONFIG_ARCH_KEEP_MEMBLOCK #define __init_memblock __meminit @@ -102,9 +101,6 @@ void memblock_discard(void); static inline void memblock_discard(void) {} #endif -#define memblock_dbg(fmt, ...) 
\ - if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) - phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); void memblock_allow_resize(void); @@ -456,13 +452,7 @@ bool memblock_is_region_memory(phys_addr_t base, phys_addr_t size); bool memblock_is_reserved(phys_addr_t addr); bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); -extern void __memblock_dump_all(void); - -static inline void memblock_dump_all(void) -{ - if (memblock_debug) - __memblock_dump_all(); -} +void memblock_dump_all(void); /** * memblock_set_current_limit - Set the current allocation limit to allow diff --git a/mm/memblock.c b/mm/memblock.c index a5b9b3df81fc..824938849f6d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -134,7 +134,10 @@ struct memblock memblock __initdata_memblock = { i < memblock_type->cnt;\ i++, rgn = _type->regions[i]) -int memblock_debug __initdata_memblock; +#define memblock_dbg(fmt, ...) \ + if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) + +static int memblock_debug __initdata_memblock; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; static int memblock_memory_in_slab __initdata_memblock = 0; @@ -1919,7 +1922,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) } } -void __init_memblock __memblock_dump_all(void) +static void __init_memblock __memblock_dump_all(void) { pr_info("MEMBLOCK configuration:\n"); pr_info(" memory size = %pa reserved size = %pa\n", @@ -1933,6 +1936,12 @@ void __init_memblock __memblock_dump_all(void) #endif } +void __init_memblock memblock_dump_all(void) +{ + if (memblock_debug) + __memblock_dump_all(); +} + void __init memblock_allow_resize(void) { memblock_can_resize = 1; -- 2.26.2
[PATCH v2 08/17] memblock: make for_each_memblock_type() iterator private
From: Mike Rapoport for_each_memblock_type() is not used outside mm/memblock.c, move it there from include/linux/memblock.h Signed-off-by: Mike Rapoport Reviewed-by: Baoquan He --- include/linux/memblock.h | 5 - mm/memblock.c| 5 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 017fae833d4a..220b5f0dad42 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -532,11 +532,6 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo region < (memblock.memblock_type.regions + memblock.memblock_type.cnt);\ region++) -#define for_each_memblock_type(i, memblock_type, rgn) \ - for (i = 0, rgn = _type->regions[0]; \ -i < memblock_type->cnt;\ -i++, rgn = _type->regions[i]) - extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, diff --git a/mm/memblock.c b/mm/memblock.c index 39aceafc57f6..a5b9b3df81fc 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -129,6 +129,11 @@ struct memblock memblock __initdata_memblock = { .current_limit = MEMBLOCK_ALLOC_ANYWHERE, }; +#define for_each_memblock_type(i, memblock_type, rgn) \ + for (i = 0, rgn = _type->regions[0]; \ +i < memblock_type->cnt;\ +i++, rgn = _type->regions[i]) + int memblock_debug __initdata_memblock; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; -- 2.26.2
[PATCH v2 07/17] microblaze: drop unneeded NUMA and sparsemem initializations
From: Mike Rapoport microblaze supports neither NUMA nor SPARSEMEM, so there is no point in calling memblock_set_node() and sparse_memory_present_with_active_regions() during microblaze memory initialization. Remove these calls and the surrounding code. Signed-off-by: Mike Rapoport --- arch/microblaze/mm/init.c | 17 + 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 521b59ba716c..49e0c241f9b1 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -105,9 +105,8 @@ static void __init paging_init(void) void __init setup_memory(void) { - struct memblock_region *reg; - #ifndef CONFIG_MMU + struct memblock_region *reg; u32 kernel_align_start, kernel_align_size; /* Find main memory where is the kernel */ @@ -161,20 +160,6 @@ void __init setup_memory(void) pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); - /* Add active regions with valid PFNs */ - for_each_memblock(memory, reg) { - unsigned long start_pfn, end_pfn; - - start_pfn = memblock_region_memory_base_pfn(reg); - end_pfn = memblock_region_memory_end_pfn(reg); - memblock_set_node(start_pfn << PAGE_SHIFT, - (end_pfn - start_pfn) << PAGE_SHIFT, - , 0); - } - - /* XXX need to clip this if using highmem? */ - sparse_memory_present_with_active_regions(0); - paging_init(); } -- 2.26.2
[PATCH v2 06/17] riscv: drop unneeded node initialization
From: Mike Rapoport RISC-V does not (yet) support NUMA and for UMA architectures node 0 is used implicitly during early memory initialization. There is no need to call memblock_set_node(), remove this call and the surrounding code. Signed-off-by: Mike Rapoport --- arch/riscv/mm/init.c | 9 - 1 file changed, 9 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 79e9d55bdf1a..7440ba2cdaaa 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -191,15 +191,6 @@ void __init setup_bootmem(void) early_init_fdt_scan_reserved_mem(); memblock_allow_resize(); memblock_dump_all(); - - for_each_memblock(memory, reg) { - unsigned long start_pfn = memblock_region_memory_base_pfn(reg); - unsigned long end_pfn = memblock_region_memory_end_pfn(reg); - - memblock_set_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), - , 0); - } } #ifdef CONFIG_MMU -- 2.26.2
[PATCH v2 05/17] h8300, nds32, openrisc: simplify detection of memory extents
From: Mike Rapoport Instead of traversing memblock.memory regions to find memory_start and memory_end, simply query memblock_{start,end}_of_DRAM(). Signed-off-by: Mike Rapoport Acked-by: Stafford Horne --- arch/h8300/kernel/setup.c| 8 +++- arch/nds32/kernel/setup.c| 8 ++-- arch/openrisc/kernel/setup.c | 9 ++--- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 28ac88358a89..0281f92eea3d 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -74,17 +74,15 @@ static void __init bootmem_init(void) memory_end = memory_start = 0; /* Find main memory where is the kernel */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) panic("No memory!"); /* setup bootmem globals (we use no_bootmem, but mm still depends on this) */ min_low_pfn = PFN_UP(memory_start); - max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = PFN_DOWN(memory_end); max_pfn = max_low_pfn; memblock_reserve(__pa(_stext), _end - _stext); diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index a066efbe53c0..c356e484dcab 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -249,12 +249,8 @@ static void __init setup_memory(void) memory_end = memory_start = 0; /* Find main memory where is the kernel */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - pr_info("%s: Memory: 0x%x-0x%x\n", __func__, - memory_start, memory_end); - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) { panic("No memory!"); diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 8aa438e1f51f..c5706153d3b6 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -48,17 +48,12 @@ 
static void __init setup_memory(void) unsigned long ram_start_pfn; unsigned long ram_end_pfn; phys_addr_t memory_start, memory_end; - struct memblock_region *region; memory_end = memory_start = 0; /* Find main memory where is the kernel, we assume its the only one */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__, - memory_start, memory_end); - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) { panic("No memory!"); -- 2.26.2
[PATCH v2 04/17] arm64: numa: simplify dummy_numa_init()
From: Mike Rapoport dummy_numa_init() loops over memblock.memory and passes nid=0 to numa_add_memblk() which essentially wraps memblock_set_node(). However, memblock_set_node() can cope with entire memory span itself, so the loop over memblock.memory regions is redundant. Using a single call to memblock_set_node() rather than a loop also fixes an issue with a buggy ACPI firmware in which the SRAT table covers some but not all of the memory in the EFI memory map. Jonathan Cameron says: This issue can be easily triggered by having an SRAT table which fails to cover all elements of the EFI memory map. This firmware error is detected and a warning printed. e.g. "NUMA: Warning: invalid memblk node 64 [mem 0x24000-0x27fff]" At that point we fall back to dummy_numa_init(). However, the failed ACPI init has left us with our memblocks all broken up as we split them when trying to assign them to NUMA nodes. We then iterate over the memblocks and add them to node 0. numa_add_memblk() calls memblock_set_node() which merges regions that were previously split up during the earlier attempt to add them to different nodes during parsing of SRAT. This means elements are moved in the memblock array and we can end up in a different memblock after the call to numa_add_memblk(). Result is: Unable to handle kernel paging request at virtual address 3a40 Mem abort info: ESR = 0x9604 EC = 0x25: DABT (current EL), IL = 32 bits SET = 0, FnV = 0 EA = 0, S1PTW = 0 Data abort info: ISV = 0, ISS = 0x0004 CM = 0, WnR = 0 [3a40] user address but active_mm is swapper Internal error: Oops: 9604 [#1] PREEMPT SMP ... Call trace: sparse_init_nid+0x5c/0x2b0 sparse_init+0x138/0x170 bootmem_init+0x80/0xe0 setup_arch+0x2a0/0x5fc start_kernel+0x8c/0x648 Replace the loop with a single call to memblock_set_node() to the entire memory. 
Signed-off-by: Mike Rapoport Acked-by: Jonathan Cameron Acked-by: Catalin Marinas --- arch/arm64/mm/numa.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index aafcee3e3f7e..0cbdbcc885fb 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -423,19 +423,16 @@ static int __init numa_init(int (*init_func)(void)) */ static int __init dummy_numa_init(void) { + phys_addr_t start = memblock_start_of_DRAM(); + phys_addr_t end = memblock_end_of_DRAM(); int ret; - struct memblock_region *mblk; if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ - pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); - - for_each_memblock(memory, mblk) { - ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); - if (!ret) - continue; + pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1); + ret = numa_add_memblk(0, start, end); + if (ret) { pr_err("NUMA init failed\n"); return ret; } -- 2.26.2
[PATCH v2 03/17] arm, xtensa: simplify initialization of high memory pages
From: Mike Rapoport The function free_highpages() in both arm and xtensa essentially open-code for_each_free_mem_range() loop to detect high memory pages that were not reserved and that should be initialized and passed to the buddy allocator. Replace open-coded implementation of for_each_free_mem_range() with usage of memblock API to simplify the code. Signed-off-by: Mike Rapoport Reviewed-by: Max Filippov # xtensa Tested-by: Max Filippov # xtensa --- arch/arm/mm/init.c| 48 +++-- arch/xtensa/mm/init.c | 55 --- 2 files changed, 18 insertions(+), 85 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 01e18e43b174..626af348eb8f 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -352,61 +352,29 @@ static void __init free_unused_memmap(void) #endif } -#ifdef CONFIG_HIGHMEM -static inline void free_area_high(unsigned long pfn, unsigned long end) -{ - for (; pfn < end; pfn++) - free_highmem_page(pfn_to_page(pfn)); -} -#endif - static void __init free_highpages(void) { #ifdef CONFIG_HIGHMEM unsigned long max_low = max_low_pfn; - struct memblock_region *mem, *res; + phys_addr_t range_start, range_end; + u64 i; /* set highmem page free */ - for_each_memblock(memory, mem) { - unsigned long start = memblock_region_memory_base_pfn(mem); - unsigned long end = memblock_region_memory_end_pfn(mem); + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + _start, _end, NULL) { + unsigned long start = PHYS_PFN(range_start); + unsigned long end = PHYS_PFN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) continue; - if (memblock_is_nomap(mem)) - continue; - /* Truncate partial highmem entries */ if (start < max_low) start = max_low; - /* Find and exclude any reserved regions */ - for_each_memblock(reserved, res) { - unsigned long res_start, res_end; - - res_start = memblock_region_reserved_base_pfn(res); - res_end = memblock_region_reserved_end_pfn(res); - - if (res_end < start) - continue; - if (res_start < start) - res_start = 
start; - if (res_start > end) - res_start = end; - if (res_end > end) - res_end = end; - if (res_start != start) - free_area_high(start, res_start); - start = res_end; - if (start == end) - break; - } - - /* And now free anything which remains */ - if (start < end) - free_area_high(start, end); + for (; start < end; start++) + free_highmem_page(pfn_to_page(start)); } #endif } diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index a05b306cf371..ad9d59d93f39 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -79,67 +79,32 @@ void __init zones_init(void) free_area_init(max_zone_pfn); } -#ifdef CONFIG_HIGHMEM -static void __init free_area_high(unsigned long pfn, unsigned long end) -{ - for (; pfn < end; pfn++) - free_highmem_page(pfn_to_page(pfn)); -} - static void __init free_highpages(void) { +#ifdef CONFIG_HIGHMEM unsigned long max_low = max_low_pfn; - struct memblock_region *mem, *res; + phys_addr_t range_start, range_end; + u64 i; - reset_all_zones_managed_pages(); /* set highmem page free */ - for_each_memblock(memory, mem) { - unsigned long start = memblock_region_memory_base_pfn(mem); - unsigned long end = memblock_region_memory_end_pfn(mem); + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + _start, _end, NULL) { + unsigned long start = PHYS_PFN(range_start); + unsigned long end = PHYS_PFN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) continue; - if (memblock_is_nomap(mem)) - continue; - /* Truncate partial highmem entries */ if (start < max_low) start = max_low; - /* Find and exclude any reserved regions */ - for_each_memblock(reserved, res) { -
[PATCH v2 02/17] dma-contiguous: simplify cma_early_percent_memory()
From: Mike Rapoport The memory size calculation in cma_early_percent_memory() traverses memblock.memory rather than simply call memblock_phys_mem_size(). The comment in that function suggests that at some point there should have been a call to memblock_analyze() before memblock_phys_mem_size() could be used. As of now, there is no memblock_analyze() at all and memblock_phys_mem_size() can be used as soon as cold-plug memory is registered with memblock. Replace loop over memblock.memory with a call to memblock_phys_mem_size(). Signed-off-by: Mike Rapoport Reviewed-by: Christoph Hellwig --- kernel/dma/contiguous.c | 11 +-- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 15bc5026c485..1992afd8ca7b 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -73,16 +73,7 @@ early_param("cma", early_cma); static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) { - struct memblock_region *reg; - unsigned long total_pages = 0; - - /* -* We cannot use memblock_phys_mem_size() here, because -* memblock_analyze() has not been called yet. -*/ - for_each_memblock(memory, reg) - total_pages += memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); + unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size()); return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; } -- 2.26.2
[PATCH v2 01/17] KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
From: Mike Rapoport The memory size calculation in kvm_cma_reserve() traverses memblock.memory rather than simply call memblock_phys_mem_size(). The comment in that function suggests that at some point there should have been a call to memblock_analyze() before memblock_phys_mem_size() could be used. As of now, there is no memblock_analyze() at all and memblock_phys_mem_size() can be used as soon as cold-plug memory is registered with memblock. Replace loop over memblock.memory with a call to memblock_phys_mem_size(). Signed-off-by: Mike Rapoport --- arch/powerpc/kvm/book3s_hv_builtin.c | 11 ++- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cd3cf3d366b..56ab0d28de2a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -95,22 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma); void __init kvm_cma_reserve(void) { unsigned long align_size; - struct memblock_region *reg; - phys_addr_t selected_size = 0; + phys_addr_t selected_size; /* * We need CMA reservation only when we are in HV mode */ if (!cpu_has_feature(CPU_FTR_HVMODE)) return; - /* -* We cannot use memblock_phys_mem_size() here, because -* memblock_analyze() has not been called yet. -*/ - for_each_memblock(memory, reg) - selected_size += memblock_region_memory_end_pfn(reg) - -memblock_region_memory_base_pfn(reg); + selected_size = PHYS_PFN(memblock_phys_mem_size()); selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; if (selected_size) { pr_debug("%s: reserving %ld MiB for global area\n", __func__, -- 2.26.2
[PATCH v2 00/17] memblock: seasonal cleaning^w cleanup
From: Mike Rapoport Hi, These patches simplify several uses of memblock iterators and hide some of the memblock implementation details from the rest of the system. The patches are on top of v5.8-rc7 + cherry-pick of "mm/sparse: cleanup the code surrounding memory_present()" [1] from mmotm tree. v2 changes: * replace for_each_memblock() with two versions, one for memblock.memory and another one for memblock.reserved * fix overzealous cleanup of powerpc fadamp: keep the traversal over the memblocks, but use better suited iterators * don't remove traversal over memblock.reserved in x86 numa cleanup but replace for_each_memblock() with new for_each_reserved_mem_region() * simplify ramdisk and crash kernel allocations on x86 * drop more redundant and unused code: __next_reserved_mem_region() and memblock_mem_size() * add description of numa initialization fix on arm64 (thanks Jonathan) * add Acked and Reviewed tags [1] http://lkml.kernel.org/r/20200712083130.22919-1-r...@kernel.org Mike Rapoport (17): KVM: PPC: Book3S HV: simplify kvm_cma_reserve() dma-contiguous: simplify cma_early_percent_memory() arm, xtensa: simplify initialization of high memory pages arm64: numa: simplify dummy_numa_init() h8300, nds32, openrisc: simplify detection of memory extents riscv: drop unneeded node initialization mircoblaze: drop unneeded NUMA and sparsemem initializations memblock: make for_each_memblock_type() iterator private memblock: make memblock_debug and related functionality private memblock: reduce number of parameters in for_each_mem_range() arch, mm: replace for_each_memblock() with for_each_mem_pfn_range() arch, drivers: replace for_each_membock() with for_each_mem_range() x86/setup: simplify initrd relocation and reservation x86/setup: simplify reserve_crashkernel() memblock: remove unused memblock_mem_size() memblock: implement for_each_reserved_mem_region() using __next_mem_region() memblock: use separate iterators for memory and reserved regions .clang-format| 4 +- 
arch/arm/kernel/setup.c | 18 +++-- arch/arm/mm/init.c | 59 arch/arm/mm/mmu.c| 39 --- arch/arm/mm/pmsa-v7.c| 20 +++--- arch/arm/mm/pmsa-v8.c| 17 +++-- arch/arm/xen/mm.c| 7 +- arch/arm64/kernel/machine_kexec_file.c | 6 +- arch/arm64/kernel/setup.c| 4 +- arch/arm64/mm/init.c | 11 ++- arch/arm64/mm/kasan_init.c | 10 +-- arch/arm64/mm/mmu.c | 11 +-- arch/arm64/mm/numa.c | 15 ++--- arch/c6x/kernel/setup.c | 9 +-- arch/h8300/kernel/setup.c| 8 +-- arch/microblaze/mm/init.c| 24 ++- arch/mips/cavium-octeon/dma-octeon.c | 12 ++-- arch/mips/kernel/setup.c | 31 + arch/mips/netlogic/xlp/setup.c | 2 +- arch/nds32/kernel/setup.c| 8 +-- arch/openrisc/kernel/setup.c | 9 +-- arch/openrisc/mm/init.c | 8 ++- arch/powerpc/kernel/fadump.c | 57 arch/powerpc/kvm/book3s_hv_builtin.c | 11 +-- arch/powerpc/mm/book3s64/hash_utils.c| 16 ++--- arch/powerpc/mm/book3s64/radix_pgtable.c | 11 ++- arch/powerpc/mm/kasan/kasan_init_32.c| 8 +-- arch/powerpc/mm/mem.c| 33 + arch/powerpc/mm/numa.c | 7 +- arch/powerpc/mm/pgtable_32.c | 8 +-- arch/riscv/mm/init.c | 34 +++--- arch/riscv/mm/kasan_init.c | 10 +-- arch/s390/kernel/crash_dump.c| 8 +-- arch/s390/kernel/setup.c | 31 + arch/s390/mm/page-states.c | 6 +- arch/s390/mm/vmem.c | 16 +++-- arch/sh/mm/init.c| 9 +-- arch/sparc/mm/init_64.c | 12 ++-- arch/x86/kernel/setup.c | 56 +--- arch/x86/mm/numa.c | 2 +- arch/xtensa/mm/init.c| 55 +++ drivers/bus/mvebu-mbus.c | 12 ++-- drivers/irqchip/irq-gic-v3-its.c | 2 +- drivers/s390/char/zcore.c| 9 +-- include/linux/memblock.h | 65 +- kernel/dma/contiguous.c | 11 +-- mm/memblock.c| 85 mm/page_alloc.c | 11 ++- mm/sparse.c | 10 ++- 49 files changed, 366 insertions(+), 561 deletions(-) -- 2.26.2
Re: [PATCH 06/15] powerpc: fadump: simplify fadump_reserve_crash_area()
On Thu, Jul 30, 2020 at 10:15:13PM +1000, Michael Ellerman wrote: > Mike Rapoport writes: > > From: Mike Rapoport > > > > fadump_reserve_crash_area() reserves memory from a specified base address > > till the end of the RAM. > > > > Replace iteration through the memblock.memory with a single call to > > memblock_reserve() with appropriate that will take care of proper memory > ^ > parameters? > > reservation. > > > > Signed-off-by: Mike Rapoport > > --- > > arch/powerpc/kernel/fadump.c | 20 +--- > > 1 file changed, 1 insertion(+), 19 deletions(-) > > I think this looks OK to me, but I don't have a setup to test it easily. > I've added Hari to Cc who might be able to. > > But I'll give you an ack in the hope that it works :) Actually, I did some digging in the git log and the traversal was added there on purpose by the commit b71a693d3db3 ("powerpc/fadump: exclude memory holes while reserving memory in second kernel") Presuming this is still required I'm going to drop this patch and will simply replace for_each_memblock() with for_each_mem_range() in v2. 
> Acked-by: Michael Ellerman > > > > diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c > > index 78ab9a6ee6ac..2446a61e3c25 100644 > > --- a/arch/powerpc/kernel/fadump.c > > +++ b/arch/powerpc/kernel/fadump.c > > @@ -1658,25 +1658,7 @@ int __init fadump_reserve_mem(void) > > /* Preserve everything above the base address */ > > static void __init fadump_reserve_crash_area(u64 base) > > { > > - struct memblock_region *reg; > > - u64 mstart, msize; > > - > > - for_each_memblock(memory, reg) { > > - mstart = reg->base; > > - msize = reg->size; > > - > > - if ((mstart + msize) < base) > > - continue; > > - > > - if (mstart < base) { > > - msize -= (base - mstart); > > - mstart = base; > > - } > > - > > - pr_info("Reserving %lluMB of memory at %#016llx for preserving > > crash data", > > - (msize >> 20), mstart); > > - memblock_reserve(mstart, msize); > > - } > > + memblock_reserve(base, memblock_end_of_DRAM() - base); > > } > > > > unsigned long __init arch_reserved_kernel_pages(void) > > -- > > 2.26.2 -- Sincerely yours, Mike.
Re: [PATCH 14/15] x86/numa: remove redundant iteration over memblock.reserved
On Tue, Jul 28, 2020 at 07:02:54PM +0800, Baoquan He wrote: > On 07/28/20 at 08:11am, Mike Rapoport wrote: > > From: Mike Rapoport > > > > numa_clear_kernel_node_hotplug() function first traverses numa_meminfo > > regions to set node ID in memblock.reserved and than traverses > > memblock.reserved to update reserved_nodemask to include node IDs that were > > set in the first loop. > > > > Remove redundant traversal over memblock.reserved and update > > reserved_nodemask while iterating over numa_meminfo. > > > > Signed-off-by: Mike Rapoport > > --- > > arch/x86/mm/numa.c | 26 ++ > > 1 file changed, 10 insertions(+), 16 deletions(-) > > > > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c > > index 8ee952038c80..4078abd33938 100644 > > --- a/arch/x86/mm/numa.c > > +++ b/arch/x86/mm/numa.c > > @@ -498,31 +498,25 @@ static void __init > > numa_clear_kernel_node_hotplug(void) > > * and use those ranges to set the nid in memblock.reserved. > > * This will split up the memblock regions along node > > * boundaries and will set the node IDs as well. > > +* > > +* The nid will also be set in reserved_nodemask which is later > > +* used to clear MEMBLOCK_HOTPLUG flag. > > +* > > +* [ Note, when booting with mem=nn[kMG] or in a kdump kernel, > > +* numa_meminfo might not include all memblock.reserved > > +* memory ranges, because quirks such as trim_snb_memory() > > +* reserve specific pages for Sandy Bridge graphics. > > +* These ranges will remain with nid == MAX_NUMNODES. ] > > */ > > for (i = 0; i < numa_meminfo.nr_blks; i++) { > > struct numa_memblk *mb = numa_meminfo.blk + i; > > int ret; > > > > ret = memblock_set_node(mb->start, mb->end - mb->start, > > , mb->nid); > > + node_set(mb->nid, reserved_nodemask); > > Really? This will set all node id into reserved_nodemask. But in the > current code, it's setting nid into memblock reserved region which > interleaves with numa_memoinfo, then get those nid and set it in > reserved_nodemask. 
This is so different, with my understanding. Please > correct me if I am wrong. You are right, I've missed the intersections of numa_meminfo with memblock.reserved. x86 interaction with memblock is so, hmm, interesting... > Thanks > Baoquan > > > WARN_ON_ONCE(ret); > > } > > > > - /* > > -* Now go over all reserved memblock regions, to construct a > > -* node mask of all kernel reserved memory areas. > > -* > > -* [ Note, when booting with mem=nn[kMG] or in a kdump kernel, > > -* numa_meminfo might not include all memblock.reserved > > -* memory ranges, because quirks such as trim_snb_memory() > > -* reserve specific pages for Sandy Bridge graphics. ] > > -*/ > > - for_each_memblock(reserved, mb_region) { > > - int nid = memblock_get_region_node(mb_region); > > - > > - if (nid != MAX_NUMNODES) > > - node_set(nid, reserved_nodemask); > > - } > > - > > /* > > * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory > > * belonging to the reserved node mask. > > -- > > 2.26.2 > > > > > -- Sincerely yours, Mike.
Re: [PATCH 14/15] x86/numa: remove redundant iteration over memblock.reserved
On Tue, Jul 28, 2020 at 12:44:40PM +0200, Ingo Molnar wrote: > > * Mike Rapoport wrote: > > > From: Mike Rapoport > > > > numa_clear_kernel_node_hotplug() function first traverses numa_meminfo > > regions to set node ID in memblock.reserved and then traverses > > memblock.reserved to update reserved_nodemask to include node IDs that were > > set in the first loop. > > > > Remove redundant traversal over memblock.reserved and update > > reserved_nodemask while iterating over numa_meminfo. > > > > Signed-off-by: Mike Rapoport > > --- > > arch/x86/mm/numa.c | 26 ++ > > 1 file changed, 10 insertions(+), 16 deletions(-) > > I suspect you'd like to carry this in the -mm tree? Yes. > Acked-by: Ingo Molnar Thanks! > Thanks, > > Ingo -- Sincerely yours, Mike.
[PATCH 15/15] memblock: remove 'type' parameter from for_each_memblock()
From: Mike Rapoport for_each_memblock() is used exclusively to iterate over memblock.memory in a few places that use data from memblock_region rather than the memory ranges. Remove type parameter from the for_each_memblock() iterator to improve encapsulation of memblock internals from its users. Signed-off-by: Mike Rapoport --- arch/arm64/kernel/setup.c | 2 +- arch/arm64/mm/numa.c | 2 +- arch/mips/netlogic/xlp/setup.c | 2 +- include/linux/memblock.h | 10 +++--- mm/memblock.c | 4 ++-- mm/page_alloc.c| 8 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 93b3844cf442..23da7908cbed 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -217,7 +217,7 @@ static void __init request_standard_resources(void) if (!standard_resources) panic("%s: Failed to allocate %zu bytes\n", __func__, res_size); - for_each_memblock(memory, region) { + for_each_memblock(region) { res = _resources[i++]; if (memblock_is_nomap(region)) { res->name = "reserved"; diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index 0cbdbcc885fb..08721d2c0b79 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -350,7 +350,7 @@ static int __init numa_register_nodes(void) struct memblock_region *mblk; /* Check that valid nid is set to memblks */ - for_each_memblock(memory, mblk) { + for_each_memblock(mblk) { int mblk_nid = memblock_get_region_node(mblk); if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) { diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c index 1a0fc5b62ba4..e69d9fc468cf 100644 --- a/arch/mips/netlogic/xlp/setup.c +++ b/arch/mips/netlogic/xlp/setup.c @@ -70,7 +70,7 @@ static void nlm_fixup_mem(void) const int pref_backup = 512; struct memblock_region *mem; - for_each_memblock(memory, mem) { + for_each_memblock(mem) { memblock_remove(mem->base + mem->size - pref_backup, pref_backup); } diff --git a/include/linux/memblock.h b/include/linux/memblock.h 
index d70c2835e913..c901cb8ecf92 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -527,9 +527,13 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo return PFN_UP(reg->base + reg->size); } -#define for_each_memblock(memblock_type, region) \ - for (region = memblock.memblock_type.regions; \ -region < (memblock.memblock_type.regions + memblock.memblock_type.cnt);\ +/** + * for_each_memblock - itereate over registered memory regions + * @region: loop variable + */ +#define for_each_memblock(region) \ + for (region = memblock.memory.regions; \ +region < (memblock.memory.regions + memblock.memory.cnt); \ region++) extern void *alloc_large_system_hash(const char *tablename, diff --git a/mm/memblock.c b/mm/memblock.c index 2ad5e6e47215..550bb72cf6cb 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1694,7 +1694,7 @@ static phys_addr_t __init_memblock __find_max_addr(phys_addr_t limit) * the memory memblock regions, if the @limit exceeds the total size * of those regions, max_addr will keep original value PHYS_ADDR_MAX */ - for_each_memblock(memory, r) { + for_each_memblock(r) { if (limit <= r->size) { max_addr = r->base + limit; break; @@ -1864,7 +1864,7 @@ void __init_memblock memblock_trim_memory(phys_addr_t align) phys_addr_t start, end, orig_start, orig_end; struct memblock_region *r; - for_each_memblock(memory, r) { + for_each_memblock(r) { orig_start = r->base; orig_end = r->base + r->size; start = round_up(orig_start, align); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 95af111d69d3..8a19f46dc86e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5927,7 +5927,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn) if (mirrored_kernelcore && zone == ZONE_MOVABLE) { if (!r || *pfn >= memblock_region_memory_end_pfn(r)) { - for_each_memblock(memory, r) { + for_each_memblock(r) { if (*pfn < memblock_region_memory_end_pfn(r)) break; } @@ -6528,7 +6528,7 @@ static unsigned long __init 
zone_absent_pages_in_node(int nid,
[PATCH 14/15] x86/numa: remove redundant iteration over memblock.reserved
From: Mike Rapoport numa_clear_kernel_node_hotplug() function first traverses numa_meminfo regions to set node ID in memblock.reserved and then traverses memblock.reserved to update reserved_nodemask to include node IDs that were set in the first loop. Remove redundant traversal over memblock.reserved and update reserved_nodemask while iterating over numa_meminfo. Signed-off-by: Mike Rapoport --- arch/x86/mm/numa.c | 26 ++ 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 8ee952038c80..4078abd33938 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -498,31 +498,25 @@ static void __init numa_clear_kernel_node_hotplug(void) * and use those ranges to set the nid in memblock.reserved. * This will split up the memblock regions along node * boundaries and will set the node IDs as well. +* +* The nid will also be set in reserved_nodemask which is later +* used to clear MEMBLOCK_HOTPLUG flag. +* +* [ Note, when booting with mem=nn[kMG] or in a kdump kernel, +* numa_meminfo might not include all memblock.reserved +* memory ranges, because quirks such as trim_snb_memory() +* reserve specific pages for Sandy Bridge graphics. +* These ranges will remain with nid == MAX_NUMNODES. ] */ for (i = 0; i < numa_meminfo.nr_blks; i++) { struct numa_memblk *mb = numa_meminfo.blk + i; int ret; ret = memblock_set_node(mb->start, mb->end - mb->start, &memblock.reserved, mb->nid); + node_set(mb->nid, reserved_nodemask); WARN_ON_ONCE(ret); } - /* -* Now go over all reserved memblock regions, to construct a -* node mask of all kernel reserved memory areas. -* -* [ Note, when booting with mem=nn[kMG] or in a kdump kernel, -* numa_meminfo might not include all memblock.reserved -* memory ranges, because quirks such as trim_snb_memory() -* reserve specific pages for Sandy Bridge graphics. 
] -*/ - for_each_memblock(reserved, mb_region) { - int nid = memblock_get_region_node(mb_region); - - if (nid != MAX_NUMNODES) - node_set(nid, reserved_nodemask); - } - /* * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory * belonging to the reserved node mask. -- 2.26.2
[PATCH 13/15] arch, drivers: replace for_each_memblock() with for_each_mem_range()
From: Mike Rapoport There are several occurrences of the following pattern: for_each_memblock(memory, reg) { start = __pfn_to_phys(memblock_region_memory_base_pfn(reg); end = __pfn_to_phys(memblock_region_memory_end_pfn(reg)); /* do something with start and end */ } Using for_each_mem_range() iterator is more appropriate in such cases and allows simpler and cleaner code. Signed-off-by: Mike Rapoport --- arch/arm/kernel/setup.c | 18 +++ arch/arm/mm/mmu.c| 39 arch/arm/mm/pmsa-v7.c| 20 ++-- arch/arm/mm/pmsa-v8.c| 17 +-- arch/arm/xen/mm.c| 7 +++-- arch/arm64/mm/kasan_init.c | 8 ++--- arch/arm64/mm/mmu.c | 11 ++- arch/c6x/kernel/setup.c | 9 +++--- arch/microblaze/mm/init.c| 9 +++--- arch/mips/cavium-octeon/dma-octeon.c | 12 arch/mips/kernel/setup.c | 31 +-- arch/openrisc/mm/init.c | 8 +++-- arch/powerpc/kernel/fadump.c | 27 +++- arch/powerpc/mm/book3s64/hash_utils.c| 16 +- arch/powerpc/mm/book3s64/radix_pgtable.c | 11 +++ arch/powerpc/mm/kasan/kasan_init_32.c| 8 ++--- arch/powerpc/mm/mem.c| 16 ++ arch/powerpc/mm/pgtable_32.c | 8 ++--- arch/riscv/mm/init.c | 24 ++- arch/riscv/mm/kasan_init.c | 10 +++--- arch/s390/kernel/setup.c | 27 ++-- arch/s390/mm/vmem.c | 16 +- arch/sparc/mm/init_64.c | 12 +++- drivers/bus/mvebu-mbus.c | 12 drivers/s390/char/zcore.c| 9 +++--- 25 files changed, 187 insertions(+), 198 deletions(-) diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index d8e18cdd96d3..3f65d0ac9f63 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -843,19 +843,25 @@ early_param("mem", early_mem); static void __init request_standard_resources(const struct machine_desc *mdesc) { - struct memblock_region *region; + phys_addr_t start, end, res_end; struct resource *res; + u64 i; kernel_code.start = virt_to_phys(_text); kernel_code.end = virt_to_phys(__init_begin - 1); kernel_data.start = virt_to_phys(_sdata); kernel_data.end = virt_to_phys(_end - 1); - for_each_memblock(memory, region) { - phys_addr_t start = 
__pfn_to_phys(memblock_region_memory_base_pfn(region)); - phys_addr_t end = __pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1; + for_each_mem_range(i, , ) { unsigned long boot_alias_start; + /* +* In memblock, end points to the first byte after the +* range while in resourses, end points to the last byte in +* the range. +*/ + res_end = end - 1; + /* * Some systems have a special memory alias which is only * used for booting. We need to advertise this region to @@ -869,7 +875,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) __func__, sizeof(*res)); res->name = "System RAM (boot alias)"; res->start = boot_alias_start; - res->end = phys_to_idmap(end); + res->end = phys_to_idmap(res_end); res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; request_resource(_resource, res); } @@ -880,7 +886,7 @@ static void __init request_standard_resources(const struct machine_desc *mdesc) sizeof(*res)); res->name = "System RAM"; res->start = start; - res->end = end; + res->end = res_end; res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY; request_resource(_resource, res); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index 628028bfbb92..a149d9cb4fdb 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -1155,9 +1155,8 @@ phys_addr_t arm_lowmem_limit __initdata = 0; void __init adjust_lowmem_bounds(void) { - phys_addr_t memblock_limit = 0; - u64 vmalloc_limit; - struct memblock_region *reg; + phys_addr_t block_start, block_end, memblock_limit = 0; + u64 vmalloc_limit, i; phys_addr_t lowmem_limit = 0; /* @@ -1173,26 +1172,18 @@ void __init adjust_lowmem_bounds(void) * The first usable region must be PMD alig
[PATCH 12/15] arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()
From: Mike Rapoport There are several occurrences of the following pattern: for_each_memblock(memory, reg) { start_pfn = memblock_region_memory_base_pfn(reg); end_pfn = memblock_region_memory_end_pfn(reg); /* do something with start_pfn and end_pfn */ } Rather than iterate over all memblock.memory regions and each time query for their start and end PFNs, use for_each_mem_pfn_range() iterator to get simpler and clearer code. Signed-off-by: Mike Rapoport --- arch/arm/mm/init.c | 11 --- arch/arm64/mm/init.c | 11 --- arch/powerpc/kernel/fadump.c | 11 ++- arch/powerpc/mm/mem.c| 15 --- arch/powerpc/mm/numa.c | 7 ++- arch/s390/mm/page-states.c | 6 ++ arch/sh/mm/init.c| 9 +++-- mm/memblock.c| 6 ++ mm/sparse.c | 10 -- 9 files changed, 35 insertions(+), 51 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 626af348eb8f..bb56668b4f54 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -304,16 +304,14 @@ free_memmap(unsigned long start_pfn, unsigned long end_pfn) */ static void __init free_unused_memmap(void) { - unsigned long start, prev_end = 0; - struct memblock_region *reg; + unsigned long start, end, prev_end = 0; + int i; /* * This relies on each bank being in address order. * The banks are sorted previously in bootmem_init(). */ - for_each_memblock(memory, reg) { - start = memblock_region_memory_base_pfn(reg); - + for_each_mem_pfn_range(i, NUMA_NO_NODE, , , NULL) { #ifdef CONFIG_SPARSEMEM /* * Take care not to free memmap entries that don't exist @@ -341,8 +339,7 @@ static void __init free_unused_memmap(void) * memmap entries are valid from the bank end aligned to * MAX_ORDER_NR_PAGES. 
*/ - prev_end = ALIGN(memblock_region_memory_end_pfn(reg), -MAX_ORDER_NR_PAGES); + prev_end = ALIGN(end, MAX_ORDER_NR_PAGES); } #ifdef CONFIG_SPARSEMEM diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 1e93cfc7c47a..271a8ea32482 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -473,12 +473,10 @@ static inline void free_memmap(unsigned long start_pfn, unsigned long end_pfn) */ static void __init free_unused_memmap(void) { - unsigned long start, prev_end = 0; - struct memblock_region *reg; - - for_each_memblock(memory, reg) { - start = __phys_to_pfn(reg->base); + unsigned long start, end, prev_end = 0; + int i; + for_each_mem_pfn_range(i, NUMA_NO_NODE, , , NULL) { #ifdef CONFIG_SPARSEMEM /* * Take care not to free memmap entries that don't exist due @@ -498,8 +496,7 @@ static void __init free_unused_memmap(void) * memmap entries are valid from the bank end aligned to * MAX_ORDER_NR_PAGES. */ - prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size), -MAX_ORDER_NR_PAGES); + prev_end = ALIGN(end, MAX_ORDER_NR_PAGES); } #ifdef CONFIG_SPARSEMEM diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 2446a61e3c25..fdbafe417139 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1216,14 +1216,15 @@ static void fadump_free_reserved_memory(unsigned long start_pfn, */ static void fadump_release_reserved_area(u64 start, u64 end) { - u64 tstart, tend, spfn, epfn; - struct memblock_region *reg; + u64 tstart, tend, spfn, epfn, reg_spfn, reg_epfn, i; spfn = PHYS_PFN(start); epfn = PHYS_PFN(end); - for_each_memblock(memory, reg) { - tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg)); - tend = min_t(u64, epfn, memblock_region_memory_end_pfn(reg)); + + for_each_mem_pfn_range(i, NUMA_NO_NODE, _spfn, _epfn, NULL) { + tstart = max_t(u64, spfn, reg_spfn); + tend = min_t(u64, epfn, reg_epfn); + if (tstart < tend) { fadump_free_reserved_memory(tstart, tend); diff --git a/arch/powerpc/mm/mem.c 
b/arch/powerpc/mm/mem.c index c2c11eb8dcfc..38d1acd7c8ef 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -192,15 +192,16 @@ void __init initmem_init(void) /* mark pages that don't exist as nosave */ static int __init mark_nonram_nosave(void) { - struct memblock_region *reg, *prev = NULL; + unsigned long spfn, epfn, prev = 0; + int i; - for_each_memblock(memory, reg) { - if (prev && - memblock_region
[PATCH 11/15] memblock: reduce number of parameters in for_each_mem_range()
From: Mike Rapoport Currently for_each_mem_range() iterator is the most generic way to traverse memblock regions. As such, it has 8 parameters and it is hardly convenient to users. Most users choose to utilize one of its wrappers and the only user that actually needs most of the parameters outside memblock is s390 crash dump implementation. To avoid yet another naming for memblock iterators, rename the existing for_each_mem_range() to __for_each_mem_range() and add a new for_each_mem_range() wrapper with only index, start and end parameters. The new wrapper nicely fits into init_unavailable_mem() and will be used in upcoming changes to simplify memblock traversals. Signed-off-by: Mike Rapoport --- .clang-format | 1 + arch/arm64/kernel/machine_kexec_file.c | 6 ++ arch/s390/kernel/crash_dump.c | 8 include/linux/memblock.h | 18 ++ mm/page_alloc.c| 3 +-- 5 files changed, 22 insertions(+), 14 deletions(-) diff --git a/.clang-format b/.clang-format index a0a96088c74f..52ededab25ce 100644 --- a/.clang-format +++ b/.clang-format @@ -205,6 +205,7 @@ ForEachMacros: - 'for_each_memblock_type' - 'for_each_memcg_cache_index' - 'for_each_mem_pfn_range' + - '__for_each_mem_range' - 'for_each_mem_range' - 'for_each_mem_range_rev' - 'for_each_migratetype_order' diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 361a1143e09e..5b0e67b93cdc 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c @@ -215,8 +215,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) phys_addr_t start, end; nr_ranges = 1; /* for exclusion of crashkernel region */ - for_each_mem_range(i, , NULL, NUMA_NO_NODE, - MEMBLOCK_NONE, , , NULL) + for_each_mem_range(i, , ) nr_ranges++; cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL); @@ -225,8 +224,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz) cmem->max_nr_ranges = nr_ranges; cmem->nr_ranges = 0; - for_each_mem_range(i, , 
NULL, NUMA_NO_NODE, - MEMBLOCK_NONE, , , NULL) { + for_each_mem_range(i, , ) { cmem->ranges[cmem->nr_ranges].start = start; cmem->ranges[cmem->nr_ranges].end = end - 1; cmem->nr_ranges++; diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c index f96a5857bbfd..e28085c725ff 100644 --- a/arch/s390/kernel/crash_dump.c +++ b/arch/s390/kernel/crash_dump.c @@ -549,8 +549,8 @@ static int get_mem_chunk_cnt(void) int cnt = 0; u64 idx; - for_each_mem_range(idx, , _type, NUMA_NO_NODE, - MEMBLOCK_NONE, NULL, NULL, NULL) + __for_each_mem_range(idx, , _type, NUMA_NO_NODE, +MEMBLOCK_NONE, NULL, NULL, NULL) cnt++; return cnt; } @@ -563,8 +563,8 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset) phys_addr_t start, end; u64 idx; - for_each_mem_range(idx, , _type, NUMA_NO_NODE, - MEMBLOCK_NONE, , , NULL) { + __for_each_mem_range(idx, , _type, NUMA_NO_NODE, +MEMBLOCK_NONE, , , NULL) { phdr->p_filesz = end - start; phdr->p_type = PT_LOAD; phdr->p_offset = start; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e6a23b3db696..d70c2835e913 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -142,7 +142,7 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, void __memblock_free_late(phys_addr_t base, phys_addr_t size); /** - * for_each_mem_range - iterate through memblock areas from type_a and not + * __for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. 
* @i: u64 used as loop variable * @type_a: ptr to memblock_type to iterate @@ -153,7 +153,7 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t size); * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ -#define for_each_mem_range(i, type_a, type_b, nid, flags, \ +#define __for_each_mem_range(i, type_a, type_b, nid, flags,\ p_start, p_end, p_nid) \ for (i = 0, __next_mem_range(, nid, flags, type_a, type_b,\ p_start, p_end, p_nid);\ @@ -182,6 +182,16 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t size); __next_mem_range_rev(, nid, flags, type_a, type_b, \
[PATCH 10/15] memblock: make memblock_debug and related functionality private
From: Mike Rapoport The only user of memblock_dbg() outside memblock was s390 setup code and it is converted to use pr_debug() instead. This allows to stop exposing memblock_debug and memblock_dbg() to the rest of the kernel. Signed-off-by: Mike Rapoport --- arch/s390/kernel/setup.c | 4 ++-- include/linux/memblock.h | 12 +--- mm/memblock.c| 13 +++-- 3 files changed, 14 insertions(+), 15 deletions(-) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index 07aa15ba43b3..8b284cf6e199 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -776,8 +776,8 @@ static void __init memblock_add_mem_detect_info(void) unsigned long start, end; int i; - memblock_dbg("physmem info source: %s (%hhd)\n", -get_mem_info_source(), mem_detect.info_source); + pr_debug("physmem info source: %s (%hhd)\n", +get_mem_info_source(), mem_detect.info_source); /* keep memblock lists close to the kernel */ memblock_set_bottom_up(true); for_each_mem_detect_block(i, , ) { diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 220b5f0dad42..e6a23b3db696 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -90,7 +90,6 @@ struct memblock { }; extern struct memblock memblock; -extern int memblock_debug; #ifndef CONFIG_ARCH_KEEP_MEMBLOCK #define __init_memblock __meminit @@ -102,9 +101,6 @@ void memblock_discard(void); static inline void memblock_discard(void) {} #endif -#define memblock_dbg(fmt, ...) 
\ - if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) - phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); void memblock_allow_resize(void); @@ -456,13 +452,7 @@ bool memblock_is_region_memory(phys_addr_t base, phys_addr_t size); bool memblock_is_reserved(phys_addr_t addr); bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); -extern void __memblock_dump_all(void); - -static inline void memblock_dump_all(void) -{ - if (memblock_debug) - __memblock_dump_all(); -} +void memblock_dump_all(void); /** * memblock_set_current_limit - Set the current allocation limit to allow diff --git a/mm/memblock.c b/mm/memblock.c index a5b9b3df81fc..824938849f6d 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -134,7 +134,10 @@ struct memblock memblock __initdata_memblock = { i < memblock_type->cnt;\ i++, rgn = _type->regions[i]) -int memblock_debug __initdata_memblock; +#define memblock_dbg(fmt, ...) \ + if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) + +static int memblock_debug __initdata_memblock; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; static int memblock_memory_in_slab __initdata_memblock = 0; @@ -1919,7 +1922,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type) } } -void __init_memblock __memblock_dump_all(void) +static void __init_memblock __memblock_dump_all(void) { pr_info("MEMBLOCK configuration:\n"); pr_info(" memory size = %pa reserved size = %pa\n", @@ -1933,6 +1936,12 @@ void __init_memblock __memblock_dump_all(void) #endif } +void __init_memblock memblock_dump_all(void) +{ + if (memblock_debug) + __memblock_dump_all(); +} + void __init memblock_allow_resize(void) { memblock_can_resize = 1; -- 2.26.2
[PATCH 09/15] memblock: make for_each_memblock_type() iterator private
From: Mike Rapoport for_each_memblock_type() is not used outside mm/memblock.c, move it there from include/linux/memblock.h Signed-off-by: Mike Rapoport --- include/linux/memblock.h | 5 - mm/memblock.c| 5 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 017fae833d4a..220b5f0dad42 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -532,11 +532,6 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo region < (memblock.memblock_type.regions + memblock.memblock_type.cnt);\ region++) -#define for_each_memblock_type(i, memblock_type, rgn) \ - for (i = 0, rgn = _type->regions[0]; \ -i < memblock_type->cnt;\ -i++, rgn = _type->regions[i]) - extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, diff --git a/mm/memblock.c b/mm/memblock.c index 39aceafc57f6..a5b9b3df81fc 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -129,6 +129,11 @@ struct memblock memblock __initdata_memblock = { .current_limit = MEMBLOCK_ALLOC_ANYWHERE, }; +#define for_each_memblock_type(i, memblock_type, rgn) \ + for (i = 0, rgn = _type->regions[0]; \ +i < memblock_type->cnt;\ +i++, rgn = _type->regions[i]) + int memblock_debug __initdata_memblock; static bool system_has_some_mirror __initdata_memblock = false; static int memblock_can_resize __initdata_memblock; -- 2.26.2
[PATCH 08/15] microblaze: drop unneeded NUMA and sparsemem initializations
From: Mike Rapoport microblaze supports neither NUMA nor SPARSEMEM, so there is no point in calling memblock_set_node() and sparse_memory_present_with_active_regions() functions during microblaze memory initialization. Remove these calls and the surrounding code. Signed-off-by: Mike Rapoport --- arch/microblaze/mm/init.c | 17 + 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c index 521b59ba716c..49e0c241f9b1 100644 --- a/arch/microblaze/mm/init.c +++ b/arch/microblaze/mm/init.c @@ -105,9 +105,8 @@ static void __init paging_init(void) void __init setup_memory(void) { - struct memblock_region *reg; - #ifndef CONFIG_MMU + struct memblock_region *reg; u32 kernel_align_start, kernel_align_size; /* Find main memory where is the kernel */ @@ -161,20 +160,6 @@ void __init setup_memory(void) pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn); pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn); - /* Add active regions with valid PFNs */ - for_each_memblock(memory, reg) { - unsigned long start_pfn, end_pfn; - - start_pfn = memblock_region_memory_base_pfn(reg); - end_pfn = memblock_region_memory_end_pfn(reg); - memblock_set_node(start_pfn << PAGE_SHIFT, - (end_pfn - start_pfn) << PAGE_SHIFT, - , 0); - } - - /* XXX need to clip this if using highmem? */ - sparse_memory_present_with_active_regions(0); - paging_init(); } -- 2.26.2
[PATCH 07/15] riscv: drop unneeded node initialization
From: Mike Rapoport RISC-V does not (yet) support NUMA and for UMA architectures node 0 is used implicitly during early memory initialization. There is no need to call memblock_set_node(), remove this call and the surrounding code. Signed-off-by: Mike Rapoport --- arch/riscv/mm/init.c | 9 - 1 file changed, 9 deletions(-) diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index 79e9d55bdf1a..7440ba2cdaaa 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -191,15 +191,6 @@ void __init setup_bootmem(void) early_init_fdt_scan_reserved_mem(); memblock_allow_resize(); memblock_dump_all(); - - for_each_memblock(memory, reg) { - unsigned long start_pfn = memblock_region_memory_base_pfn(reg); - unsigned long end_pfn = memblock_region_memory_end_pfn(reg); - - memblock_set_node(PFN_PHYS(start_pfn), - PFN_PHYS(end_pfn - start_pfn), - , 0); - } } #ifdef CONFIG_MMU -- 2.26.2
[PATCH 06/15] powerpc: fadump: simplify fadump_reserve_crash_area()
From: Mike Rapoport fadump_reserve_crash_area() reserves memory from a specified base address till the end of the RAM. Replace iteration through the memblock.memory with a single call to memblock_reserve() with appropriate arguments that will take care of proper memory reservation. Signed-off-by: Mike Rapoport --- arch/powerpc/kernel/fadump.c | 20 +--- 1 file changed, 1 insertion(+), 19 deletions(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 78ab9a6ee6ac..2446a61e3c25 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1658,25 +1658,7 @@ int __init fadump_reserve_mem(void) /* Preserve everything above the base address */ static void __init fadump_reserve_crash_area(u64 base) { - struct memblock_region *reg; - u64 mstart, msize; - - for_each_memblock(memory, reg) { - mstart = reg->base; - msize = reg->size; - - if ((mstart + msize) < base) - continue; - - if (mstart < base) { - msize -= (base - mstart); - mstart = base; - } - - pr_info("Reserving %lluMB of memory at %#016llx for preserving crash data", - (msize >> 20), mstart); - memblock_reserve(mstart, msize); - } + memblock_reserve(base, memblock_end_of_DRAM() - base); } unsigned long __init arch_reserved_kernel_pages(void) -- 2.26.2
[PATCH 05/15] h8300, nds32, openrisc: simplify detection of memory extents
From: Mike Rapoport Instead of traversing memblock.memory regions to find memory_start and memory_end, simply query memblock_{start,end}_of_DRAM(). Signed-off-by: Mike Rapoport --- arch/h8300/kernel/setup.c| 8 +++- arch/nds32/kernel/setup.c| 8 ++-- arch/openrisc/kernel/setup.c | 9 ++--- 3 files changed, 7 insertions(+), 18 deletions(-) diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c index 28ac88358a89..0281f92eea3d 100644 --- a/arch/h8300/kernel/setup.c +++ b/arch/h8300/kernel/setup.c @@ -74,17 +74,15 @@ static void __init bootmem_init(void) memory_end = memory_start = 0; /* Find main memory where is the kernel */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) panic("No memory!"); /* setup bootmem globals (we use no_bootmem, but mm still depends on this) */ min_low_pfn = PFN_UP(memory_start); - max_low_pfn = PFN_DOWN(memblock_end_of_DRAM()); + max_low_pfn = PFN_DOWN(memory_end); max_pfn = max_low_pfn; memblock_reserve(__pa(_stext), _end - _stext); diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c index a066efbe53c0..c356e484dcab 100644 --- a/arch/nds32/kernel/setup.c +++ b/arch/nds32/kernel/setup.c @@ -249,12 +249,8 @@ static void __init setup_memory(void) memory_end = memory_start = 0; /* Find main memory where is the kernel */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - pr_info("%s: Memory: 0x%x-0x%x\n", __func__, - memory_start, memory_end); - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) { panic("No memory!"); diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c index 8aa438e1f51f..c5706153d3b6 100644 --- a/arch/openrisc/kernel/setup.c +++ b/arch/openrisc/kernel/setup.c @@ -48,17 +48,12 @@ static void __init 
setup_memory(void) unsigned long ram_start_pfn; unsigned long ram_end_pfn; phys_addr_t memory_start, memory_end; - struct memblock_region *region; memory_end = memory_start = 0; /* Find main memory where is the kernel, we assume its the only one */ - for_each_memblock(memory, region) { - memory_start = region->base; - memory_end = region->base + region->size; - printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__, - memory_start, memory_end); - } + memory_start = memblock_start_of_DRAM(); + memory_end = memblock_end_of_DRAM(); if (!memory_end) { panic("No memory!"); -- 2.26.2
[PATCH 04/15] arm64: numa: simplify dummy_numa_init()
From: Mike Rapoport dummy_numa_init() loops over memblock.memory and passes nid=0 to numa_add_memblk() which essentially wraps memblock_set_node(). However, memblock_set_node() can cope with entire memory span itself, so the loop over memblock.memory regions is redundant. Replace the loop with a single call to memblock_set_node() to the entire memory. Signed-off-by: Mike Rapoport --- arch/arm64/mm/numa.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c index aafcee3e3f7e..0cbdbcc885fb 100644 --- a/arch/arm64/mm/numa.c +++ b/arch/arm64/mm/numa.c @@ -423,19 +423,16 @@ static int __init numa_init(int (*init_func)(void)) */ static int __init dummy_numa_init(void) { + phys_addr_t start = memblock_start_of_DRAM(); + phys_addr_t end = memblock_end_of_DRAM(); int ret; - struct memblock_region *mblk; if (numa_off) pr_info("NUMA disabled\n"); /* Forced off on command line. */ - pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", - memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1); - - for_each_memblock(memory, mblk) { - ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size); - if (!ret) - continue; + pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1); + ret = numa_add_memblk(0, start, end); + if (ret) { pr_err("NUMA init failed\n"); return ret; } -- 2.26.2
[PATCH 03/15] arm, xtensa: simplify initialization of high memory pages
From: Mike Rapoport The function free_highpages() in both arm and xtensa essentially open-code for_each_free_mem_range() loop to detect high memory pages that were not reserved and that should be initialized and passed to the buddy allocator. Replace open-coded implementation of for_each_free_mem_range() with usage of memblock API to simplify the code. Signed-off-by: Mike Rapoport --- arch/arm/mm/init.c| 48 +++-- arch/xtensa/mm/init.c | 55 --- 2 files changed, 18 insertions(+), 85 deletions(-) diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 01e18e43b174..626af348eb8f 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -352,61 +352,29 @@ static void __init free_unused_memmap(void) #endif } -#ifdef CONFIG_HIGHMEM -static inline void free_area_high(unsigned long pfn, unsigned long end) -{ - for (; pfn < end; pfn++) - free_highmem_page(pfn_to_page(pfn)); -} -#endif - static void __init free_highpages(void) { #ifdef CONFIG_HIGHMEM unsigned long max_low = max_low_pfn; - struct memblock_region *mem, *res; + phys_addr_t range_start, range_end; + u64 i; /* set highmem page free */ - for_each_memblock(memory, mem) { - unsigned long start = memblock_region_memory_base_pfn(mem); - unsigned long end = memblock_region_memory_end_pfn(mem); + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + _start, _end, NULL) { + unsigned long start = PHYS_PFN(range_start); + unsigned long end = PHYS_PFN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) continue; - if (memblock_is_nomap(mem)) - continue; - /* Truncate partial highmem entries */ if (start < max_low) start = max_low; - /* Find and exclude any reserved regions */ - for_each_memblock(reserved, res) { - unsigned long res_start, res_end; - - res_start = memblock_region_reserved_base_pfn(res); - res_end = memblock_region_reserved_end_pfn(res); - - if (res_end < start) - continue; - if (res_start < start) - res_start = start; - if (res_start > end) - res_start = end; - if (res_end > end) - 
res_end = end; - if (res_start != start) - free_area_high(start, res_start); - start = res_end; - if (start == end) - break; - } - - /* And now free anything which remains */ - if (start < end) - free_area_high(start, end); + for (; start < end; start++) + free_highmem_page(pfn_to_page(start)); } #endif } diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c index a05b306cf371..ad9d59d93f39 100644 --- a/arch/xtensa/mm/init.c +++ b/arch/xtensa/mm/init.c @@ -79,67 +79,32 @@ void __init zones_init(void) free_area_init(max_zone_pfn); } -#ifdef CONFIG_HIGHMEM -static void __init free_area_high(unsigned long pfn, unsigned long end) -{ - for (; pfn < end; pfn++) - free_highmem_page(pfn_to_page(pfn)); -} - static void __init free_highpages(void) { +#ifdef CONFIG_HIGHMEM unsigned long max_low = max_low_pfn; - struct memblock_region *mem, *res; + phys_addr_t range_start, range_end; + u64 i; - reset_all_zones_managed_pages(); /* set highmem page free */ - for_each_memblock(memory, mem) { - unsigned long start = memblock_region_memory_base_pfn(mem); - unsigned long end = memblock_region_memory_end_pfn(mem); + for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, + _start, _end, NULL) { + unsigned long start = PHYS_PFN(range_start); + unsigned long end = PHYS_PFN(range_end); /* Ignore complete lowmem entries */ if (end <= max_low) continue; - if (memblock_is_nomap(mem)) - continue; - /* Truncate partial highmem entries */ if (start < max_low) start = max_low; - /* Find and exclude any reserved regions */ - for_each_memblock(reserved, res) { - unsigned long res_start, res_end; - - res_start = m
[PATCH 02/15] dma-contiguous: simplify cma_early_percent_memory()
From: Mike Rapoport The memory size calculation in cma_early_percent_memory() traverses memblock.memory rather than simply calling memblock_phys_mem_size(). The comment in that function suggests that at some point there should have been a call to memblock_analyze() before memblock_phys_mem_size() could be used. As of now, there is no memblock_analyze() at all and memblock_phys_mem_size() can be used as soon as cold-plug memory is registered with memblock. Replace the loop over memblock.memory with a call to memblock_phys_mem_size(). Signed-off-by: Mike Rapoport --- kernel/dma/contiguous.c | 11 +-- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c index 15bc5026c485..1992afd8ca7b 100644 --- a/kernel/dma/contiguous.c +++ b/kernel/dma/contiguous.c @@ -73,16 +73,7 @@ early_param("cma", early_cma); static phys_addr_t __init __maybe_unused cma_early_percent_memory(void) { - struct memblock_region *reg; - unsigned long total_pages = 0; - - /* -* We cannot use memblock_phys_mem_size() here, because -* memblock_analyze() has not been called yet. -*/ - for_each_memblock(memory, reg) - total_pages += memblock_region_memory_end_pfn(reg) - - memblock_region_memory_base_pfn(reg); + unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size()); return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT; } -- 2.26.2
[PATCH 01/15] KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
From: Mike Rapoport The memory size calculation in kvm_cma_reserve() traverses memblock.memory rather than simply calling memblock_phys_mem_size(). The comment in that function suggests that at some point there should have been a call to memblock_analyze() before memblock_phys_mem_size() could be used. As of now, there is no memblock_analyze() at all and memblock_phys_mem_size() can be used as soon as cold-plug memory is registered with memblock. Replace the loop over memblock.memory with a call to memblock_phys_mem_size(). Signed-off-by: Mike Rapoport --- arch/powerpc/kvm/book3s_hv_builtin.c | 11 ++- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 7cd3cf3d366b..56ab0d28de2a 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -95,22 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma); void __init kvm_cma_reserve(void) { unsigned long align_size; - struct memblock_region *reg; - phys_addr_t selected_size = 0; + phys_addr_t selected_size; /* * We need CMA reservation only when we are in HV mode */ if (!cpu_has_feature(CPU_FTR_HVMODE)) return; - /* -* We cannot use memblock_phys_mem_size() here, because -* memblock_analyze() has not been called yet. -*/ - for_each_memblock(memory, reg) - selected_size += memblock_region_memory_end_pfn(reg) - -memblock_region_memory_base_pfn(reg); + selected_size = PHYS_PFN(memblock_phys_mem_size()); selected_size = (selected_size * kvm_cma_resv_ratio / 100) << PAGE_SHIFT; if (selected_size) { pr_debug("%s: reserving %ld MiB for global area\n", __func__, -- 2.26.2
[PATCH 00/15] memblock: seasonal cleaning^w cleanup
From: Mike Rapoport Hi, These patches simplify several uses of memblock iterators and hide some of the memblock implementation details from the rest of the system. The patches are on top of v5.8-rc7 + cherry-pick of "mm/sparse: cleanup the code surrounding memory_present()" [1] from mmotm tree. [1] http://lkml.kernel.org/r/20200712083130.22919-1-r...@kernel.org Mike Rapoport (15): KVM: PPC: Book3S HV: simplify kvm_cma_reserve() dma-contiguous: simplify cma_early_percent_memory() arm, xtensa: simplify initialization of high memory pages arm64: numa: simplify dummy_numa_init() h8300, nds32, openrisc: simplify detection of memory extents powerpc: fadamp: simplify fadump_reserve_crash_area() riscv: drop unneeded node initialization mircoblaze: drop unneeded NUMA and sparsemem initializations memblock: make for_each_memblock_type() iterator private memblock: make memblock_debug and related functionality private memblock: reduce number of parameters in for_each_mem_range() arch, mm: replace for_each_memblock() with for_each_mem_pfn_range() arch, drivers: replace for_each_membock() with for_each_mem_range() x86/numa: remove redundant iteration over memblock.reserved memblock: remove 'type' parameter from for_each_memblock() .clang-format| 1 + arch/arm/kernel/setup.c | 18 +--- arch/arm/mm/init.c | 59 +--- arch/arm/mm/mmu.c| 39 ++-- arch/arm/mm/pmsa-v7.c| 20 arch/arm/mm/pmsa-v8.c| 17 --- arch/arm/xen/mm.c| 7 +-- arch/arm64/kernel/machine_kexec_file.c | 6 +-- arch/arm64/kernel/setup.c| 2 +- arch/arm64/mm/init.c | 11 ++--- arch/arm64/mm/kasan_init.c | 8 ++-- arch/arm64/mm/mmu.c | 11 ++--- arch/arm64/mm/numa.c | 15 +++--- arch/c6x/kernel/setup.c | 9 ++-- arch/h8300/kernel/setup.c| 8 ++-- arch/microblaze/mm/init.c| 24 ++ arch/mips/cavium-octeon/dma-octeon.c | 12 ++--- arch/mips/kernel/setup.c | 31 ++--- arch/mips/netlogic/xlp/setup.c | 2 +- arch/nds32/kernel/setup.c| 8 +--- arch/openrisc/kernel/setup.c | 9 +--- arch/openrisc/mm/init.c | 8 ++-- arch/powerpc/kernel/fadump.c | 58 
--- arch/powerpc/kvm/book3s_hv_builtin.c | 11 + arch/powerpc/mm/book3s64/hash_utils.c| 16 +++ arch/powerpc/mm/book3s64/radix_pgtable.c | 11 ++--- arch/powerpc/mm/kasan/kasan_init_32.c| 8 ++-- arch/powerpc/mm/mem.c| 33 +++-- arch/powerpc/mm/numa.c | 7 +-- arch/powerpc/mm/pgtable_32.c | 8 ++-- arch/riscv/mm/init.c | 33 - arch/riscv/mm/kasan_init.c | 10 ++-- arch/s390/kernel/crash_dump.c| 8 ++-- arch/s390/kernel/setup.c | 31 - arch/s390/mm/page-states.c | 6 +-- arch/s390/mm/vmem.c | 16 --- arch/sh/mm/init.c| 9 ++-- arch/sparc/mm/init_64.c | 12 ++--- arch/x86/mm/numa.c | 26 --- arch/xtensa/mm/init.c| 55 -- drivers/bus/mvebu-mbus.c | 12 ++--- drivers/s390/char/zcore.c| 9 ++-- include/linux/memblock.h | 45 +- kernel/dma/contiguous.c | 11 + mm/memblock.c| 28 +++ mm/page_alloc.c | 11 ++--- mm/sparse.c | 10 ++-- 47 files changed, 324 insertions(+), 485 deletions(-) -- 2.26.2
Re: [PATCH 20/20] Documentation: vm/memory-model: eliminate duplicated word
On Tue, Jul 07, 2020 at 11:04:13AM -0700, Randy Dunlap wrote: > Drop the doubled word "the". > > Signed-off-by: Randy Dunlap > Cc: Jonathan Corbet > Cc: linux-...@vger.kernel.org > Cc: Andrew Morton > Cc: linux...@kvack.org Reviewed-by: Mike Rapoport > --- > Documentation/vm/memory-model.rst |2 +- > 1 file changed, 1 insertion(+), 1 deletion(-) > > --- linux-next-20200701.orig/Documentation/vm/memory-model.rst > +++ linux-next-20200701/Documentation/vm/memory-model.rst > @@ -159,7 +159,7 @@ frame. Inside a section, the PFN is the > The sparse vmemmap uses a virtually mapped memory map to optimize > pfn_to_page and page_to_pfn operations. There is a global `struct > page *vmemmap` pointer that points to a virtually contiguous array of > -`struct page` objects. A PFN is an index to that array and the the > +`struct page` objects. A PFN is an index to that array and the > offset of the `struct page` from `vmemmap` is the PFN of that > page. > -- Sincerely yours, Mike.
Re: [PATCH 0/8] mm: cleanup usage of
Gentle ping. On Sat, Jun 27, 2020 at 05:34:45PM +0300, Mike Rapoport wrote: > From: Mike Rapoport > > Hi, > > Most architectures have very similar versions of pXd_alloc_one() and > pXd_free_one() for intermediate levels of page table. > These patches add generic versions of these functions in > and enable use of the generic functions where > appropriate. > > In addition, functions declared and defined in headers > are used mostly by core mm and early mm initialization in arch and there is > no actual reason to have the included all over the place. > The first patch in this series removes unneeded includes of > > In the end it didn't work out as neatly as I hoped and moving > pXd_alloc_track() definitions to would require > unnecessary changes to arches that have custom page table allocations, so > I've decided to move lib/ioremap.c to mm/ and make pgalloc-track.h local to > mm/. > > Joerg Roedel (1): > mm: move p?d_alloc_track to separate header file > > Mike Rapoport (7): > mm: remove unneeded includes of > opeinrisc: switch to generic version of pte allocation > xtensa: switch to generic version of pte allocation > asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one() > asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one() > asm-generic: pgalloc: provide generic pgd_free() > mm: move lib/ioremap.c to mm/ > > arch/alpha/include/asm/pgalloc.h | 21 + > arch/alpha/include/asm/tlbflush.h| 1 - > arch/alpha/kernel/core_irongate.c| 1 - > arch/alpha/kernel/core_marvel.c | 1 - > arch/alpha/kernel/core_titan.c | 1 - > arch/alpha/kernel/machvec_impl.h | 2 - > arch/alpha/kernel/smp.c | 1 - > arch/alpha/mm/numa.c | 1 - > arch/arc/mm/fault.c | 1 - > arch/arc/mm/init.c | 1 - > arch/arm/include/asm/pgalloc.h | 12 +-- > arch/arm/include/asm/tlb.h | 1 - > arch/arm/kernel/machine_kexec.c | 1 - > arch/arm/kernel/smp.c| 1 - > arch/arm/kernel/suspend.c| 1 - > arch/arm/mach-omap2/omap-mpuss-lowpower.c| 1 - > arch/arm/mm/hugetlbpage.c| 1 - > 
arch/arm/mm/mmu.c| 1 + > arch/arm64/include/asm/pgalloc.h | 39 +- > arch/arm64/kernel/smp.c | 1 - > arch/arm64/mm/hugetlbpage.c | 1 - > arch/arm64/mm/ioremap.c | 1 - > arch/arm64/mm/mmu.c | 1 + > arch/csky/include/asm/pgalloc.h | 7 +- > arch/csky/kernel/smp.c | 1 - > arch/hexagon/include/asm/pgalloc.h | 7 +- > arch/ia64/include/asm/pgalloc.h | 24 -- > arch/ia64/include/asm/tlb.h | 1 - > arch/ia64/kernel/process.c | 1 - > arch/ia64/kernel/smp.c | 1 - > arch/ia64/kernel/smpboot.c | 1 - > arch/ia64/mm/contig.c| 1 - > arch/ia64/mm/discontig.c | 1 - > arch/ia64/mm/hugetlbpage.c | 1 - > arch/ia64/mm/tlb.c | 1 - > arch/m68k/include/asm/mmu_context.h | 2 +- > arch/m68k/include/asm/sun3_pgalloc.h | 7 +- > arch/m68k/kernel/dma.c | 2 +- > arch/m68k/kernel/traps.c | 3 +- > arch/m68k/mm/cache.c | 2 +- > arch/m68k/mm/fault.c | 1 - > arch/m68k/mm/kmap.c | 2 +- > arch/m68k/mm/mcfmmu.c| 1 + > arch/m68k/mm/memory.c| 1 - > arch/m68k/sun3x/dvma.c | 2 +- > arch/microblaze/include/asm/pgalloc.h| 6 -- > arch/microblaze/include/asm/tlbflush.h | 1 - > arch/microblaze/kernel/process.c | 1 - > arch/microblaze/kernel/signal.c | 1 - > arch/mips/include/asm/pgalloc.h | 19 + > arch/mips/sgi-ip32/ip32-memory.c | 1 - > arch/nds32/mm/mm-nds32.c | 2 + > arch/nios2/include/asm/pgalloc.h | 7 +- > arch/openrisc/include/asm/pgalloc.h | 33 +--- > arch/openrisc/include/asm/tlbflush.h | 1 - > arch/openrisc/kernel/or32_ksyms.c| 1 - > arch/parisc/include/asm/mmu_context.h| 1 - > arch/parisc/include/asm/pgalloc.h| 12 +-- > arch/parisc/kernel/cache.c | 1 - > arch/parisc/kernel/p
Re: [PATCH 10/20] dm: stop using ->queuedata
On Wed, Jul 01 2020 at 4:59am -0400, Christoph Hellwig wrote: > Instead of setting up the queuedata as well just use one private data > field. > > Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer
Re: [PATCH 4/8] asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()
On Sat, Jun 27, 2020 at 08:03:04PM +0100, Matthew Wilcox wrote: > On Sat, Jun 27, 2020 at 05:34:49PM +0300, Mike Rapoport wrote: > > More elaborate versions on arm64 and x86 account memory for the user page > > tables and call to pgtable_pmd_page_ctor() as the part of PMD page > > initialization. > > > > Move the arm64 version to include/asm-generic/pgalloc.h and use the generic > > version on several architectures. > > > > The pgtable_pmd_page_ctor() is a NOP when ARCH_ENABLE_SPLIT_PMD_PTLOCK is > > not enabled, so there is no functional change for most architectures except > > of the addition of __GFP_ACCOUNT for allocation of user page tables. > > Thanks for including this line; it reminded me that we're not setting > the PageTable flag on the page, nor accounting it to the zone page stats. > Hope you don't mind me tagging a patch to do that on as 9/8. We also never set PageTable flag for early page tables and for the page tables allocated directly with get_free_page(), e.g PTI, KASAN. > We could also do with a pud_page_[cd]tor and maybe even p4d/pgd versions. > But that brings me to the next question -- could/should some of this > be moved over to asm-generic/pgalloc.h? The ctor/dtor aren't called > from anywhere else, and there's value to reducing the total amount of > code in mm.h, but then there's also value to keeping all the ifdef > ARCH_ENABLE_SPLIT_PMD_PTLOCK code together too. So I'm a bit torn. > What do you think? -- Sincerely yours, Mike.
Re: [PATCH 4/8] asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()
On Sat, Jun 27, 2020 at 08:03:04PM +0100, Matthew Wilcox wrote: > On Sat, Jun 27, 2020 at 05:34:49PM +0300, Mike Rapoport wrote: > > More elaborate versions on arm64 and x86 account memory for the user page > > tables and call to pgtable_pmd_page_ctor() as the part of PMD page > > initialization. > > > > Move the arm64 version to include/asm-generic/pgalloc.h and use the generic > > version on several architectures. > > > > The pgtable_pmd_page_ctor() is a NOP when ARCH_ENABLE_SPLIT_PMD_PTLOCK is > > not enabled, so there is no functional change for most architectures except > > of the addition of __GFP_ACCOUNT for allocation of user page tables. > > Thanks for including this line; it reminded me that we're not setting > the PageTable flag on the page, nor accounting it to the zone page stats. > Hope you don't mind me tagging a patch to do that on as 9/8. > > We could also do with a pud_page_[cd]tor and maybe even p4d/pgd versions. > But that brings me to the next question -- could/should some of this > be moved over to asm-generic/pgalloc.h? The ctor/dtor aren't called > from anywhere else, and there's value to reducing the total amount of > code in mm.h, but then there's also value to keeping all the ifdef > ARCH_ENABLE_SPLIT_PMD_PTLOCK code together too. So I'm a bit torn. > What do you think? There are architectures that don't use asm-generic/pgalloc.h but rather have their own, sometimes completely different, versions of these functions. I've tried adding linux/pgalloc.h, but I've ended up with contradicting need to include asm/pgalloc.h before the generic code for some architectures or after the generic code for others :) I think let's leave it in mm.h for now, maybe after several more cleanups we could do better. -- Sincerely yours, Mike.
Re: [PATCH 9/8] mm: Account PMD tables like PTE tables
On Sat, Jun 27, 2020 at 07:46:42PM +0100, Matthew Wilcox wrote: > We account the PTE level of the page tables to the process in order to > make smarter OOM decisions and help diagnose why memory is fragmented. > For these same reasons, we should account pages allocated for PMDs. > With larger process address spaces and ASLR, the number of PMDs in use > is higher than it used to be so the inaccuracy is starting to matter. > > Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Mike Rapoport > --- > include/linux/mm.h | 24 > 1 file changed, 20 insertions(+), 4 deletions(-) > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index dc7b87310c10..b283e25fcffa 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2271,7 +2271,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct > *mm, pmd_t *pmd) > return ptlock_ptr(pmd_to_page(pmd)); > } > > -static inline bool pgtable_pmd_page_ctor(struct page *page) > +static inline bool pmd_ptlock_init(struct page *page) > { > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > page->pmd_huge_pte = NULL; > @@ -2279,7 +2279,7 @@ static inline bool pgtable_pmd_page_ctor(struct page > *page) > return ptlock_init(page); > } > > -static inline void pgtable_pmd_page_dtor(struct page *page) > +static inline void pmd_ptlock_free(struct page *page) > { > #ifdef CONFIG_TRANSPARENT_HUGEPAGE > VM_BUG_ON_PAGE(page->pmd_huge_pte, page); > @@ -2296,8 +2296,8 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct > *mm, pmd_t *pmd) > return >page_table_lock; > } > > -static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; } > -static inline void pgtable_pmd_page_dtor(struct page *page) {} > +static inline bool pmd_ptlock_init(struct page *page) { return true; } > +static inline void pmd_ptlock_free(struct page *page) {} > > #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) > > @@ -2310,6 +2310,22 @@ static inline spinlock_t *pmd_lock(struct mm_struct > *mm, pmd_t *pmd) > return ptl; > } > > +static inline bool 
pgtable_pmd_page_ctor(struct page *page) > +{ > + if (!pmd_ptlock_init(page)) > + return false; > + __SetPageTable(page); > + inc_zone_page_state(page, NR_PAGETABLE); > + return true; > +} > + > +static inline void pgtable_pmd_page_dtor(struct page *page) > +{ > + pmd_ptlock_free(page); > + __ClearPageTable(page); > + dec_zone_page_state(page, NR_PAGETABLE); > +} > + > /* > * No scalability reason to split PUD locks yet, but follow the same pattern > * as the PMD locks to make it easier if we decide to. The VM should not be > -- > 2.27.0 > -- Sincerely yours, Mike.
[PATCH 8/8] mm: move p?d_alloc_track to separate header file
From: Joerg Roedel The functions are only used in two source files, both residing in mm/ subdirectory, so there is no need for them to be in the global header. Move them to the new mm/pgalloc-track.h header and include it only where needed. [rppt: mv include/linux/pgalloc-track.h mm/] Link: http://lkml.kernel.org/r/20200609120533.25867-1-j...@8bytes.org Signed-off-by: Joerg Roedel Cc: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Abdul Haleem Cc: Satheesh Rajendran Cc: Stephen Rothwell Cc: Steven Rostedt (VMware) Cc: Mike Rapoport Cc: Christophe Leroy Signed-off-by: Mike Rapoport --- include/linux/mm.h | 45 mm/ioremap.c | 2 ++ mm/pgalloc-track.h | 51 ++ mm/vmalloc.c | 1 + 4 files changed, 54 insertions(+), 45 deletions(-) create mode 100644 mm/pgalloc-track.h diff --git a/include/linux/mm.h b/include/linux/mm.h index dc7b87310c10..5e878a3c7c57 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2093,51 +2093,11 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, NULL : pud_offset(p4d, address); } -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, -unsigned long address, -pgtbl_mod_mask *mod_mask) - -{ - if (unlikely(pgd_none(*pgd))) { - if (__p4d_alloc(mm, pgd, address)) - return NULL; - *mod_mask |= PGTBL_PGD_MODIFIED; - } - - return p4d_offset(pgd, address); -} - -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, -unsigned long address, -pgtbl_mod_mask *mod_mask) -{ - if (unlikely(p4d_none(*p4d))) { - if (__pud_alloc(mm, p4d, address)) - return NULL; - *mod_mask |= PGTBL_P4D_MODIFIED; - } - - return pud_offset(p4d, address); -} - static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? 
NULL: pmd_offset(pud, address); } - -static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, -unsigned long address, -pgtbl_mod_mask *mod_mask) -{ - if (unlikely(pud_none(*pud))) { - if (__pmd_alloc(mm, pud, address)) - return NULL; - *mod_mask |= PGTBL_PUD_MODIFIED; - } - - return pmd_offset(pud, address); -} #endif /* CONFIG_MMU */ #if USE_SPLIT_PTE_PTLOCKS @@ -2253,11 +2213,6 @@ static inline void pgtable_pte_page_dtor(struct page *page) ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \ NULL: pte_offset_kernel(pmd, address)) -#define pte_alloc_kernel_track(pmd, address, mask) \ - ((unlikely(pmd_none(*(pmd))) && \ - (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\ - NULL: pte_offset_kernel(pmd, address)) - #if USE_SPLIT_PMD_PTLOCKS static struct page *pmd_to_page(pmd_t *pmd) diff --git a/mm/ioremap.c b/mm/ioremap.c index 5ee3526f71b8..5fa1ab41d152 100644 --- a/mm/ioremap.c +++ b/mm/ioremap.c @@ -13,6 +13,8 @@ #include #include +#include "pgalloc-track.h" + #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP static int __read_mostly ioremap_p4d_capable; static int __read_mostly ioremap_pud_capable; diff --git a/mm/pgalloc-track.h b/mm/pgalloc-track.h new file mode 100644 index ..1dcc865029a2 --- /dev/null +++ b/mm/pgalloc-track.h @@ -0,0 +1,51 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_PGALLLC_TRACK_H +#define _LINUX_PGALLLC_TRACK_H + +#if defined(CONFIG_MMU) +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, +unsigned long address, +pgtbl_mod_mask *mod_mask) +{ + if (unlikely(pgd_none(*pgd))) { + if (__p4d_alloc(mm, pgd, address)) + return NULL; + *mod_mask |= PGTBL_PGD_MODIFIED; + } + + return p4d_offset(pgd, address); +} + +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, +unsigned long address, +pgtbl_mod_mask *mod_mask) +{ + if (unlikely(p4d_none(*p4d))) { + if (__pud_alloc(mm, p4d, address)) + return NULL; + *mod_mask |= PGTBL_P4D_MODIFIED; + } + + return pud_offset(p4d, 
address); +} + +static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, +unsigned long address, +
[PATCH 6/8] asm-generic: pgalloc: provide generic pgd_free()
From: Mike Rapoport Most architectures define pgd_free() as a wrapper for free_page(). Provide a generic version in asm-generic/pgalloc.h and enable its use for most architectures. Signed-off-by: Mike Rapoport --- arch/alpha/include/asm/pgalloc.h | 6 -- arch/arm/include/asm/pgalloc.h| 1 + arch/arm64/include/asm/pgalloc.h | 1 + arch/csky/include/asm/pgalloc.h | 7 +-- arch/hexagon/include/asm/pgalloc.h| 7 +-- arch/ia64/include/asm/pgalloc.h | 5 - arch/m68k/include/asm/sun3_pgalloc.h | 7 +-- arch/microblaze/include/asm/pgalloc.h | 6 -- arch/mips/include/asm/pgalloc.h | 5 - arch/nds32/mm/mm-nds32.c | 2 ++ arch/nios2/include/asm/pgalloc.h | 7 +-- arch/parisc/include/asm/pgalloc.h | 1 + arch/riscv/include/asm/pgalloc.h | 5 - arch/sh/include/asm/pgalloc.h | 1 + arch/um/include/asm/pgalloc.h | 1 - arch/um/kernel/mem.c | 5 - arch/x86/include/asm/pgalloc.h| 1 + arch/xtensa/include/asm/pgalloc.h | 5 - include/asm-generic/pgalloc.h | 7 +++ 19 files changed, 18 insertions(+), 62 deletions(-) diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h index 4834cd52e9d0..9c6a24fe493d 100644 --- a/arch/alpha/include/asm/pgalloc.h +++ b/arch/alpha/include/asm/pgalloc.h @@ -34,10 +34,4 @@ pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) extern pgd_t *pgd_alloc(struct mm_struct *mm); -static inline void -pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - #endif /* _ALPHA_PGALLOC_H */ diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h index c5bdfd404ea5..15f4674715f8 100644 --- a/arch/arm/include/asm/pgalloc.h +++ b/arch/arm/include/asm/pgalloc.h @@ -65,6 +65,7 @@ static inline void clean_pte_table(pte_t *pte) #define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL #define __HAVE_ARCH_PTE_ALLOC_ONE +#define __HAVE_ARCH_PGD_FREE #include static inline pte_t * diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 0965945b595d..3c6a7f5988b1 100644 --- 
a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -13,6 +13,7 @@ #include #include +#define __HAVE_ARCH_PGD_FREE #include #define PGD_SIZE (PTRS_PER_PGD * sizeof(pgd_t)) diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h index c7c1ed27e348..d58d8146b729 100644 --- a/arch/csky/include/asm/pgalloc.h +++ b/arch/csky/include/asm/pgalloc.h @@ -9,7 +9,7 @@ #include #define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL -#include/* for pte_{alloc,free}_one */ +#include static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, pte_t *pte) @@ -42,11 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) return pte; } -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - free_pages((unsigned long)pgd, PGD_ORDER); -} - static inline pgd_t *pgd_alloc(struct mm_struct *mm) { pgd_t *ret; diff --git a/arch/hexagon/include/asm/pgalloc.h b/arch/hexagon/include/asm/pgalloc.h index cc9be514a676..f0c47e6a7427 100644 --- a/arch/hexagon/include/asm/pgalloc.h +++ b/arch/hexagon/include/asm/pgalloc.h @@ -11,7 +11,7 @@ #include #include -#include/* for pte_{alloc,free}_one */ +#include extern unsigned long long kmap_generation; @@ -41,11 +41,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return pgd; } -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - free_page((unsigned long) pgd); -} - static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t pte) { diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index 06f80358e20f..9601cfe83c94 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -29,11 +29,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm) return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); } -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - #if CONFIG_PGTABLE_LEVELS == 4 static inline void p4d_populate(struct mm_struct *mm, 
p4d_t * p4d_entry, pud_t * pud) diff --git a/arch/m68k/include/asm/sun3_pgalloc.h b/arch/m68k/include/asm/sun3_pgalloc.h index 11b95dadf7c0..000f64869b91 100644 --- a/arch/m68k/include/asm/sun3_pgalloc.h +++ b/arch/m68k/include/asm/sun3_pgalloc.h @@ -13,7 +13,7 @@ #include -#include/* for pte_{alloc,free}_one */ +#include extern const char bad_pmd_string[]; @@ -40,11 +40,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, pgtable_t page */ #define pmd_free(mm, x)do { } while (0) -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd
[PATCH 5/8] asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()
From: Mike Rapoport Several architectures define pud_alloc_one() as a wrapper for __get_free_page() and pud_free() as a wrapper for free_page(). Provide a generic implementation in asm-generic/pgalloc.h and use it where appropriate. Signed-off-by: Mike Rapoport --- arch/arm64/include/asm/pgalloc.h | 11 --- arch/ia64/include/asm/pgalloc.h | 9 - arch/mips/include/asm/pgalloc.h | 6 +- arch/x86/include/asm/pgalloc.h | 15 --- include/asm-generic/pgalloc.h| 30 ++ 5 files changed, 31 insertions(+), 40 deletions(-) diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 7246d0a662e1..0965945b595d 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -37,17 +37,6 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t pmdp, pudval_t prot) #if CONFIG_PGTABLE_LEVELS > 3 -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - return (pud_t *)__get_free_page(GFP_PGTABLE_USER); -} - -static inline void pud_free(struct mm_struct *mm, pud_t *pudp) -{ - BUG_ON((unsigned long)pudp & (PAGE_SIZE-1)); - free_page((unsigned long)pudp); -} - static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot) { set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot)); diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h index 5da1fc76477b..06f80358e20f 100644 --- a/arch/ia64/include/asm/pgalloc.h +++ b/arch/ia64/include/asm/pgalloc.h @@ -41,15 +41,6 @@ p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t * pud) p4d_val(*p4d_entry) = __pa(pud); } -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO); -} - -static inline void pud_free(struct mm_struct *mm, pud_t *pud) -{ - free_page((unsigned long)pud); -} #define __pud_free_tlb(tlb, pud, address) pud_free((tlb)->mm, pud) #endif /* CONFIG_PGTABLE_LEVELS == 4 */ diff --git a/arch/mips/include/asm/pgalloc.h 
b/arch/mips/include/asm/pgalloc.h index eed1b3e8c642..e5a840910ce0 100644 --- a/arch/mips/include/asm/pgalloc.h +++ b/arch/mips/include/asm/pgalloc.h @@ -14,6 +14,7 @@ #include #define __HAVE_ARCH_PMD_ALLOC_ONE +#define __HAVE_ARCH_PUD_ALLOC_ONE #include static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd, @@ -87,11 +88,6 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long address) return pud; } -static inline void pud_free(struct mm_struct *mm, pud_t *pud) -{ - free_pages((unsigned long)pud, PUD_ORDER); -} - static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud) { set_p4d(p4d, __p4d((unsigned long)pud)); diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h index 25feaa117c40..3d1085a14347 100644 --- a/arch/x86/include/asm/pgalloc.h +++ b/arch/x86/include/asm/pgalloc.h @@ -123,21 +123,6 @@ static inline void p4d_populate_safe(struct mm_struct *mm, p4d_t *p4d, pud_t *pu set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud))); } -static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) -{ - gfp_t gfp = GFP_KERNEL_ACCOUNT; - - if (mm == _mm) - gfp &= ~__GFP_ACCOUNT; - return (pud_t *)get_zeroed_page(gfp); -} - -static inline void pud_free(struct mm_struct *mm, pud_t *pud) -{ - BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); - free_page((unsigned long)pud); -} - extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud); static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud, diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h index 1bc027891a00..d361574aaadf 100644 --- a/include/asm-generic/pgalloc.h +++ b/include/asm-generic/pgalloc.h @@ -145,6 +145,36 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t *pmd) #endif /* CONFIG_PGTABLE_LEVELS > 2 */ +#if CONFIG_PGTABLE_LEVELS > 3 + +#ifndef __HAVE_ARCH_PUD_FREE +/** + * pud_alloc_one - allocate a page for PUD-level page table + * @mm: the mm_struct of the current context + 
* + * Allocates a page using %GFP_PGTABLE_USER for user context and + * %GFP_PGTABLE_KERNEL for kernel context. + * + * Return: pointer to the allocated memory or %NULL on error + */ +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + gfp_t gfp = GFP_PGTABLE_USER; + + if (mm == _mm) + gfp = GFP_PGTABLE_KERNEL; + return (pud_t *)get_zeroed_page(gfp); +} +#endif + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + free_page((unsigned long)pud); +} + +#endif /* CONFIG_PGTABLE_LEVELS > 3 */ + #endif /* CONFIG_MMU */ #endif /* __ASM_GENERIC_PGALLOC_H */ -- 2.26.2
[PATCH 3/8] xtensa: switch to generic version of pte allocation
From: Mike Rapoport xtensa clears PTEs during allocation of the page tables and pte_clear() sets the PTE to a non-zero value. Splitting ptes_clear() helper out of pte_alloc_one() and pte_alloc_one_kernel() allows reuse of base generic allocation methods (__pte_alloc_one() and __pte_alloc_one_kernel()) and the common GFP mask for page table allocations. The pte_free() and pte_free_kernel() implementations on xtensa are identical to the generic ones and can be dropped. Signed-off-by: Mike Rapoport --- arch/xtensa/include/asm/pgalloc.h | 41 ++- 1 file changed, 19 insertions(+), 22 deletions(-) diff --git a/arch/xtensa/include/asm/pgalloc.h b/arch/xtensa/include/asm/pgalloc.h index 1d38f0e755ba..60ee94b42850 100644 --- a/arch/xtensa/include/asm/pgalloc.h +++ b/arch/xtensa/include/asm/pgalloc.h @@ -8,9 +8,14 @@ #ifndef _XTENSA_PGALLOC_H #define _XTENSA_PGALLOC_H +#ifdef CONFIG_MMU #include #include +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#define __HAVE_ARCH_PTE_ALLOC_ONE +#include + /* * Allocating and freeing a pmd is trivial: the 1-entry pmd is * inside the pgd, so has no extra memory associated with it. 
@@ -33,45 +38,37 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) free_page((unsigned long)pgd); } +static inline void ptes_clear(pte_t *ptep) +{ + int i; + + for (i = 0; i < PTRS_PER_PTE; i++) + pte_clear(NULL, 0, ptep + i); +} + static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { pte_t *ptep; - int i; - ptep = (pte_t *)__get_free_page(GFP_KERNEL); + ptep = (pte_t *)__pte_alloc_one_kernel(mm); if (!ptep) return NULL; - for (i = 0; i < 1024; i++) - pte_clear(NULL, 0, ptep + i); + ptes_clear(ptep); return ptep; } static inline pgtable_t pte_alloc_one(struct mm_struct *mm) { - pte_t *pte; struct page *page; - pte = pte_alloc_one_kernel(mm); - if (!pte) - return NULL; - page = virt_to_page(pte); - if (!pgtable_pte_page_ctor(page)) { - __free_page(page); + page = __pte_alloc_one(mm, GFP_PGTABLE_USER); + if (!page) return NULL; - } + ptes_clear(page_address(page)); return page; } -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, pgtable_t pte) -{ - pgtable_pte_page_dtor(pte); - __free_page(pte); -} #define pmd_pgtable(pmd) pmd_page(pmd) +#endif CONFIG_MMU #endif /* _XTENSA_PGALLOC_H */ -- 2.26.2
[PATCH 2/8] openrisc: switch to generic version of pte allocation
From: Mike Rapoport Replace pte_alloc_one(), pte_free() and pte_free_kernel() with the generic implementation. The only actual functional change is the addition of __GFP_ACCOUT for the allocation of the user page tables. The pte_alloc_one_kernel() is kept back because its implementation on openrisc is different than the generic one. Signed-off-by: Mike Rapoport --- arch/openrisc/include/asm/pgalloc.h | 33 +++-- 1 file changed, 3 insertions(+), 30 deletions(-) diff --git a/arch/openrisc/include/asm/pgalloc.h b/arch/openrisc/include/asm/pgalloc.h index da12a4c38c4b..88820299ecc4 100644 --- a/arch/openrisc/include/asm/pgalloc.h +++ b/arch/openrisc/include/asm/pgalloc.h @@ -20,6 +20,9 @@ #include #include +#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL +#include + extern int mem_init_done; #define pmd_populate_kernel(mm, pmd, pte) \ @@ -61,38 +64,8 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm) } #endif -static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd) -{ - free_page((unsigned long)pgd); -} - extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm); -static inline struct page *pte_alloc_one(struct mm_struct *mm) -{ - struct page *pte; - pte = alloc_pages(GFP_KERNEL, 0); - if (!pte) - return NULL; - clear_page(page_address(pte)); - if (!pgtable_pte_page_ctor(pte)) { - __free_page(pte); - return NULL; - } - return pte; -} - -static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte) -{ - free_page((unsigned long)pte); -} - -static inline void pte_free(struct mm_struct *mm, struct page *pte) -{ - pgtable_pte_page_dtor(pte); - __free_page(pte); -} - #define __pte_free_tlb(tlb, pte, addr) \ do { \ pgtable_pte_page_dtor(pte); \ -- 2.26.2
[PATCH 1/8] mm: remove unneeded includes of <asm/pgalloc.h>
From: Mike Rapoport In the most cases header is required only for allocations of page table memory. Most of the .c files that include that header do not use symbols declared in and do not require that header. As for the other header files that used to include , it is possible to move that include into the .c file that actually uses symbols from and drop the include from the header file. The process was somewhat automated using sed -i -E '/[<"]asm\/pgalloc\.h/d' \ $(grep -L -w -f /tmp/xx \ $(git grep -E -l '[<"]asm/pgalloc\.h')) where /tmp/xx contains all the symbols defined in arch/*/include/asm/pgalloc.h. Signed-off-by: Mike Rapoport --- arch/alpha/include/asm/tlbflush.h| 1 - arch/alpha/kernel/core_irongate.c| 1 - arch/alpha/kernel/core_marvel.c | 1 - arch/alpha/kernel/core_titan.c | 1 - arch/alpha/kernel/machvec_impl.h | 2 -- arch/alpha/kernel/smp.c | 1 - arch/alpha/mm/numa.c | 1 - arch/arc/mm/fault.c | 1 - arch/arc/mm/init.c | 1 - arch/arm/include/asm/tlb.h | 1 - arch/arm/kernel/machine_kexec.c | 1 - arch/arm/kernel/smp.c| 1 - arch/arm/kernel/suspend.c| 1 - arch/arm/mach-omap2/omap-mpuss-lowpower.c| 1 - arch/arm/mm/hugetlbpage.c| 1 - arch/arm/mm/mmu.c| 1 + arch/arm64/kernel/smp.c | 1 - arch/arm64/mm/hugetlbpage.c | 1 - arch/arm64/mm/ioremap.c | 1 - arch/arm64/mm/mmu.c | 1 + arch/csky/kernel/smp.c | 1 - arch/ia64/include/asm/tlb.h | 1 - arch/ia64/kernel/process.c | 1 - arch/ia64/kernel/smp.c | 1 - arch/ia64/kernel/smpboot.c | 1 - arch/ia64/mm/contig.c| 1 - arch/ia64/mm/discontig.c | 1 - arch/ia64/mm/hugetlbpage.c | 1 - arch/ia64/mm/tlb.c | 1 - arch/m68k/include/asm/mmu_context.h | 2 +- arch/m68k/kernel/dma.c | 2 +- arch/m68k/kernel/traps.c | 3 +-- arch/m68k/mm/cache.c | 2 +- arch/m68k/mm/fault.c | 1 - arch/m68k/mm/kmap.c | 2 +- arch/m68k/mm/mcfmmu.c| 1 + arch/m68k/mm/memory.c| 1 - arch/m68k/sun3x/dvma.c | 2 +- arch/microblaze/include/asm/tlbflush.h | 1 - arch/microblaze/kernel/process.c | 1 - arch/microblaze/kernel/signal.c | 1 - arch/mips/sgi-ip32/ip32-memory.c 
| 1 - arch/openrisc/include/asm/tlbflush.h | 1 - arch/openrisc/kernel/or32_ksyms.c| 1 - arch/parisc/include/asm/mmu_context.h| 1 - arch/parisc/kernel/cache.c | 1 - arch/parisc/kernel/pci-dma.c | 1 - arch/parisc/kernel/process.c | 1 - arch/parisc/kernel/signal.c | 1 - arch/parisc/kernel/smp.c | 1 - arch/parisc/mm/hugetlbpage.c | 1 - arch/parisc/mm/ioremap.c | 2 +- arch/powerpc/include/asm/tlb.h | 1 - arch/powerpc/mm/book3s64/hash_hugetlbpage.c | 1 - arch/powerpc/mm/book3s64/hash_pgtable.c | 1 - arch/powerpc/mm/book3s64/hash_tlb.c | 1 - arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 1 - arch/powerpc/mm/init_32.c| 1 - arch/powerpc/mm/kasan/8xx.c | 1 - arch/powerpc/mm/kasan/book3s_32.c| 1 - arch/powerpc/mm/mem.c| 1 - arch/powerpc/mm/nohash/40x.c | 1 - arch/powerpc/mm/nohash/8xx.c | 1 - arch/powerpc/mm/nohash/fsl_booke.c | 1 - arch/powerpc/mm/nohash/kaslr_booke.c | 1 - arch/powerpc/mm/pgtable.c| 1 - arch/powerpc/mm/pgtable_64.c | 1 - arch/powerpc/mm/ptdump/hashpagetable.c | 2 +- arch/powerpc/mm/ptdump/ptdump.c | 1 - arch/powerpc/platforms/pseries/cmm.c | 1 - arch/riscv/mm/fault.c| 1 - arch/s390/include/asm/tlb.h | 1 - arch/s390/include/asm/tlbflush.h | 1 - arch/s390/kernel/machine_kexec.c | 1 - arch/s390/kernel/ptrace.c| 1 - arch/s390/kvm/diag.c | 1 - arch/s390/kvm/priv.c | 1 - arch/s390/kvm/pv.c | 1 - arch/s390/mm/cmm.c | 1 - arch/s390/mm/mmap.c | 1 - arch/s390/mm/pgtable.c | 1 - arch/sh/kernel/idle.c| 1 -
[PATCH 0/8] mm: cleanup usage of <asm/pgalloc.h>
From: Mike Rapoport Hi, Most architectures have very similar versions of pXd_alloc_one() and pXd_free_one() for intermediate levels of page table. These patches add generic versions of these functions in and enable use of the generic functions where appropriate. In addition, functions declared and defined in headers are used mostly by core mm and early mm initialization in arch and there is no actual reason to have the included all over the place. The first patch in this series removes unneeded includes of In the end it didn't work out as neatly as I hoped and moving pXd_alloc_track() definitions to would require unnecessary changes to arches that have custom page table allocations, so I've decided to move lib/ioremap.c to mm/ and make pgalloc-track.h local to mm/. Joerg Roedel (1): mm: move p?d_alloc_track to separate header file Mike Rapoport (7): mm: remove unneeded includes of opeinrisc: switch to generic version of pte allocation xtensa: switch to generic version of pte allocation asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one() asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one() asm-generic: pgalloc: provide generic pgd_free() mm: move lib/ioremap.c to mm/ arch/alpha/include/asm/pgalloc.h | 21 + arch/alpha/include/asm/tlbflush.h| 1 - arch/alpha/kernel/core_irongate.c| 1 - arch/alpha/kernel/core_marvel.c | 1 - arch/alpha/kernel/core_titan.c | 1 - arch/alpha/kernel/machvec_impl.h | 2 - arch/alpha/kernel/smp.c | 1 - arch/alpha/mm/numa.c | 1 - arch/arc/mm/fault.c | 1 - arch/arc/mm/init.c | 1 - arch/arm/include/asm/pgalloc.h | 12 +-- arch/arm/include/asm/tlb.h | 1 - arch/arm/kernel/machine_kexec.c | 1 - arch/arm/kernel/smp.c| 1 - arch/arm/kernel/suspend.c| 1 - arch/arm/mach-omap2/omap-mpuss-lowpower.c| 1 - arch/arm/mm/hugetlbpage.c| 1 - arch/arm/mm/mmu.c| 1 + arch/arm64/include/asm/pgalloc.h | 39 +- arch/arm64/kernel/smp.c | 1 - arch/arm64/mm/hugetlbpage.c | 1 - arch/arm64/mm/ioremap.c | 1 - arch/arm64/mm/mmu.c | 1 + 
arch/csky/include/asm/pgalloc.h | 7 +- arch/csky/kernel/smp.c | 1 - arch/hexagon/include/asm/pgalloc.h | 7 +- arch/ia64/include/asm/pgalloc.h | 24 -- arch/ia64/include/asm/tlb.h | 1 - arch/ia64/kernel/process.c | 1 - arch/ia64/kernel/smp.c | 1 - arch/ia64/kernel/smpboot.c | 1 - arch/ia64/mm/contig.c| 1 - arch/ia64/mm/discontig.c | 1 - arch/ia64/mm/hugetlbpage.c | 1 - arch/ia64/mm/tlb.c | 1 - arch/m68k/include/asm/mmu_context.h | 2 +- arch/m68k/include/asm/sun3_pgalloc.h | 7 +- arch/m68k/kernel/dma.c | 2 +- arch/m68k/kernel/traps.c | 3 +- arch/m68k/mm/cache.c | 2 +- arch/m68k/mm/fault.c | 1 - arch/m68k/mm/kmap.c | 2 +- arch/m68k/mm/mcfmmu.c| 1 + arch/m68k/mm/memory.c| 1 - arch/m68k/sun3x/dvma.c | 2 +- arch/microblaze/include/asm/pgalloc.h| 6 -- arch/microblaze/include/asm/tlbflush.h | 1 - arch/microblaze/kernel/process.c | 1 - arch/microblaze/kernel/signal.c | 1 - arch/mips/include/asm/pgalloc.h | 19 + arch/mips/sgi-ip32/ip32-memory.c | 1 - arch/nds32/mm/mm-nds32.c | 2 + arch/nios2/include/asm/pgalloc.h | 7 +- arch/openrisc/include/asm/pgalloc.h | 33 +--- arch/openrisc/include/asm/tlbflush.h | 1 - arch/openrisc/kernel/or32_ksyms.c| 1 - arch/parisc/include/asm/mmu_context.h| 1 - arch/parisc/include/asm/pgalloc.h| 12 +-- arch/parisc/kernel/cache.c | 1 - arch/parisc/kernel/pci-dma.c | 1 - arch/parisc/kernel/process.c | 1 - arch/parisc/kernel/signal.c | 1 - arch/parisc/kernel/smp.c | 1 - arch/parisc/mm/hugetlbpage.c | 1 - arch/parisc/mm/ioremap.c | 2 +- arch/powerpc/include/asm/tlb.h | 1 - arch/powerpc/mm/book3s64/hash_hugetlbpage.c | 1 - arch/powerpc/mm/book3s64/hash_pgtable.c | 1 - arch/powerpc/mm/book3s64/hash_tlb.c | 1 - arch/powerpc/mm/book3s64
Re: [PATCH] mm: Move p?d_alloc_track to separate header file
On Wed, Jun 17, 2020 at 06:12:26PM -0700, Andrew Morton wrote: > On Tue, 9 Jun 2020 14:05:33 +0200 Joerg Roedel wrote: > > > From: Joerg Roedel > > > > The functions are only used in two source files, so there is no need > > for them to be in the global header. Move them to the new > > header and include it only where needed. > > > > ... > > > > new file mode 100644 > > index ..1dcc865029a2 > > --- /dev/null > > +++ b/include/linux/pgalloc-track.h > > @@ -0,0 +1,51 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +#ifndef _LINUX_PGALLLC_TRACK_H > > +#define _LINUX_PGALLLC_TRACK_H > > hm, no #includes. I guess this is OK, given the limited use. > > But it does make one wonder whether ioremap.c should be moved from lib/ > to mm/ and this file should be moved from include/linux/ to mm/. It makes sense, but I am anyway planning consolidation of pgalloc.h, so most probably pgalloc-track will not survive until 5.9-rc1 :) If you think that it is worth moving ioremap.c to mm/ regardless of churn, I can send a patch for that. > Oh well. -- Sincerely yours, Mike.
Re: [PATCH V3 (RESEND) 0/3] arm64: Enable vmemmap mapping from device memory
On Thu, Jun 18, 2020 at 06:45:27AM +0530, Anshuman Khandual wrote: > This series enables vmemmap backing memory allocation from device memory > ranges on arm64. But before that, it enables vmemmap_populate_basepages() > and vmemmap_alloc_block_buf() to accommodate struct vmem_altmap based > alocation requests. > > This series applies on 5.8-rc1. > > Pending Question: > > altmap_alloc_block_buf() does not have any other remaining users in > the tree after this change. Should it be converted into a static > function and it's declaration be dropped from the header > (include/linux/mm.h). Avoided doing so because I was not sure if there > are any off-tree users or not. Well, off-tree users probably have an active fork anyway so they could switch to vmemmap_alloc_block_buf()... Regardless, can you please update Documentation/vm/memory-model.rst to keep it in sync with the code? > Changes in V3: > > - Dropped comment from free_hotplug_page_range() per Robin > - Modified comment in unmap_hotplug_range() per Robin > - Enabled altmap support in vmemmap_alloc_block_buf() per Robin > > Changes in V2: (https://lkml.org/lkml/2020/3/4/475) > > - Rebased on latest hot-remove series (v14) adding P4D page table support > > Changes in V1: (https://lkml.org/lkml/2020/1/23/12) > > - Added an WARN_ON() in unmap_hotplug_range() when altmap is > provided without the page table backing memory being freed > > Changes in RFC V2: (https://lkml.org/lkml/2019/10/21/11) > > - Changed the commit message on 1/2 patch per Will > - Changed the commit message on 2/2 patch as well > - Rebased on arm64 memory hot remove series (v10) > > RFC V1: (https://lkml.org/lkml/2019/6/28/32) > > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Mark Rutland > Cc: Paul Walmsley > Cc: Palmer Dabbelt > Cc: Tony Luck > Cc: Fenghua Yu > Cc: Dave Hansen > Cc: Andy Lutomirski > Cc: Peter Zijlstra > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: David Hildenbrand > Cc: Mike Rapoport > Cc: Michal Hocko > Cc: "Matthew Wilcox 
(Oracle)" > Cc: "Kirill A. Shutemov" > Cc: Andrew Morton > Cc: Dan Williams > Cc: Pavel Tatashin > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-i...@vger.kernel.org > Cc: linux-ri...@lists.infradead.org > Cc: x...@kernel.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux...@kvack.org > Cc: linux-ker...@vger.kernel.org > > Anshuman Khandual (3): > mm/sparsemem: Enable vmem_altmap support in vmemmap_populate_basepages() > mm/sparsemem: Enable vmem_altmap support in vmemmap_alloc_block_buf() > arm64/mm: Enable vmem_altmap support for vmemmap mappings > > arch/arm64/mm/mmu.c | 59 ++- > arch/ia64/mm/discontig.c | 2 +- > arch/powerpc/mm/init_64.c | 10 +++ > arch/riscv/mm/init.c | 2 +- > arch/x86/mm/init_64.c | 12 > include/linux/mm.h| 8 -- > mm/sparse-vmemmap.c | 38 - > 7 files changed, 87 insertions(+), 44 deletions(-) > > -- > 2.20.1 > -- Sincerely yours, Mike.
Re: [PATCH V3 4/4] Documentation/mm: Add descriptions for arch page table helpers
On Mon, Jun 15, 2020 at 09:07:57AM +0530, Anshuman Khandual wrote: > This adds a specific description file for all arch page table helpers which > is in sync with the semantics being tested via CONFIG_DEBUG_VM_PGTABLE. All > future changes either to these descriptions here or the debug test should > always remain in sync. > > Cc: Jonathan Corbet > Cc: Andrew Morton > Cc: Mike Rapoport > Cc: Vineet Gupta > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: Heiko Carstens > Cc: Vasily Gorbik > Cc: Christian Borntraeger > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: Borislav Petkov > Cc: "H. Peter Anvin" > Cc: Kirill A. Shutemov > Cc: Paul Walmsley > Cc: Palmer Dabbelt > Cc: linux-snps-...@lists.infradead.org > Cc: linux-arm-ker...@lists.infradead.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-s...@vger.kernel.org > Cc: linux-ri...@lists.infradead.org > Cc: x...@kernel.org > Cc: linux-a...@vger.kernel.org > Cc: linux...@kvack.org > Cc: linux-...@vger.kernel.org > Cc: linux-ker...@vger.kernel.org > Suggested-by: Mike Rapoport > Signed-off-by: Anshuman Khandual > --- > Documentation/vm/arch_pgtable_helpers.rst | 258 ++ > mm/debug_vm_pgtable.c | 6 + > 2 files changed, 264 insertions(+) > create mode 100644 Documentation/vm/arch_pgtable_helpers.rst Acked-by: Mike Rapoport
Re: [PATCH] powerpc/8xx: use pmd_off() to access a PMD entry in pte_update()
On Wed, Jun 17, 2020 at 09:21:42AM +1000, Michael Ellerman wrote: > Andrew Morton writes: > > On Mon, 15 Jun 2020 12:22:29 +0300 Mike Rapoport wrote: > > > >> From: Mike Rapoport > >> > >> The pte_update() implementation for PPC_8xx unfolds page table from the PGD > >> level to access a PMD entry. Since 8xx has only 2-level page table this can > >> be simplified with pmd_off() shortcut. > >> > >> Replace explicit unfolding with pmd_off() and drop defines of pgd_index() > >> and pgd_offset() that are no longer needed. > >> > >> Signed-off-by: Mike Rapoport > >> --- > >> > >> I think it's powerpc material, but I won't mind if Andrew picks it up :) > > > > Via the powerpc tree would be better, please. > > I'll take it into next for v5.9, unless there's a reason it needs to go > into v5.8. I consider it a fixup for 5.8 merge window conflicts. Besides, merging it now may avoid new conflicts in 5.9 ;-) > cheers -- Sincerely yours, Mike.
[PATCH] powerpc/8xx: use pmd_off() to access a PMD entry in pte_update()
From: Mike Rapoport The pte_update() implementation for PPC_8xx unfolds page table from the PGD level to access a PMD entry. Since 8xx has only 2-level page table this can be simplified with pmd_off() shortcut. Replace explicit unfolding with pmd_off() and drop defines of pgd_index() and pgd_offset() that are no longer needed. Signed-off-by: Mike Rapoport --- I think it's powerpc material, but I won't mind if Andrew picks it up :) arch/powerpc/include/asm/nohash/32/pgtable.h | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index b56f14160ae5..5a590ceaec14 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -205,10 +205,6 @@ static inline void pmd_clear(pmd_t *pmdp) *pmdp = __pmd(0); } -/* to find an entry in a page-table-directory */ -#define pgd_index(address) ((address) >> PGDIR_SHIFT) -#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) - /* * PTE updates. This function is called whenever an existing * valid PTE is updated. This does -not- include set_pte_at() @@ -230,6 +226,8 @@ static inline void pmd_clear(pmd_t *pmdp) * For other page sizes, we have a single entry in the table. */ #ifdef CONFIG_PPC_8xx +static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr); + static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, pte_t *p, unsigned long clr, unsigned long set, int huge) { @@ -237,7 +235,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, p pte_basic_t old = pte_val(*p); pte_basic_t new = (old & ~(pte_basic_t)clr) | set; int num, i; - pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), addr), addr), addr); + pmd_t *pmd = pmd_off(mm, addr); if (!huge) num = PAGE_SIZE / SZ_4K; -- 2.26.2
Re: [PATCH 04/21] mm: free_area_init: use maximal zone PFNs rather than zone sizes
Hi Greg, On Mon, Jun 15, 2020 at 01:53:42PM +1000, Greg Ungerer wrote: > Hi Mike, > > From: Mike Rapoport > > Currently, architectures that use free_area_init() to initialize memory map > > and node and zone structures need to calculate zone and hole sizes. We can > > use free_area_init_nodes() instead and let it detect the zone boundaries > > while the architectures will only have to supply the possible limits for > > the zones. > > > > Signed-off-by: Mike Rapoport > > This is causing some new warnings for me on boot on at least one non-MMU m68k > target: There were a couple of changes that cause this. The free_area_init() now relies on memblock data and architectural limits for zone sizes rather than on explisit pfns calculated by the arch code. I've update motorola variant and missed coldfire. Angelo sent a fix for mcfmmu.c [1] and I've updated it to include nommu as well [1] https://lore.kernel.org/linux-m68k/20200614225119.02-1-angelo.dureghe...@timesys.com >From 55b8523df2a5c4565b132c0691990f0821040fec Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Mon, 15 Jun 2020 00:51:19 +0200 Subject: [PATCH] m68k: fix registration of memory regions with memblock Commit 3f08a302f533 ("mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option") introduced assumption that UMA systems have their memory at node 0 and updated most of them, but it forgot nommu and coldfire variants of m68k. The later change in free area initialization in commit fa3354e4ea39 ("mm: free_area_init: use maximal zone PFNs rather than zone sizes") exposed that and caused a lot of "BUG: Bad page state in process swapper" reports. Using memblock_add_node() with nid = 0 to register memory banks solves the problem. 
Fixes: 3f08a302f533 ("mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option") Fixes: fa3354e4ea39 ("mm: free_area_init: use maximal zone PFNs rather than zone sizes") Signed-off-by: Angelo Dureghello Co-developed-by: Mike Rapoport Signed-off-by: Mike Rapoport --- arch/m68k/kernel/setup_no.c | 2 +- arch/m68k/mm/mcfmmu.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c index e779b19e0193..0c4589a39ba9 100644 --- a/arch/m68k/kernel/setup_no.c +++ b/arch/m68k/kernel/setup_no.c @@ -138,7 +138,7 @@ void __init setup_arch(char **cmdline_p) pr_debug("MEMORY -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx\n ", __bss_stop, memory_start, memory_start, memory_end); - memblock_add(memory_start, memory_end - memory_start); + memblock_add_node(memory_start, memory_end - memory_start, 0); /* Keep a copy of command line */ *cmdline_p = _line[0]; diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c index 29f47923aa46..7d04210d34f0 100644 --- a/arch/m68k/mm/mcfmmu.c +++ b/arch/m68k/mm/mcfmmu.c @@ -174,7 +174,7 @@ void __init cf_bootmem_alloc(void) m68k_memory[0].addr = _rambase; m68k_memory[0].size = _ramend - _rambase; - memblock_add(m68k_memory[0].addr, m68k_memory[0].size); + memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0); /* compute total pages in system */ num_pages = PFN_DOWN(_ramend - _rambase); -- 2.26.2 > ... > NET: Registered protocol family 17 > BUG: Bad page state in process swapper pfn:20165 > page:41fe0ca0 refcount:0 mapcount:1 mapping: index:0x0 > flags: 0x0() > raw: 0100 0122 > page dumped because: nonzero mapcount > CPU: 0 PID: 1 Comm: swapper Not tainted 5.8.0-rc1-1-g3a38f8a60c65-dirty #1 > Stack from 404c9ebc: > 404c9ebc 4029ab28 4029ab28 40088470 41fe0ca0 40299e21 40299df1 > 404ba2a4 > 00020165 41fd2c10 402c7ba0 41fd2c04 40088504 41fe0ca0 > 40299e21 > 40088a12 41fe0ca0 41fe0ca4 020a 0001 > 402ca000 > 41fe0ca0 41fd2c10 41fd2c10 402b2388 > 0001 ... 
> > System boots pretty much as normal through user space after this. > Seems to be fully operational despite all those BUGONs. > > Specifically this is a M5208EVB target (arch/m68k/configs/m5208evb). > > > [snip] > > diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c > > index b88d510d4fe3..6d3147662ff2 100644 > > --- a/arch/m68k/mm/init.c > > +++ b/arch/m68k/mm/init.c > > @@ -84,7 +84,7 @@ void __init paging_init(void) > > * page_alloc get different views of the world. > > */ > > unsigned long end_mem = memory_end & PAGE_MASK; > > - unsigned long zones_size[MAX_NR_ZONES] = { 0, }; > > + unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, }; > > high_memory = (void *) end_mem; > > @@ -98
Re: [PATCH] mm: Move p?d_alloc_track to separate header file
On Tue, Jun 09, 2020 at 02:05:33PM +0200, Joerg Roedel wrote: > From: Joerg Roedel > > The functions are only used in two source files, so there is no need > for them to be in the global header. Move them to the new > header and include it only where needed. > > Signed-off-by: Joerg Roedel Acked-by: Mike Rapoport > --- > include/linux/mm.h| 45 --- > include/linux/pgalloc-track.h | 51 +++ > lib/ioremap.c | 1 + > mm/vmalloc.c | 1 + > 4 files changed, 53 insertions(+), 45 deletions(-) > create mode 100644 include/linux/pgalloc-track.h > > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 9d6042178ca7..22d8b2a2c9bc 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2092,51 +2092,11 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, > p4d_t *p4d, > NULL : pud_offset(p4d, address); > } > > -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, > - unsigned long address, > - pgtbl_mod_mask *mod_mask) > - > -{ > - if (unlikely(pgd_none(*pgd))) { > - if (__p4d_alloc(mm, pgd, address)) > - return NULL; > - *mod_mask |= PGTBL_PGD_MODIFIED; > - } > - > - return p4d_offset(pgd, address); > -} > - > -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, > - unsigned long address, > - pgtbl_mod_mask *mod_mask) > -{ > - if (unlikely(p4d_none(*p4d))) { > - if (__pud_alloc(mm, p4d, address)) > - return NULL; > - *mod_mask |= PGTBL_P4D_MODIFIED; > - } > - > - return pud_offset(p4d, address); > -} > - > static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned > long address) > { > return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? 
> NULL: pmd_offset(pud, address); > } > - > -static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, > - unsigned long address, > - pgtbl_mod_mask *mod_mask) > -{ > - if (unlikely(pud_none(*pud))) { > - if (__pmd_alloc(mm, pud, address)) > - return NULL; > - *mod_mask |= PGTBL_PUD_MODIFIED; > - } > - > - return pmd_offset(pud, address); > -} > #endif /* CONFIG_MMU */ > > #if USE_SPLIT_PTE_PTLOCKS > @@ -2252,11 +2212,6 @@ static inline void pgtable_pte_page_dtor(struct page > *page) > ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \ > NULL: pte_offset_kernel(pmd, address)) > > -#define pte_alloc_kernel_track(pmd, address, mask) \ > - ((unlikely(pmd_none(*(pmd))) && \ > - (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\ > - NULL: pte_offset_kernel(pmd, address)) > - > #if USE_SPLIT_PMD_PTLOCKS > > static struct page *pmd_to_page(pmd_t *pmd) > diff --git a/include/linux/pgalloc-track.h b/include/linux/pgalloc-track.h > new file mode 100644 > index ..1dcc865029a2 > --- /dev/null > +++ b/include/linux/pgalloc-track.h > @@ -0,0 +1,51 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef _LINUX_PGALLLC_TRACK_H > +#define _LINUX_PGALLLC_TRACK_H > + > +#if defined(CONFIG_MMU) > +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, > + unsigned long address, > + pgtbl_mod_mask *mod_mask) > +{ > + if (unlikely(pgd_none(*pgd))) { > + if (__p4d_alloc(mm, pgd, address)) > + return NULL; > + *mod_mask |= PGTBL_PGD_MODIFIED; > + } > + > + return p4d_offset(pgd, address); > +} > + > +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, > + unsigned long address, > + pgtbl_mod_mask *mod_mask) > +{ > + if (unlikely(p4d_none(*p4d))) { > + if (__pud_alloc(mm, p4d, address)) > + return NULL; > + *mod_mask |= PGTBL_P4D_MODIFIED; > + } > + > + return pud_offset(p4d, address); > +} > + > +static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, > + unsigned long address, > + pgtb
Re: [PATCH] mm: Fix pud_alloc_track()
On Thu, Jun 04, 2020 at 09:44:46AM +0200, Joerg Roedel wrote: > From: Joerg Roedel > > The pud_alloc_track() needs to do different checks based on whether > __ARCH_HAS_5LEVEL_HACK is defined, like it already does in > pud_alloc(). Otherwise it causes boot failures on PowerPC. > > Provide the correct implementations for both possible settings of > __ARCH_HAS_5LEVEL_HACK to fix the boot problems. There is a patch in mmotm [1] that completely removes __ARCH_HAS_5LEVEL_HACK which is a part of the series [2] that updates p4d folding across architectures. This should fix boot on PowerPC and the addition of pXd_alloc_track() for __ARCH_HAS_5LEVEL_HACK wouldn't be necessary. [1] https://github.com/hnaz/linux-mm/commit/cfae68792af3731ac902ea6ba5ed8df5a0f6bd2f [2] https://lore.kernel.org/kvmarm/20200414153455.21744-1-r...@kernel.org/ > Reported-by: Abdul Haleem > Tested-by: Abdul Haleem > Tested-by: Satheesh Rajendran > Fixes: d8626138009b ("mm: add functions to track page directory > modifications") > Signed-off-by: Joerg Roedel > --- > include/asm-generic/5level-fixup.h | 5 + > include/linux/mm.h | 26 +- > 2 files changed, 18 insertions(+), 13 deletions(-) > > diff --git a/include/asm-generic/5level-fixup.h > b/include/asm-generic/5level-fixup.h > index 58046ddc08d0..afbab31fbd7e 100644 > --- a/include/asm-generic/5level-fixup.h > +++ b/include/asm-generic/5level-fixup.h > @@ -17,6 +17,11 @@ > ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \ > NULL : pud_offset(p4d, address)) > > +#define pud_alloc_track(mm, p4d, address, mask) > \ > + ((unlikely(pgd_none(*(p4d))) && > \ > + (__pud_alloc(mm, p4d, address) || > ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))? 
\ > + NULL : pud_offset(p4d, address)) > + > #define p4d_alloc(mm, pgd, address) (pgd) > #define p4d_alloc_track(mm, pgd, address, mask) (pgd) > #define p4d_offset(pgd, start) (pgd) > diff --git a/include/linux/mm.h b/include/linux/mm.h > index 66e0977f970a..ad3b31c5bcc3 100644 > --- a/include/linux/mm.h > +++ b/include/linux/mm.h > @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, > p4d_t *p4d, > NULL : pud_offset(p4d, address); > } > > -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, > +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, >unsigned long address, >pgtbl_mod_mask *mod_mask) > - > { > - if (unlikely(pgd_none(*pgd))) { > - if (__p4d_alloc(mm, pgd, address)) > + if (unlikely(p4d_none(*p4d))) { > + if (__pud_alloc(mm, p4d, address)) > return NULL; > - *mod_mask |= PGTBL_PGD_MODIFIED; > + *mod_mask |= PGTBL_P4D_MODIFIED; > } > > - return p4d_offset(pgd, address); > + return pud_offset(p4d, address); > } > > -#endif /* !__ARCH_HAS_5LEVEL_HACK */ > - > -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, > +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, >unsigned long address, >pgtbl_mod_mask *mod_mask) > + > { > - if (unlikely(p4d_none(*p4d))) { > - if (__pud_alloc(mm, p4d, address)) > + if (unlikely(pgd_none(*pgd))) { > + if (__p4d_alloc(mm, pgd, address)) > return NULL; > - *mod_mask |= PGTBL_P4D_MODIFIED; > + *mod_mask |= PGTBL_PGD_MODIFIED; > } > > - return pud_offset(p4d, address); > + return p4d_offset(pgd, address); > } > > +#endif /* !__ARCH_HAS_5LEVEL_HACK */ > + > static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned > long address) > { > return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? > -- > 2.26.2 > -- Sincerely yours, Mike.
Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice
On Wed, Jun 03, 2020 at 04:44:17PM -0700, Guenter Roeck wrote: > > sparc32 smp images in next-20200603 still crash for me with a spinlock > recursion. s390 images hang early in boot. Several others (alpha, arm64, > various ppc) don't even compile. I can run some more bisects over time, > but this is becoming a full-time job :-(. I've been able to bisect the s390 hang to commit b614345f52bc ("x86/entry: Clarify irq_{enter,exit}_rcu()"). After this commit, lockdep_hardirq_exit() is called twice on s390 (and others) - one time in irq_exit_rcu() and another one in irq_exit(): /** * irq_exit_rcu() - Exit an interrupt context without updating RCU * * Also processes softirqs if needed and possible. */ void irq_exit_rcu(void) { __irq_exit_rcu(); /* must be last! */ lockdep_hardirq_exit(); } /** * irq_exit - Exit an interrupt context, update RCU and lockdep * * Also processes softirqs if needed and possible. */ void irq_exit(void) { irq_exit_rcu(); rcu_irq_exit(); /* must be last! */ lockdep_hardirq_exit(); } Removing the call in irq_exit() makes s390 boot again, and judging by the x86 entry code, the comment /* must be last! */ is stale... @Peter, @Thomas, can you comment please? >From e51d50ee6f4d1f446decf91c2c67230da14ff82c Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Thu, 4 Jun 2020 12:37:03 +0300 Subject: [PATCH] softirq: don't call lockdep_hardirq_exit() twice After commit b614345f52bc ("x86/entry: Clarify irq_{enter,exit}_rcu()") lockdep_hardirq_exit() is called twice on every architecture that uses irq_exit(): one time in irq_exit_rcu() and another one in irq_exit(). Remove the extra call in irq_exit(). Signed-off-by: Mike Rapoport --- kernel/softirq.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/kernel/softirq.c b/kernel/softirq.c index a3eb6eba8c41..7523f4ce4c1d 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -427,7 +427,6 @@ static inline void __irq_exit_rcu(void) void irq_exit_rcu(void) { __irq_exit_rcu(); -/* must be last! 
*/ lockdep_hardirq_exit(); } @@ -440,8 +439,6 @@ void irq_exit(void) { irq_exit_rcu(); rcu_irq_exit(); -/* must be last! */ - lockdep_hardirq_exit(); } /* -- 2.26.2 > Guenter -- Sincerely yours, Mike.
Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice
On Wed, Jun 03, 2020 at 11:22:26PM -0700, Ira Weiny wrote: > On Wed, Jun 03, 2020 at 04:44:17PM -0700, Guenter Roeck wrote: > > With linux-next on sparc I too see the spinlock issue; something like: > > ... > Starting syslogd: BUG: spinlock recursion on CPU#0, S01syslogd/139 > lock: 0xf53ef350, .magic: dead4ead, .owner: S01syslogd/139, .owner_cpu: 0 > CPU: 0 PID: 139 Comm: S01syslogd Not tainted 5.7.0-next-20200603 #1 > [f0067d00 : > do_raw_spin_lock+0xa8/0xd8 ] > [f00d598c : > copy_page_range+0x328/0x804 ] > [f0025c34 : > dup_mm+0x334/0x434 ] > [f0027198 : > copy_process+0x1248/0x12d4 ] > [f00273b8 : > _do_fork+0x54/0x30c ] > [f00276e4 : > do_fork+0x5c/0x6c ] > [f000de44 : > sparc_do_fork+0x18/0x38 ] > [f000b7f4 : > do_syscall+0x34/0x40 ] > [5010cd4c : > 0x5010cd4c ] > > > I'm going to bisect between there and HEAD. The sparc issue should be fixed by https://lore.kernel.org/lkml/20200526173302.377-1-w...@kernel.org > Ira -- Sincerely yours, Mike.
Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice
On Wed, Jun 03, 2020 at 04:44:17PM -0700, Guenter Roeck wrote: > On 6/3/20 2:14 PM, Ira Weiny wrote: > > On Wed, Jun 03, 2020 at 01:57:36PM -0700, Andrew Morton wrote: > >> On Thu, 21 May 2020 10:42:50 -0700 Ira Weiny wrote: > >> > >>>>> > >>>>> Actually it occurs to me that the patch consolidating kmap_prot is odd > >>>>> for > >>>>> sparc 32 bit... > >>>>> > >>>>> Its a long shot but could you try reverting this patch? > >>>>> > >>>>> 4ea7d2419e3f kmap: consolidate kmap_prot definitions > >>>>> > >>>> > >>>> That is not easy to revert, unfortunately, due to several follow-up > >>>> patches. > >>> > >>> I have gotten your sparc tests to run and they all pass... > >>> > >>> 08:10:34 > ../linux-build-test/rootfs/sparc/run-qemu-sparc.sh > >>> Build reference: v5.7-rc4-17-g852b6f2edc0f > >>> > >>> Building sparc32:SPARCClassic:nosmp:scsi:hd ... running . passed > >>> Building sparc32:SPARCbook:nosmp:scsi:cd ... running . passed > >>> Building sparc32:LX:nosmp:noapc:scsi:hd ... running . passed > >>> Building sparc32:SS-4:nosmp:initrd ... running . passed > >>> Building sparc32:SS-5:nosmp:scsi:hd ... running . passed > >>> Building sparc32:SS-10:nosmp:scsi:cd ... running . passed > >>> Building sparc32:SS-20:nosmp:scsi:hd ... running . passed > >>> Building sparc32:SS-600MP:nosmp:scsi:hd ... running . passed > >>> Building sparc32:Voyager:nosmp:noapc:scsi:hd ... running . passed > >>> Building sparc32:SS-4:smp:scsi:hd ... running . passed > >>> Building sparc32:SS-5:smp:scsi:cd ... running . passed > >>> Building sparc32:SS-10:smp:scsi:hd ... running . passed > >>> Building sparc32:SS-20:smp:scsi:hd ... running . passed > >>> Building sparc32:SS-600MP:smp:scsi:hd ... running . passed > >>> Building sparc32:Voyager:smp:noapc:scsi:hd ... running . passed > >>> > >>> Is there another test I need to run? > >> > >> This all petered out, but as I understand it, this patchset still might > >> have issues on various architectures. 
> >> > >> Can folks please provide an update on the testing status? > > > > I believe the tests were failing for Guenter due to another patch set...[1] > > > > My tests with just this series are working. > > > >>From my understanding the other failures were unrelated.[2] > > > > > > I've checked the patch above on top of the mmots which already has > > Ira's patches and it booted fine. I've used sparc32_defconfig to build > > the kernel and qemu-system-sparc with default machine and CPU. > > > > > > Mike, am I wrong? Do you think the kmap() patches are still causing issues? sparc32 UP and microblaze work for me with next-20200603, but I didn't test other architectures. > For my part, all I can say is that -next is in pretty bad shape right now. > The summary of my tests says: > > Build results: > total: 151 pass: 130 fail: 21 > Qemu test results: > total: 430 pass: 375 fail: 55 > > sparc32 smp images in next-20200603 still crash for me with a spinlock > recursion. I think this is because Will's fixes [1] are not yet in -next. > s390 images hang early in boot. Several others (alpha, arm64, > various ppc) don't even compile. I can run some more bisects over time, > but this is becoming a full-time job :-(. > > Guenter [1] https://lore.kernel.org/lkml/20200526173302.377-1-w...@kernel.org -- Sincerely yours, Mike.
Re: [PATCH v4 03/14] arm64: add support for folded p4d page tables
On Fri, May 15, 2020 at 11:40:12AM -0700, Andrew Morton wrote: > On Tue, 14 Apr 2020 18:34:44 +0300 Mike Rapoport wrote: > > > Implement primitives necessary for the 4th level folding, add walks of p4d > > level where appropriate, replace 5level-fixup.h with pgtable-nop4d.h and > > remove __ARCH_USE_5LEVEL_HACK. > > This needed some rework due to arm changes in linux-next. Please check > my handiwork and test it once I've merged this into linux-next? Looks ok to me. It passed defconfig and a couple of randconfig builds and qemu-system-aarch64 boots fine with this. > Rejects were > > --- > arch/arm64/include/asm/pgtable.h~arm64-add-support-for-folded-p4d-page-tables > +++ arch/arm64/include/asm/pgtable.h > @@ -596,49 +604,50 @@ static inline phys_addr_t pud_page_paddr > > #define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, > pud_val(pud)) > > -#define pgd_none(pgd)(!pgd_val(pgd)) > -#define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) > -#define pgd_present(pgd) (pgd_val(pgd)) > +#define p4d_none(p4d)(!p4d_val(p4d)) > +#define p4d_bad(p4d) (!(p4d_val(p4d) & 2)) > +#define p4d_present(p4d) (p4d_val(p4d)) > > -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) > +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) > { > - if (in_swapper_pgdir(pgdp)) { > - set_swapper_pgd(pgdp, pgd); > + if (in_swapper_pgdir(p4dp)) { > + set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d))); > return; > } > > - WRITE_ONCE(*pgdp, pgd); > + WRITE_ONCE(*p4dp, p4d); > dsb(ishst); > isb(); > } > > -static inline void pgd_clear(pgd_t *pgdp) > +static inline void p4d_clear(p4d_t *p4dp) > { > - set_pgd(pgdp, __pgd(0)); > + set_p4d(p4dp, __p4d(0)); > } > > -static inline phys_addr_t pgd_page_paddr(pgd_t pgd) > +static inline phys_addr_t p4d_page_paddr(p4d_t p4d) > { > - return __pgd_to_phys(pgd); > + return __p4d_to_phys(p4d); > } > > /* Find an entry in the frst-level page table. 
*/ > #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD > - 1)) > > -#define pud_offset_phys(dir, addr) (pgd_page_paddr(READ_ONCE(*(dir))) + > pud_index(addr) * sizeof(pud_t)) > +#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + > pud_index(addr) * sizeof(pud_t)) > #define pud_offset(dir, addr)((pud_t > *)__va(pud_offset_phys((dir), (addr > > #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, > addr)) > -#define pud_set_fixmap_offset(pgd, addr) > pud_set_fixmap(pud_offset_phys(pgd, addr)) > +#define pud_set_fixmap_offset(p4d, addr) > pud_set_fixmap(pud_offset_phys(p4d, addr)) > #define pud_clear_fixmap() clear_fixmap(FIX_PUD) > > -#define pgd_page(pgd) > pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd))) > +#define p4d_page(p4d) > pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d))) > > /* use ONLY for statically allocated translation tables */ > #define pud_offset_kimg(dir,addr)((pud_t > *)__phys_to_kimg(pud_offset_phys((dir), (addr > > #else > > +#define p4d_page_paddr(p4d) ({ BUILD_BUG(); 0;}) > #define pgd_page_paddr(pgd) ({ BUILD_BUG(); 0;}) > > /* Match pud_offset folding in */ > > > > and > > --- arch/arm64/kvm/mmu.c~arm64-add-support-for-folded-p4d-page-tables > +++ arch/arm64/kvm/mmu.c > @@ -469,7 +517,7 @@ static void stage2_flush_memslot(struct > do { > next = stage2_pgd_addr_end(kvm, addr, end); > if (!stage2_pgd_none(kvm, *pgd)) > - stage2_flush_puds(kvm, pgd, addr, next); > + stage2_flush_p4ds(kvm, pgd, addr, next); > } while (pgd++, addr = next, addr != end); > } > > > > Result: > > From: Mike Rapoport > Subject: arm64: add support for folded p4d page tables > > Implement primitives necessary for the 4th level folding, add walks of p4d > level where appropriate, replace 5level-fixup.h with pgtable-nop4d.h and > remove __ARCH_USE_5LEVEL_HACK. 
> > Link: http://lkml.kernel.org/r/20200414153455.21744-4-r...@kernel.org > Signed-off-by: Mike Rapoport > Cc: Arnd Bergmann > Cc: Benjamin Herrenschmidt > Cc: Brian Cain > Cc: Catalin Marinas > Cc: Christophe Leroy > Cc: Fenghua Yu > Cc: Geert Uytterhoeven > Cc: Guan Xuetao > Cc: James Morse > Cc: Jonas Bonn > Cc: Julien Thierry > Cc: Ley Foon Tan > Cc: Marc Zyngier > Cc: Michael Ellerman > Cc: Paul Mackerras > Cc: Rich Felke
[PATCH v2 12/12] mm: consolidate pgd_index() and pgd_offset{_k}() definitions
From: Mike Rapoport All architectures define pgd_offset() as an entry in the array of PGDs indexed by the pgd_index(), where pgd_index() is (address >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1) For the most cases, the pgd_offset() uses mm->pgd as the pointer to the top-level page directory and the pgd_offset_k() is a helper that presumes that mm == &init_mm. Use x86 implementation as the generic one and remove redundant definitions of PGD accessors in most of arch/*/include/asm/pgtable.h The generic implementation can be overridden by an architecture and this ability is currently in use by these architectures: * ia64 has custom implementation of pgd_index() * s390 has custom definitions of all page table accessors Signed-off-by: Mike Rapoport --- arch/alpha/include/asm/pgtable.h | 9 arch/arc/include/asm/pgtable.h | 7 --- arch/arm/include/asm/pgtable.h | 8 arch/arm64/include/asm/pgtable.h | 10 arch/arm64/kernel/hibernate.c| 4 +- arch/arm64/mm/kasan_init.c | 2 +- arch/arm64/mm/mmu.c | 8 ++-- arch/csky/include/asm/pgtable.h | 11 - arch/hexagon/include/asm/pgtable.h | 18 --- arch/ia64/include/asm/pgtable.h | 14 +- arch/m68k/include/asm/mcf_pgtable.h | 11 - arch/m68k/include/asm/motorola_pgtable.h | 16 --- arch/m68k/include/asm/sun3_pgtable.h | 9 arch/microblaze/include/asm/pgtable.h| 7 --- arch/mips/include/asm/pgtable-32.h | 8 arch/mips/include/asm/pgtable-64.h | 8 arch/nds32/include/asm/pgtable.h | 6 --- arch/nios2/include/asm/pgtable.h | 8 arch/openrisc/include/asm/pgtable.h | 10 arch/parisc/include/asm/pgtable.h| 9 arch/powerpc/include/asm/book3s/32/pgtable.h | 7 --- arch/powerpc/include/asm/book3s/64/pgtable.h | 13 - arch/powerpc/include/asm/nohash/32/pgtable.h | 7 --- arch/powerpc/include/asm/nohash/64/pgtable.h | 12 - arch/riscv/include/asm/pgtable.h | 10 arch/riscv/mm/init.c | 12 ++--- arch/s390/include/asm/pgtable.h | 1 - arch/sh/include/asm/pgtable_32.h | 7 --- arch/sh/include/asm/pgtable_64.h | 11 - arch/sparc/include/asm/pgtable_32.h | 8 
arch/sparc/include/asm/pgtable_64.h | 7 --- arch/um/include/asm/pgtable.h| 50 ++-- arch/unicore32/include/asm/pgtable.h | 8 arch/x86/include/asm/pgtable.h | 24 -- arch/xtensa/include/asm/pgtable.h| 8 include/linux/pgtable.h | 26 ++ 36 files changed, 55 insertions(+), 339 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 314973d2810d..162c17b2631f 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -276,15 +276,6 @@ extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } -#define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(_mm, (address)) - -/* to find an entry in a page-table-directory. */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) -#define pgd_offset(mm, address)((mm)->pgd+pgd_index(address)) - /* * The smp_read_barrier_depends() in the following functions are required to * order the load of *dir (the pointer in the top level page table) with any diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 1146905f594b..f1ed17edb085 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -316,13 +316,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pteval); } -/* - * All kernel related VM pages are in init's mm. 
- */ -#define pgd_offset_k(address) pgd_offset(_mm, address) -#define pgd_index(addr)((addr) >> PGDIR_SHIFT) -#define pgd_offset(mm, addr) (((mm)->pgd)+pgd_index(addr)) - /* * Macro to quickly access the PGD entry, utlising the fact that some * arch may cache the pointer to Page Directory of "current" task diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 41543bc47660..c02f24400369 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -166,14 +166,6 @@ extern struct page *empty_zero_page; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -/* to find an entry in
[PATCH v2 11/12] mm: consolidate pud_index() and pud_offset() definitions
From: Mike Rapoport All architectures that have at least four-level page tables define pud_offset() as an entry in the array of PUDs indexed by the pud_index(), where pud_index() is (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1) For the most architectures the pud_offset() implementation relies on the availability of p4d_page_vaddr() that converts a P4D entry value to the virtual address of the page containing PUD array. Let's use such implementation as a generic and drop most of the definitions of pud_index() and pud_offset() in <asm/pgtable.h> files. The architectures that didn't provide p4d_page_vaddr() are updated to have that defined. The generic implementation can be overridden by an architecture and this ability is currently in use by these architectures: * s390 has custom definitions of all page table accessors Signed-off-by: Mike Rapoport --- arch/arm64/include/asm/pgtable.h| 8 +--- arch/csky/include/asm/pgtable.h | 2 -- arch/ia64/include/asm/pgtable.h | 6 -- arch/mips/include/asm/pgtable-32.h | 1 - arch/mips/include/asm/pgtable-64.h | 7 --- arch/powerpc/include/asm/book3s/64/pgtable.h| 4 arch/powerpc/include/asm/nohash/64/pgtable-4k.h | 4 arch/s390/include/asm/pgtable.h | 1 + arch/sh/include/asm/pgtable_32.h| 2 -- arch/sh/include/asm/pgtable_64.h| 2 -- arch/sparc/include/asm/pgtable_64.h | 5 - arch/x86/include/asm/pgtable.h | 11 --- include/asm-generic/pgtable-nopud.h | 1 + include/linux/pgtable.h | 16 14 files changed, 23 insertions(+), 47 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d0175335a2f2..c0155814b374 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -626,11 +626,13 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d) return __p4d_to_phys(p4d); } -/* Find an entry in the frst-level page table. 
*/ -#define pud_index(addr)(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +static inline unsigned long p4d_page_vaddr(p4d_t p4d) +{ + return (unsigned long)__va(p4d_page_paddr(p4d)); +} +/* Find an entry in the frst-level page table. */ #define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) -#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) #define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr)) diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index dc613f20e2e1..c5ab20970857 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -220,8 +220,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) - /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(_mm, address) diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index d75d981e6ee1..4c24e5e18bff 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -383,12 +383,6 @@ pgd_offset (const struct mm_struct *mm, unsigned long address) here. */ #define pgd_offset_gate(mm, addr) pgd_offset_k(addr) -#if CONFIG_PGTABLE_LEVELS == 4 -/* Find an entry in the second-level page table.. 
*/ -#define pud_offset(dir,addr) \ - ((pud_t *) p4d_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) -#endif - /* atomic versions of the some PTE manipulations: */ static inline int diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 05b51c344939..9c0e7a5ffc75 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -199,7 +199,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define pgd_offset_k(address) pgd_offset(_mm, address) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) /* to find an entry in a page-table-directory */ #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 21fb55510d35..38170fdac5bf 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -172,8 +172,6 @@ extern pte_t invalid_pte_table[PTRS_PER_PTE]; -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) - #ifndef __PAGETABLE_PUD_FOLDED /* * For 4-level
[PATCH v2 10/12] mm: consolidate pmd_index() and pmd_offset() definitions
From: Mike Rapoport All architectures define pmd_index() as (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1) and all architectures that have at least three-level page tables define pmd_offset() as an entry in the array of PMDs indexed by the pmd_index(). For the most architectures the pmd_offset() implementation relies on the availability of pud_page_vaddr() that converts a PUD entry value to the virtual address of the page containing PMD array. Let's use such implementation as a generic and drop most of the definitions of pmd_index() and pmd_offset() in <asm/pgtable.h> files. The architectures that didn't provide pud_page_vaddr() are updated to have that defined. The generic implementation can be overridden by an architecture and this ability is currently in use by these architectures: * alpha has special requirements for memory access ordering * arm has custom definition of folded 2-level page tables * s390 has custom definitions of all page table accessors Signed-off-by: Mike Rapoport --- arch/alpha/include/asm/pgtable.h | 1 + arch/arm/include/asm/pgtable-2level.h| 1 + arch/arm/include/asm/pgtable-3level.h| 7 - arch/arm/include/asm/pgtable-nommu.h | 1 - arch/arm64/include/asm/pgtable.h | 8 -- arch/c6x/include/asm/pgtable.h | 1 - arch/csky/include/asm/pgtable.h | 1 - arch/hexagon/include/asm/pgtable.h | 9 -- arch/ia64/include/asm/pgtable.h | 4 --- arch/m68k/include/asm/motorola_pgtable.h | 7 - arch/microblaze/include/asm/pgtable.h| 1 - arch/mips/include/asm/pgtable-32.h | 1 - arch/mips/include/asm/pgtable-64.h | 6 arch/parisc/include/asm/pgtable.h| 8 -- arch/parisc/kernel/pci-dma.c | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 3 -- arch/powerpc/include/asm/nohash/64/pgtable.h | 3 -- arch/riscv/include/asm/pgtable-64.h | 7 - arch/riscv/mm/init.c | 12 arch/s390/include/asm/pgtable.h | 1 + arch/sh/include/asm/pgtable-3level.h | 7 - arch/sh/include/asm/pgtable_32.h | 1 - arch/sh/include/asm/pgtable_64.h | 1 - arch/sparc/include/asm/pgtable_32.h | 9 +- 
arch/sparc/include/asm/pgtable_64.h | 7 + arch/um/include/asm/pgtable-3level.h | 4 --- arch/um/include/asm/pgtable.h| 4 --- arch/x86/include/asm/pgtable.h | 17 include/asm-generic/pgtable-nopmd.h | 1 + include/linux/pgtable.h | 29 ++-- 30 files changed, 44 insertions(+), 120 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index dac20d03b727..314973d2810d 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -305,6 +305,7 @@ extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address) smp_read_barrier_depends(); /* see above */ return ret; } +#define pmd_offset pmd_offset /* Find an entry in the third-level page table.. */ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 9e084a464a97..3502c2f746ca 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -187,6 +187,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) { return (pmd_t *)pud; } +#define pmd_offset pmd_offset #define pmd_large(pmd) (pmd_val(pmd) & 2) #define pmd_leaf(pmd) (pmd_val(pmd) & 2) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 1933aed9f68d..fbb6693c3352 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -133,13 +133,6 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); } -/* Find an entry in the second-level page table.. 
*/ -#define pmd_index(addr)(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); -} - #define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) #define copy_pmd(pmdpd,pmdps) \ diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index 1a758f14e0c3..d16aba48fa0a 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -22,7 +22,6 @@ #define pgd_bad(pgd) (0) #define pgd_clear(pgdp) #define kern_addr_valid(addr) (1) -#definepmd_offset(a, b)((void *)0) /* FIXME */ /* * PMD_SHIFT determines the size of the area a second-level page table can map diff
[PATCH v2 09/12] mm: consolidate pte_index() and pte_offset_*() definitions
From: Mike Rapoport All architectures define pte_index() as (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1) and all architectures define pte_offset_kernel() as an entry in the array of PTEs indexed by the pte_index(). For the most architectures the pte_offset_kernel() implementation relies on the availability of pmd_page_vaddr() that converts a PMD entry value to the virtual address of the page containing PTEs array. Let's move x86 definitions of the PTE accessors to the generic place in and then simply drop the respective definitions from the other architectures. The architectures that didn't provide pmd_page_vaddr() are updated to have that defined. The generic implementation of pte_offset_kernel() can be overridden by an architecture and alpha makes use of this because it has special ordering requirements for its version of pte_offset_kernel(). Signed-off-by: Mike Rapoport --- arch/alpha/include/asm/pgtable.h | 4 +-- arch/arc/include/asm/pgtable.h | 15 -- arch/arm/include/asm/pgtable.h | 15 -- arch/arm64/include/asm/pgtable.h | 10 +++ arch/csky/include/asm/pgtable.h | 16 --- arch/hexagon/include/asm/pgtable.h | 25 +++-- arch/ia64/include/asm/pgtable.h | 9 -- arch/m68k/include/asm/mcf_pgtable.h | 12 +--- arch/m68k/include/asm/motorola_pgalloc.h | 2 +- arch/m68k/include/asm/motorola_pgtable.h | 11 +--- arch/m68k/include/asm/sun3_pgtable.h | 15 -- arch/m68k/mm/init.c | 2 +- arch/microblaze/include/asm/pgtable.h| 13 +++-- arch/mips/include/asm/pgtable-32.h | 12 arch/mips/include/asm/pgtable-64.h | 11 arch/mips/kvm/mmu.c | 20 +++--- arch/mips/kvm/trap_emul.c| 2 +- arch/nds32/include/asm/pgtable.h | 12 +++- arch/nios2/include/asm/pgtable.h | 14 +++--- arch/openrisc/include/asm/pgtable.h | 23 arch/parisc/include/asm/pgtable.h| 15 +++--- arch/powerpc/include/asm/book3s/32/pgtable.h | 13 ++--- arch/powerpc/include/asm/book3s/64/pgtable.h | 8 -- arch/powerpc/include/asm/nohash/32/pgtable.h | 13 ++--- arch/powerpc/include/asm/nohash/64/pgtable.h | 7 - 
arch/powerpc/include/asm/pgtable.h | 7 + arch/riscv/include/asm/pgtable.h | 10 --- arch/riscv/mm/init.c | 6 ++-- arch/s390/include/asm/pgtable.h | 10 ++- arch/s390/mm/pageattr.c | 2 +- arch/sh/include/asm/pgtable_32.h | 15 -- arch/sh/include/asm/pgtable_64.h | 12 arch/sparc/include/asm/pgalloc_64.h | 2 +- arch/sparc/include/asm/pgtable_32.h | 15 -- arch/sparc/include/asm/pgtable_64.h | 12 ++-- arch/sparc/mm/srmmu.c| 10 --- arch/um/include/asm/pgtable.h| 13 - arch/unicore32/include/asm/pgtable.h | 9 -- arch/x86/include/asm/pgtable.h | 19 - arch/x86/include/asm/pgtable_32.h| 11 arch/x86/include/asm/pgtable_64.h| 4 --- arch/xtensa/include/asm/pgtable.h| 10 +-- include/linux/pgtable.h | 29 43 files changed, 113 insertions(+), 382 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 1c263922beb3..dac20d03b727 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -314,9 +314,7 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) smp_read_barrier_depends(); /* see above */ return ret; } - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir),(addr)) -#define pte_unmap(pte) do { } while (0) +#define pte_offset_kernel pte_offset_kernel extern pgd_t swapper_pg_dir[1024]; diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 29137bcedd93..1146905f594b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -248,9 +248,6 @@ extern char empty_zero_page[PAGE_SIZE]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -#define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) - #define set_pte(pteptr, pteval)((*(pteptr)) = (pteval)) #define set_pmd(pmdptr, pmdval)(*(pmdptr) = pmdval) @@ -282,18 +279,6 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/ #define pte_pfn(pte) (pte_val(pte) >> 
PAGE_SHIFT) -#define __pte_index(addr) (((addr) >> PAGE_SHIF
[PATCH v2 08/12] mm: pgtable: add shortcuts for accessing kernel PMD and PTE
From: Mike Rapoport The powerpc 32-bit implementation of pgtable has nice shortcuts for accessing kernel PMD and PTE for a given virtual address. Make this helpers available for all architectures. Signed-off-by: Mike Rapoport --- arch/arc/mm/highmem.c | 10 +--- arch/arm/mach-sa1100/assabet.c| 2 +- arch/arm/mm/highmem.c | 4 ++-- arch/arm/mm/ioremap.c | 31 arch/arm/mm/mm.h | 5 arch/arm/mm/mmu.c | 7 +- arch/hexagon/include/asm/fixmap.h | 4 arch/m68k/mm/motorola.c | 26 arch/microblaze/kernel/signal.c | 8 +-- arch/microblaze/mm/init.c | 9 --- arch/mips/include/asm/fixmap.h| 3 --- arch/mips/mm/c-r3k.c | 10 ++-- arch/mips/mm/c-r4k.c | 10 ++-- arch/mips/mm/c-tx39.c | 10 ++-- arch/mips/mm/highmem.c| 2 +- arch/nds32/include/asm/pgtable.h | 2 -- arch/nds32/mm/init.c | 13 ++ arch/nds32/mm/proc.c | 6 + arch/parisc/mm/fixmap.c | 6 + arch/powerpc/include/asm/pgtable.h| 19 --- arch/powerpc/mm/book3s32/mmu.c| 2 +- arch/powerpc/mm/book3s32/tlb.c| 4 ++-- arch/powerpc/mm/kasan/kasan_init_32.c | 8 +++ arch/powerpc/mm/nohash/40x.c | 4 ++-- arch/powerpc/mm/pgtable_32.c | 2 +- arch/s390/mm/pageattr.c | 10 +--- arch/sh/mm/cache-sh4.c| 8 +-- arch/sh/mm/kmap.c | 5 +--- arch/sparc/mm/highmem.c | 12 ++ arch/sparc/mm/init_64.c | 6 + arch/sparc/mm/io-unit.c | 10 ++-- arch/sparc/mm/iommu.c | 8 +-- arch/sparc/mm/srmmu.c | 34 +++ arch/um/kernel/mem.c | 10 +--- arch/um/kernel/trap.c | 8 +-- arch/unicore32/mm/mm.h| 10 arch/x86/mm/init_32.c | 26 +++- arch/xtensa/include/asm/fixmap.h | 8 --- arch/xtensa/mm/highmem.c | 2 +- arch/xtensa/mm/kasan_init.c | 10 ++-- arch/xtensa/mm/mmu.c | 5 +--- include/linux/pgtable.h | 24 +++ 42 files changed, 80 insertions(+), 323 deletions(-) diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index d6b74883fd1f..1b9f473c6369 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -92,17 +92,9 @@ EXPORT_SYMBOL(kunmap_atomic_high); static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr) { - pgd_t *pgd_k; - p4d_t *p4d_k; - pud_t *pud_k; - 
pmd_t *pmd_k; + pmd_t *pmd_k = pmd_off_k(kvaddr); pte_t *pte_k; - pgd_k = pgd_offset_k(kvaddr); - p4d_k = p4d_offset(pgd_k, kvaddr); - pud_k = pud_offset(p4d_k, kvaddr); - pmd_k = pmd_offset(pud_k, kvaddr); - pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); if (!pte_k) panic("%s: Failed to allocate %lu bytes align=0x%lx\n", diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 8e3f5fdb4883..aa265ede5730 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -632,7 +632,7 @@ static void __init map_sa1100_gpio_regs( void ) int prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_DOMAIN(DOMAIN_IO); pmd_t *pmd; - pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(virt), virt), virt), virt); + pmd = pmd_off_k(virt); *pmd = __pmd(phys | prot); flush_pmd_entry(pmd); } diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index e013f6b81328..187fab227b50 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -18,7 +18,7 @@ static inline void set_fixmap_pte(int idx, pte_t pte) { unsigned long vaddr = __fix_to_virt(idx); - pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + pte_t *ptep = virt_to_kpte(vaddr); set_pte_ext(ptep, pte, 0); local_flush_tlb_kernel_page(vaddr); @@ -26,7 +26,7 @@ static inline void set_fixmap_pte(int idx, pte_t pte) static inline pte_t get_fixmap_pte(unsigned long vaddr) { - pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + pte_t *ptep = virt_to_kpte(vaddr); return *ptep; } diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 75529d76d28c..000e821b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -141,16 +141,8 @@ void __check_vmalloc_seq(struct mm_struct *mm) static void unmap_area_sections(unsigned long virt, unsigned long size) { unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1)); - pgd_t *pgd; - p4d_t *p4d; -
[PATCH v2 07/12] x86/mm: simplify init_trampoline() and surrounding logic
From: Mike Rapoport There are three cases for the trampoline initialization: * 32-bit does nothing * 64-bit with kaslr disabled simply copies a PGD entry from the direct map to the trampoline PGD * 64-bit with kaslr enabled maps the real mode trampoline at PUD level These cases are currently differentiated by a bunch of ifdefs inside asm/include/pgtable.h and the case of 64-bits with kaslr on uses pgd_index() helper. Replacing the ifdefs with a static function in arch/x86/mm/init.c gives clearer code and allows moving pgd_index() to the generic implementation in include/linux/pgtable.h Signed-off-by: Mike Rapoport --- arch/x86/include/asm/kaslr.h | 2 ++ arch/x86/include/asm/pgtable.h | 15 +-- arch/x86/include/asm/setup.h | 9 + arch/x86/mm/init.c | 22 ++ arch/x86/mm/kaslr.c| 33 + 5 files changed, 35 insertions(+), 46 deletions(-) diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index db7ba2feb947..0648190467ba 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY void kernel_randomize_memory(void); +void init_trampoline_kaslr(void); #else static inline void kernel_randomize_memory(void) { } +static inline void init_trampoline_kaslr(void) {} #endif /* CONFIG_RANDOMIZE_MEMORY */ #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d24f8e1f7250..6366136b0e46 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1070,27 +1070,14 @@ void init_mem_mapping(void); void early_alloc_pgt_buf(void); extern void memblock_find_dma_reserve(void); + #ifdef CONFIG_X86_64 -/* Realmode trampoline initialization. 
*/ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline_default(void) -{ - /* Default trampoline pgd value */ - trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; -} void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); - -# ifdef CONFIG_RANDOMIZE_MEMORY -void __meminit init_trampoline(void); -# else -# define init_trampoline init_trampoline_default -# endif -#else -static inline void init_trampoline(void) { } #endif /* local pte updates need not use xchg for locking */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ed8ec011a9fd..d95cacf210bb 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -78,6 +78,15 @@ static inline bool kaslr_enabled(void) return !!(boot_params.hdr.loadflags & KASLR_FLAG); } +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. + */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); +} + static inline unsigned long kaslr_offset(void) { return (unsigned long)&_text - __START_KERNEL; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 235dd0e35741..e225ebb25197 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -682,6 +682,28 @@ static void __init memory_map_bottom_up(unsigned long map_start, } } +/* + * The real mode trampoline, which is required for bootstrapping CPUs + * occupies only a small area under the low 1MB. See reserve_real_mode() + * for details. + * + * If KASLR is disabled the first PGD entry of the direct mapping is copied + * to map the real mode trampoline. + * + * If KASLR is enabled, copy only the PUD which covers the low 1MB + * area. This limits the randomization granularity to 1GB for both 4-level + * and 5-level paging. 
+ */ +static void __init init_trampoline(void) +{ +#ifdef CONFIG_X86_64 + if (!kaslr_memory_enabled()) + trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; + else + init_trampoline_kaslr(); +#endif +} + void __init init_mem_mapping(void) { unsigned long end; diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index faf02e1e1517..fb620fd9dae9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -61,15 +61,6 @@ static inline unsigned long get_padding(struct kaslr_memory_region *region) return (region->size_tb << TB_SHIFT); } -/* - * Apply no randomization if KASLR was disabled at boot or if KASAN - * is enabled. KASAN shadow mappings rely on regions being PGD aligned. - */ -static inline bool kaslr_memory_enabled(void) -{ - return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); -} - /* Initialize base and padding for each memory region randomized with KASLR */ void __init kernel_randomize_memory(void) { @@ -148,7 +139,7 @@ void __init kernel_randomize_memory(void)
[PATCH v2 06/12] m68k/mm: move {cache, nocache}_page() definitions close to their user
From: Mike Rapoport The cache_page() and nocache_page() functions are only used by the motorola MMU variant for setting caching attributes for the page table pages. Move the definitions of these functions from arch/m68k/include/asm/motorola_pgtable.h closer to their usage in arch/m68k/mm/motorola.c and drop unused definition in arch/m68k/include/asm/mcf_pgtable.h. Signed-off-by: Mike Rapoport Acked-by: Greg Ungerer --- arch/m68k/include/asm/mcf_pgtable.h | 40 - arch/m68k/include/asm/motorola_pgtable.h | 44 arch/m68k/mm/motorola.c | 43 +++ 3 files changed, 43 insertions(+), 84 deletions(-) diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index 0031cd387b75..737e826294f3 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -328,46 +328,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define pte_offset_kernel(dir, address) \ ((pte_t *) __pmd_page(*(dir)) + __pte_offset(address)) -/* - * Disable caching for page at given kernel virtual address. - */ -static inline void nocache_page(void *vaddr) -{ - pgd_t *dir; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - unsigned long addr = (unsigned long) vaddr; - - dir = pgd_offset_k(addr); - p4dp = p4d_offset(dir, addr); - pudp = pud_offset(p4dp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - *ptep = pte_mknocache(*ptep); -} - -/* - * Enable caching for page at given kernel virtual address. 
- */ -static inline void cache_page(void *vaddr) -{ - pgd_t *dir; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - unsigned long addr = (unsigned long) vaddr; - - dir = pgd_offset_k(addr); - p4dp = p4d_offset(dir, addr); - pudp = pud_offset(p4dp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - *ptep = pte_mkcache(*ptep); -} - /* * Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */ diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 9e5a3de21e15..e1594acf7c7e 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -227,50 +227,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmdp, unsigned long address) #define pte_offset_map(pmdp,address) ((pte_t *)__pmd_page(*pmdp) + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_unmap(pte) ((void)0) -/* Prior to calling these routines, the page should have been flushed - * from both the cache and ATC, or the CPU might not notice that the - * cache setting for the page has been changed. 
-jskov - */ -static inline void nocache_page(void *vaddr) -{ - unsigned long addr = (unsigned long)vaddr; - - if (CPU_IS_040_OR_060) { - pgd_t *dir; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - dir = pgd_offset_k(addr); - p4dp = p4d_offset(dir, addr); - pudp = pud_offset(p4dp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - *ptep = pte_mknocache(*ptep); - } -} - -static inline void cache_page(void *vaddr) -{ - unsigned long addr = (unsigned long)vaddr; - - if (CPU_IS_040_OR_060) { - pgd_t *dir; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - dir = pgd_offset_k(addr); - p4dp = p4d_offset(dir, addr); - pudp = pud_offset(p4dp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - *ptep = pte_mkcache(*ptep); - } -} - /* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */ #define __swp_type(x) (((x).val >> 4) & 0xff) #define __swp_offset(x)((x).val >> 12) diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 904c2a663977..8e5e74121a78 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -45,6 +45,49 @@ unsigned long mm_cachebits; EXPORT_SYMBOL(mm_cachebits); #endif +/* Prior to calling these routines, the page should have been flushed + * from both the cache and ATC, or the CPU might not notice that the + * cache setting for the page has been changed. -jskov + */ +static inline void nocache_page(void *vaddr) +{ + unsigned long addr = (unsigned long)vaddr; + + if (CPU_IS_040_OR_060) { + pgd_t *dir; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; +
[PATCH v2 05/12] m68k/mm/motorola: move comment about page table allocation functions
From: Mike Rapoport The comment about page table allocation functions resides in include/asm/motorola_pgtable.h while the functions live in include/asm/motorola_pgalloc.h. Move the comment close to the code. Signed-off-by: Mike Rapoport --- arch/m68k/include/asm/motorola_pgalloc.h | 6 ++ arch/m68k/include/asm/motorola_pgtable.h | 6 -- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index c66e42917912..f3cb453a07b7 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -18,6 +18,12 @@ extern void init_pointer_table(void *table, int type); extern void *get_pointer_table(int type); extern int free_pointer_table(void *table, int type); +/* + * Allocate and free page tables. The xxx_kernel() versions are + * used to allocate a kernel page table - this turns on ASN bits + * if any. + */ + static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { return get_pointer_table(TABLE_PTE); diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 48f19f0ab1e7..9e5a3de21e15 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -227,12 +227,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmdp, unsigned long address) #define pte_offset_map(pmdp,address) ((pte_t *)__pmd_page(*pmdp) + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_unmap(pte) ((void)0) -/* - * Allocate and free page tables. The xxx_kernel() versions are - * used to allocate a kernel page table - this turns on ASN bits - * if any. - */ - /* Prior to calling these routines, the page should have been flushed * from both the cache and ATC, or the CPU might not notice that the * cache setting for the page has been changed. -jskov -- 2.26.2
[PATCH v2 04/12] csky: replace definitions of __pXd_offset() with pXd_index()
From: Mike Rapoport All architectures use pXd_index() to get an entry in the page table page corresponding to a virtual address. Align csky with other architectures. Signed-off-by: Mike Rapoport --- arch/csky/include/asm/pgtable.h | 5 ++--- arch/csky/mm/fault.c| 2 +- arch/csky/mm/highmem.c | 2 +- arch/csky/mm/init.c | 6 +++--- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 9d2d16db237d..2eff4aea51b3 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -229,9 +229,8 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -#define __pgd_offset(address) pgd_index(address) -#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#define __pmd_offset(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(_mm, address) diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index 4e6dc68f3258..4055d430c0c8 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * Do _not_ use "tsk" here. We might be inside * an interrupt in the middle of a task switch.. 
*/ - int offset = __pgd_offset(address); + int offset = pgd_index(address); pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c index 3b3f622f5ae9..89ec32e602a1 100644 --- a/arch/csky/mm/highmem.c +++ b/arch/csky/mm/highmem.c @@ -92,7 +92,7 @@ static void __init kmap_pages_init(void) vaddr = PKMAP_BASE; fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir); - pgd = swapper_pg_dir + __pgd_offset(vaddr); + pgd = swapper_pg_dir + pgd_index(vaddr); pud = (pud_t *)pgd; pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index eda2b4291485..af627128314f 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -157,9 +157,9 @@ void __init fixrange_init(unsigned long start, unsigned long end, unsigned long vaddr; vaddr = start; - i = __pgd_offset(vaddr); - j = __pud_offset(vaddr); - k = __pmd_offset(vaddr); + i = pgd_index(vaddr); + j = pud_index(vaddr); + k = pmd_index(vaddr); pgd = pgd_base + i; for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { -- 2.26.2
[PATCH v2 03/12] mm: reorder includes after introduction of linux/pgtable.h
From: Mike Rapoport The replacement of with made the include of the latter in the middle of asm includes. Fix this up with the aid of the below script and manual adjustments here and there. import sys import re if len(sys.argv) is not 3: print "USAGE: %s " % (sys.argv[0]) sys.exit(1) hdr_to_move="#include " % sys.argv[2] moved = False in_hdrs = False with open(sys.argv[1], "r") as f: lines = f.readlines() for _line in lines: line = _line.rstrip('\n') if line == hdr_to_move: continue if line.startswith("#include --- arch/alpha/kernel/proto.h | 2 -- arch/arc/mm/highmem.c | 2 +- arch/arc/mm/tlbex.S | 2 +- arch/arm/include/asm/efi.h| 1 - arch/arm/include/asm/fixmap.h | 2 +- arch/arm/kernel/head.S| 2 +- arch/arm/kernel/suspend.c | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- arch/arm/mach-integrator/core.c | 2 +- arch/arm/mach-keystone/platsmp.c | 2 +- arch/arm/mach-sa1100/hackkit.c| 2 +- arch/arm/mach-zynq/common.c | 2 +- arch/arm/mm/idmap.c | 2 +- arch/arm/mm/mm.h | 1 - arch/arm/mm/proc-arm1020.S| 2 +- arch/arm/mm/proc-arm1020e.S | 2 +- arch/arm/mm/proc-arm1022.S| 2 +- arch/arm/mm/proc-arm1026.S| 2 +- arch/arm/mm/proc-arm720.S | 2 +- arch/arm/mm/proc-arm740.S | 2 +- arch/arm/mm/proc-arm7tdmi.S | 2 +- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm922.S | 2 +- arch/arm/mm/proc-arm925.S | 2 +- arch/arm/mm/proc-arm926.S | 2 +- arch/arm/mm/proc-arm940.S | 2 +- arch/arm/mm/proc-arm946.S | 2 +- arch/arm/mm/proc-arm9tdmi.S | 2 +- arch/arm/mm/proc-fa526.S | 2 +- arch/arm/mm/proc-feroceon.S | 2 +- arch/arm/mm/proc-mohawk.S | 2 +- arch/arm/mm/proc-sa110.S | 2 +- arch/arm/mm/proc-sa1100.S | 2 +- arch/arm/mm/proc-v6.S | 2 +- arch/arm/mm/proc-v7.S | 2 +- arch/arm/mm/proc-xsc3.S | 2 +- arch/arm/mm/proc-xscale.S | 2 +- arch/arm/mm/pv-fixup-asm.S| 2 +- arch/arm64/include/asm/io.h | 2 +- arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/include/asm/vmap_stack.h | 2 +- arch/arm64/kernel/acpi.c | 2 +- arch/arm64/kernel/head.S | 2 +- 
arch/arm64/kernel/kaslr.c | 2 +- arch/arm64/kernel/suspend.c | 2 +- arch/arm64/kernel/vmlinux.lds.S | 1 - arch/arm64/mm/proc.S | 2 +- arch/ia64/kernel/entry.S | 2 +- arch/ia64/kernel/head.S | 3 ++- arch/ia64/kernel/irq_ia64.c | 2 +- arch/ia64/kernel/ivt.S| 2 +- arch/ia64/kernel/kprobes.c| 2 +- arch/ia64/kernel/mca_asm.S| 2 +- arch/ia64/kernel/relocate_kernel.S| 4 +--- arch/ia64/kernel/setup.c | 2 +- arch/ia64/kernel/uncached.c | 2 +- arch/ia64/kernel/vmlinux.lds.S| 2 +- arch/m68k/68000/m68VZ328.c| 2 +- arch/m68k/include/asm/sun3xflop.h | 2 +- arch/m68k/kernel/head.S | 2 +- arch/microblaze/include/asm/pgalloc.h | 2 +- arch/microblaze/kernel/hw_exception_handler.S | 2 +- arch/microblaze/kernel/module.c | 2 +- arch/microblaze/kernel/setup.c| 2 +- arch/microblaze/mm/pgtable.c | 2 +- arch/mips/jazz/irq.c | 2 +- arch/mips/jazz/setup.c| 2 +- arch/mips/kvm/mips.c
[PATCH v2 01/12] mm: don't include asm/pgtable.h if linux/mm.h is already included
From: Mike Rapoport The linux/mm.h header includes to allow inlining of the functions involving page table manipulations, e.g. pte_alloc() and pmd_alloc(). So, there is no point to explicitly include in the files that include . The include statements in such cases are remove with a simple loop: for f in $(git grep -l "include ") ; do sed -i -e '/include / d' $f done Signed-off-by: Mike Rapoport --- arch/alpha/boot/bootp.c | 1 - arch/alpha/boot/bootpz.c | 1 - arch/alpha/boot/main.c| 1 - arch/alpha/include/asm/io.h | 1 - arch/alpha/kernel/process.c | 1 - arch/alpha/kernel/ptrace.c| 1 - arch/alpha/kernel/setup.c | 1 - arch/alpha/kernel/smp.c | 1 - arch/alpha/kernel/sys_alcor.c | 1 - arch/alpha/kernel/sys_cabriolet.c | 1 - arch/alpha/kernel/sys_dp264.c | 1 - arch/alpha/kernel/sys_eb64p.c | 1 - arch/alpha/kernel/sys_eiger.c | 1 - arch/alpha/kernel/sys_jensen.c| 1 - arch/alpha/kernel/sys_marvel.c| 1 - arch/alpha/kernel/sys_miata.c | 1 - arch/alpha/kernel/sys_mikasa.c| 1 - arch/alpha/kernel/sys_nautilus.c | 1 - arch/alpha/kernel/sys_noritake.c | 1 - arch/alpha/kernel/sys_rawhide.c | 1 - arch/alpha/kernel/sys_ruffian.c | 1 - arch/alpha/kernel/sys_rx164.c | 1 - arch/alpha/kernel/sys_sable.c | 1 - arch/alpha/kernel/sys_sio.c | 1 - arch/alpha/kernel/sys_sx164.c | 1 - arch/alpha/kernel/sys_takara.c| 1 - arch/alpha/kernel/sys_titan.c | 1 - arch/alpha/kernel/sys_wildfire.c | 1 - arch/alpha/mm/init.c | 1 - arch/arm/kernel/machine_kexec.c | 1 - arch/arm/kernel/module.c | 1 - arch/arm/kernel/ptrace.c | 1 - arch/arm/kernel/smp.c | 1 - arch/arm/mach-ebsa110/core.c | 1 - arch/arm/mach-footbridge/common.c | 1 - arch/arm/mach-imx/mm-imx21.c | 1 - arch/arm/mach-imx/mm-imx27.c | 1 - arch/arm/mach-imx/mm-imx3.c | 1 - arch/arm/mach-iop32x/i2c.c| 1 - arch/arm/mach-iop32x/iq31244.c| 1 - arch/arm/mach-iop32x/iq80321.c| 1 - arch/arm/mach-iop32x/n2100.c | 1 - arch/arm/mach-ixp4xx/common.c | 1 - arch/arm/mach-sa1100/assabet.c| 1 - arch/arm/mm/copypage-v4mc.c | 1 - arch/arm/mm/copypage-v6.c | 1 - 
arch/arm/mm/copypage-xscale.c | 1 - arch/arm/mm/dump.c| 1 - arch/arm/mm/fault-armv.c | 1 - arch/arm/mm/fault.c | 1 - arch/arm/mm/pageattr.c| 1 - arch/arm64/kernel/hibernate.c | 1 - arch/arm64/kernel/ptrace.c| 1 - arch/arm64/kernel/smp.c | 1 - arch/arm64/mm/dump.c | 1 - arch/arm64/mm/fault.c | 1 - arch/arm64/mm/kasan_init.c| 1 - arch/arm64/mm/pageattr.c | 1 - arch/csky/kernel/module.c | 1 - arch/csky/kernel/ptrace.c | 1 - arch/csky/mm/init.c | 1 - arch/csky/mm/tlb.c| 1 - arch/h8300/kernel/process.c | 1 - arch/h8300/kernel/setup.c | 1 - arch/h8300/kernel/signal.c| 1 - arch/h8300/mm/fault.c | 1 - arch/h8300/mm/init.c | 1 - arch/h8300/mm/memory.c| 1 - arch/hexagon/mm/vm_fault.c| 1 - arch/ia64/kernel/efi.c| 1 - arch/ia64/kernel/ptrace.c | 1 - arch/ia64/kernel/smp.c| 1 - arch/ia64/kernel/smpboot.c| 1 - arch/ia64/mm/contig.c | 1 - arch/ia64/mm/fault.c | 1 - arch/m68k/68000/timers.c | 1 - arch/m68k/amiga/config.c | 1 - arch/m68k/apollo/config.c | 1 - arch/m68k/atari/atasound.c| 1 - arch/m68k/atari/stram.c | 1 - arch/m68k/bvme6000/config.c | 1 - arch/m68k/kernel/process.c| 1 - arch/m68k/kernel/ptrace.c | 1 - arch/m68k/kernel/setup_no.c | 1 - arch/m68k/kernel/signal.c | 1 - arch/m68k/kernel/uboot.c
[PATCH v2 00/12] mm: consolidate definitions of page table accessors
From: Mike Rapoport Hi, The low level page table accessors (pXY_index(), pXY_offset()) are duplicated across all architectures and sometimes more than once. For instance, we have 31 definitions of pgd_offset() for 25 supported architectures. Most of these definitions are actually identical and typically it boils down to, e.g. static inline unsigned long pmd_index(unsigned long address) { return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } These definitions can be shared among 90% of the arches provided XYZ_SHIFT, PTRS_PER_XYZ and xyz_page_vaddr() are defined. For architectures that really need a custom version there is always the possibility to override the generic version with the usual ifdefs magic. These patches introduce include/linux/pgtable.h that replaces include/asm-generic/pgtable.h and add the definitions of the page table accessors to the new header. The patches are against v5.7-rc5-mmotm-2020-05-13-20-30 v2 changes: * s/morotola/motorola in the changelog of patch 6 * replace FIXME with a comment about pmd_off and virt_to_kpte in patch 8 * rebase on v5.7-rc5-mmotm-2020-05-13-20-30 Mike Rapoport (12): mm: don't include asm/pgtable.h if linux/mm.h is already included mm: introduce include/linux/pgtable.h mm: reorder includes after introduction of linux/pgtable.h csky: replace definitions of __pXd_offset() with pXd_index() m68k/mm/motorola: move comment about page table allocation functions m68k/mm: move {cache,nocache}_page() definitions close to their user x86/mm: simplify init_trampoline() and surrounding logic mm: pgtable: add shortcuts for accessing kernel PMD and PTE mm: consolidate pte_index() and pte_offset_*() definitions mm: consolidate pmd_index() and pmd_offset() definitions mm: consolidate pud_index() and pud_offset() definitions mm: consolidate pgd_index() and pgd_offset{_k}() definitions arch/alpha/boot/bootp.c | 1 -
arch/alpha/boot/bootpz.c | 1 - arch/alpha/boot/main.c| 1 - arch/alpha/include/asm/io.h | 1 - arch/alpha/include/asm/pgtable.h | 16 +-- arch/alpha/kernel/process.c | 1 - arch/alpha/kernel/proto.h | 2 - arch/alpha/kernel/ptrace.c| 1 - arch/alpha/kernel/setup.c | 1 - arch/alpha/kernel/smp.c | 1 - arch/alpha/kernel/sys_alcor.c | 1 - arch/alpha/kernel/sys_cabriolet.c | 1 - arch/alpha/kernel/sys_dp264.c | 1 - arch/alpha/kernel/sys_eb64p.c | 1 - arch/alpha/kernel/sys_eiger.c | 1 - arch/alpha/kernel/sys_jensen.c| 1 - arch/alpha/kernel/sys_marvel.c| 1 - arch/alpha/kernel/sys_miata.c | 1 - arch/alpha/kernel/sys_mikasa.c| 1 - arch/alpha/kernel/sys_nautilus.c | 1 - arch/alpha/kernel/sys_noritake.c | 1 - arch/alpha/kernel/sys_rawhide.c | 1 - arch/alpha/kernel/sys_ruffian.c | 1 - arch/alpha/kernel/sys_rx164.c | 1 - arch/alpha/kernel/sys_sable.c | 1 - arch/alpha/kernel/sys_sio.c | 1 - arch/alpha/kernel/sys_sx164.c | 1 - arch/alpha/kernel/sys_takara.c| 1 - arch/alpha/kernel/sys_titan.c | 1 - arch/alpha/kernel/sys_wildfire.c | 1 - arch/alpha/mm/init.c | 1 - arch/arc/include/asm/pgtable.h| 24 arch/arc/mm/highmem.c | 12 +- arch/arc/mm/tlbex.S | 2 +- arch/arm/include/asm/efi.h| 1 - arch/arm/include/asm/fixmap.h | 2 +- arch/arm/include/asm/idmap.h | 2 +- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 7 - arch/arm/include/asm/pgtable-nommu.h | 3 - arch/arm/include/asm/pgtable.h| 25 arch/arm/kernel/head.S| 2 +- arch/arm/kernel/machine_kexec.c | 1 - arch/arm/kernel/module.c | 1 - arch/arm/kernel/ptrace.c | 1 - arch/arm/kernel/smp.c | 1 - arch/arm/kernel/suspend.c | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- arch/arm/mach-ebsa110/core.c | 1 - arch/arm/mach-footbridge/common.c | 1 - arch/arm/mach-imx/mm-imx21.c | 1 - arch/arm/mach-imx/mm-imx27.c | 1 - arch/arm/mach-imx/mm-imx3.c | 1 - arch/arm/mach-integra
Re: [PATCH 08/12] mm: pgtable: add shortcuts for accessing kernel PMD and PTE
On Tue, May 12, 2020 at 12:24:41PM -0700, Matthew Wilcox wrote: > On Tue, May 12, 2020 at 09:44:18PM +0300, Mike Rapoport wrote: > > +++ b/include/linux/pgtable.h > > @@ -28,6 +28,24 @@ > > #define USER_PGTABLES_CEILING 0UL > > #endif > > > > +/* FIXME: */ > > Fix you what? Add documentation? Ouch, indeed :) > > +static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va) > > +{ > > + return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), > > va); > > +} -- Sincerely yours, Mike.
Re: [PATCH 03/12] mm: reorder includes after introduction of linux/pgtable.h
On Tue, May 12, 2020 at 12:20:13PM -0700, Matthew Wilcox wrote: > On Tue, May 12, 2020 at 09:44:13PM +0300, Mike Rapoport wrote: > > diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h > > index a093cd45ec79..701a05090141 100644 > > --- a/arch/alpha/kernel/proto.h > > +++ b/arch/alpha/kernel/proto.h > > @@ -2,8 +2,6 @@ > > #include > > #include > > > > -#include > > - > > /* Prototypes of functions used across modules here in this directory. */ > > > > #define vucp volatile unsigned char * > > Looks like your script has a bug if linux/pgtable.h is the last include > in the file? The script indeed cannot handle all the corner cases, but this is not one of them. I initially started to look into removing asm/pgtable.h where it was not needed, but I ran out of patience very soon. This file is what sneaked in from that attempt. -- Sincerely yours, Mike.
[PATCH 12/12] mm: consolidate pgd_index() and pgd_offset{_k}() definitions
From: Mike Rapoport All architectures tables define pgd_offset() as an entry in the array of PGDs indexed by the pgd_index(), where pgd_index() is (address >> PGD_SHIFT) & (PTRS_PER_PGD - 1) For the most cases, the pgd_offset() uses mm->pgd as the pointer to the top-level page directory and the pgd_offset_k() is a helper that presumes that mm == _mm. Use x86 implementation as the generic one and remove redundant definitions of PGD accessors in most of arch/*/include/asm/pgtable.h The generic implementation can be overridden by an architecture and this ability is currently in use by there architectures: * ia64 has custom implementation of pgd_index() * s390 has custom definitions of all page table accessors Signed-off-by: Mike Rapoport --- arch/alpha/include/asm/pgtable.h | 9 arch/arc/include/asm/pgtable.h | 7 --- arch/arm/include/asm/pgtable.h | 8 arch/arm64/include/asm/pgtable.h | 10 arch/arm64/kernel/hibernate.c| 4 +- arch/arm64/mm/kasan_init.c | 2 +- arch/arm64/mm/mmu.c | 8 ++-- arch/csky/include/asm/pgtable.h | 11 - arch/hexagon/include/asm/pgtable.h | 18 --- arch/ia64/include/asm/pgtable.h | 14 +- arch/m68k/include/asm/mcf_pgtable.h | 11 - arch/m68k/include/asm/motorola_pgtable.h | 16 --- arch/m68k/include/asm/sun3_pgtable.h | 9 arch/microblaze/include/asm/pgtable.h| 7 --- arch/mips/include/asm/pgtable-32.h | 8 arch/mips/include/asm/pgtable-64.h | 8 arch/nds32/include/asm/pgtable.h | 6 --- arch/nios2/include/asm/pgtable.h | 8 arch/openrisc/include/asm/pgtable.h | 10 arch/parisc/include/asm/pgtable.h| 9 arch/powerpc/include/asm/book3s/32/pgtable.h | 7 --- arch/powerpc/include/asm/book3s/64/pgtable.h | 13 - arch/powerpc/include/asm/nohash/32/pgtable.h | 7 --- arch/powerpc/include/asm/nohash/64/pgtable.h | 12 - arch/riscv/include/asm/pgtable.h | 10 arch/riscv/mm/init.c | 12 ++--- arch/s390/include/asm/pgtable.h | 1 - arch/sh/include/asm/pgtable_32.h | 7 --- arch/sh/include/asm/pgtable_64.h | 11 - arch/sparc/include/asm/pgtable_32.h | 8 
arch/sparc/include/asm/pgtable_64.h | 7 --- arch/um/include/asm/pgtable.h| 50 ++-- arch/unicore32/include/asm/pgtable.h | 8 arch/x86/include/asm/pgtable.h | 24 -- arch/xtensa/include/asm/pgtable.h| 8 include/linux/pgtable.h | 26 ++ 36 files changed, 55 insertions(+), 339 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 314973d2810d..162c17b2631f 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -276,15 +276,6 @@ extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) &= ~_PAGE_FOW; return extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; return pte; } extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= __ACCESS_BITS; return pte; } -#define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address)) - -/* to find an entry in a kernel page-table-directory */ -#define pgd_offset_k(address) pgd_offset(_mm, (address)) - -/* to find an entry in a page-table-directory. */ -#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) -#define pgd_offset(mm, address)((mm)->pgd+pgd_index(address)) - /* * The smp_read_barrier_depends() in the following functions are required to * order the load of *dir (the pointer in the top level page table) with any diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 1146905f594b..f1ed17edb085 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -316,13 +316,6 @@ static inline void set_pte_at(struct mm_struct *mm, unsigned long addr, set_pte(ptep, pteval); } -/* - * All kernel related VM pages are in init's mm. 
- */ -#define pgd_offset_k(address) pgd_offset(_mm, address) -#define pgd_index(addr)((addr) >> PGDIR_SHIFT) -#define pgd_offset(mm, addr) (((mm)->pgd)+pgd_index(addr)) - /* * Macro to quickly access the PGD entry, utlising the fact that some * arch may cache the pointer to Page Directory of "current" task diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index 41543bc47660..c02f24400369 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -166,14 +166,6 @@ extern struct page *empty_zero_page; extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; -/* to find an entry in
[PATCH 11/12] mm: consolidate pud_index() and pud_offset() definitions
From: Mike Rapoport All architectures that have at least four-level page tables define pud_offset() as an entry in the array of PUDs indexed by the pud_index(), where pud_index() is (address >> PUD_SHIFT) & (PTRS_PER_PUD - 1) For the most architectures the pud_offset() implementation relies on the availability of pud_page_vaddr() that converts a PUD entry value to the virtual address of the page containing PUD array. Let's use such implementation as a generic and drop most of the definitions of pud_index() and pud_offset() in files. The architectures that didn't provide pud_page_vaddr() are updated to have that defined. The generic implementation can be overridden by an architecture and this ability is currently in use by there architectures: * s390 has custom definitions of all page table accessors Signed-off-by: Mike Rapoport --- arch/arm64/include/asm/pgtable.h| 8 +--- arch/csky/include/asm/pgtable.h | 2 -- arch/ia64/include/asm/pgtable.h | 6 -- arch/mips/include/asm/pgtable-32.h | 1 - arch/mips/include/asm/pgtable-64.h | 7 --- arch/powerpc/include/asm/book3s/64/pgtable.h| 4 arch/powerpc/include/asm/nohash/64/pgtable-4k.h | 4 arch/s390/include/asm/pgtable.h | 1 + arch/sh/include/asm/pgtable_32.h| 2 -- arch/sh/include/asm/pgtable_64.h| 2 -- arch/sparc/include/asm/pgtable_64.h | 5 - arch/x86/include/asm/pgtable.h | 11 --- include/asm-generic/pgtable-nopud.h | 1 + include/linux/pgtable.h | 16 14 files changed, 23 insertions(+), 47 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d0175335a2f2..c0155814b374 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -626,11 +626,13 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d) return __p4d_to_phys(p4d); } -/* Find an entry in the frst-level page table. 
*/ -#define pud_index(addr)(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +static inline unsigned long p4d_page_vaddr(p4d_t p4d) +{ + return (unsigned long)__va(p4d_page_paddr(p4d)); +} +/* Find an entry in the frst-level page table. */ #define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t)) -#define pud_offset(dir, addr) ((pud_t *)__va(pud_offset_phys((dir), (addr #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr)) #define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr)) diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index dc613f20e2e1..c5ab20970857 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -220,8 +220,6 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) - /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(_mm, address) diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index d75d981e6ee1..4c24e5e18bff 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -383,12 +383,6 @@ pgd_offset (const struct mm_struct *mm, unsigned long address) here. */ #define pgd_offset_gate(mm, addr) pgd_offset_k(addr) -#if CONFIG_PGTABLE_LEVELS == 4 -/* Find an entry in the second-level page table.. 
*/ -#define pud_offset(dir,addr) \ - ((pud_t *) p4d_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) -#endif - /* atomic versions of the some PTE manipulations: */ static inline int diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index 05b51c344939..9c0e7a5ffc75 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -199,7 +199,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) #define pgd_offset_k(address) pgd_offset(_mm, address) #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) /* to find an entry in a page-table-directory */ #define pgd_offset(mm, addr) ((mm)->pgd + pgd_index(addr)) diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 21fb55510d35..38170fdac5bf 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -172,8 +172,6 @@ extern pte_t invalid_pte_table[PTRS_PER_PTE]; -#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) - #ifndef __PAGETABLE_PUD_FOLDED /* * For 4-level
[PATCH 10/12] mm: consolidate pmd_index() and pmd_offset() definitions
From: Mike Rapoport All architectures define pmd_index() as (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1) and all architectures that have at least three-level page tables define pmd_offset() as an entry in the array of PMDs indexed by the pmd_index(). For the most architectures the pmd_offset() implementation relies on the availability of pud_page_vaddr() that converts a PMD entry value to the virtual address of the page containing PMD array. Let's use such implementation as a generic and drop most of the definitions of pmd_index() and pmd_offset() in files. The architectures that didn't provide pud_page_vaddr() are updated to have that defined. The generic implementation can be overridden by an architecture and this ability is currently in use by there architectures: * alpha has special requirements for memory access ordering * arm has custom definition of folded 2-level page tables * s390 has custom definitions of all page table accessors Signed-off-by: Mike Rapoport --- arch/alpha/include/asm/pgtable.h | 1 + arch/arm/include/asm/pgtable-2level.h| 1 + arch/arm/include/asm/pgtable-3level.h| 7 - arch/arm/include/asm/pgtable-nommu.h | 1 - arch/arm64/include/asm/pgtable.h | 8 -- arch/c6x/include/asm/pgtable.h | 1 - arch/csky/include/asm/pgtable.h | 1 - arch/hexagon/include/asm/pgtable.h | 9 -- arch/ia64/include/asm/pgtable.h | 4 --- arch/m68k/include/asm/motorola_pgtable.h | 7 - arch/microblaze/include/asm/pgtable.h| 1 - arch/mips/include/asm/pgtable-32.h | 1 - arch/mips/include/asm/pgtable-64.h | 6 arch/parisc/include/asm/pgtable.h| 8 -- arch/parisc/kernel/pci-dma.c | 2 +- arch/powerpc/include/asm/book3s/64/pgtable.h | 3 -- arch/powerpc/include/asm/nohash/64/pgtable.h | 3 -- arch/riscv/include/asm/pgtable-64.h | 7 - arch/riscv/mm/init.c | 12 arch/s390/include/asm/pgtable.h | 1 + arch/sh/include/asm/pgtable-3level.h | 7 - arch/sh/include/asm/pgtable_32.h | 1 - arch/sh/include/asm/pgtable_64.h | 1 - arch/sparc/include/asm/pgtable_32.h | 9 +- 
arch/sparc/include/asm/pgtable_64.h | 7 + arch/um/include/asm/pgtable-3level.h | 4 --- arch/um/include/asm/pgtable.h| 4 --- arch/x86/include/asm/pgtable.h | 17 include/asm-generic/pgtable-nopmd.h | 1 + include/linux/pgtable.h | 29 ++-- 30 files changed, 44 insertions(+), 120 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index dac20d03b727..314973d2810d 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -305,6 +305,7 @@ extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long address) smp_read_barrier_depends(); /* see above */ return ret; } +#define pmd_offset pmd_offset /* Find an entry in the third-level page table.. */ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) diff --git a/arch/arm/include/asm/pgtable-2level.h b/arch/arm/include/asm/pgtable-2level.h index 9e084a464a97..3502c2f746ca 100644 --- a/arch/arm/include/asm/pgtable-2level.h +++ b/arch/arm/include/asm/pgtable-2level.h @@ -187,6 +187,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) { return (pmd_t *)pud; } +#define pmd_offset pmd_offset #define pmd_large(pmd) (pmd_val(pmd) & 2) #define pmd_leaf(pmd) (pmd_val(pmd) & 2) diff --git a/arch/arm/include/asm/pgtable-3level.h b/arch/arm/include/asm/pgtable-3level.h index 1933aed9f68d..fbb6693c3352 100644 --- a/arch/arm/include/asm/pgtable-3level.h +++ b/arch/arm/include/asm/pgtable-3level.h @@ -133,13 +133,6 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); } -/* Find an entry in the second-level page table.. 
*/ -#define pmd_index(addr)(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); -} - #define pmd_bad(pmd) (!(pmd_val(pmd) & 2)) #define copy_pmd(pmdpd,pmdps) \ diff --git a/arch/arm/include/asm/pgtable-nommu.h b/arch/arm/include/asm/pgtable-nommu.h index 1a758f14e0c3..d16aba48fa0a 100644 --- a/arch/arm/include/asm/pgtable-nommu.h +++ b/arch/arm/include/asm/pgtable-nommu.h @@ -22,7 +22,6 @@ #define pgd_bad(pgd) (0) #define pgd_clear(pgdp) #define kern_addr_valid(addr) (1) -#definepmd_offset(a, b)((void *)0) /* FIXME */ /* * PMD_SHIFT determines the size of the area a second-level page table can map diff
[PATCH 09/12] mm: consolidate pte_index() and pte_offset_*() definitions
From: Mike Rapoport All architectures define pte_index() as (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1) and all architectures define pte_offset_kernel() as an entry in the array of PTEs indexed by the pte_index(). For the most architectures the pte_offset_kernel() implementation relies on the availability of pmd_page_vaddr() that converts a PMD entry value to the virtual address of the page containing PTEs array. Let's move x86 definitions of the PTE accessors to the generic place in and then simply drop the respective definitions from the other architectures. The architectures that didn't provide pmd_page_vaddr() are updated to have that defined. The generic implementation of pte_offset_kernel() can be overridden by an architecture and alpha makes use of this because it has special ordering requirements for its version of pte_offset_kernel(). Signed-off-by: Mike Rapoport --- arch/alpha/include/asm/pgtable.h | 4 +-- arch/arc/include/asm/pgtable.h | 15 -- arch/arm/include/asm/pgtable.h | 15 -- arch/arm64/include/asm/pgtable.h | 10 +++ arch/csky/include/asm/pgtable.h | 16 --- arch/hexagon/include/asm/pgtable.h | 25 +++-- arch/ia64/include/asm/pgtable.h | 9 -- arch/m68k/include/asm/mcf_pgtable.h | 12 +--- arch/m68k/include/asm/motorola_pgalloc.h | 2 +- arch/m68k/include/asm/motorola_pgtable.h | 11 +--- arch/m68k/include/asm/sun3_pgtable.h | 15 -- arch/m68k/mm/init.c | 2 +- arch/microblaze/include/asm/pgtable.h| 13 +++-- arch/mips/include/asm/pgtable-32.h | 12 arch/mips/include/asm/pgtable-64.h | 11 arch/mips/kvm/mmu.c | 20 +++--- arch/mips/kvm/trap_emul.c| 2 +- arch/nds32/include/asm/pgtable.h | 12 +++- arch/nios2/include/asm/pgtable.h | 14 +++--- arch/openrisc/include/asm/pgtable.h | 23 arch/parisc/include/asm/pgtable.h| 15 +++--- arch/powerpc/include/asm/book3s/32/pgtable.h | 13 ++--- arch/powerpc/include/asm/book3s/64/pgtable.h | 8 -- arch/powerpc/include/asm/nohash/32/pgtable.h | 13 ++--- arch/powerpc/include/asm/nohash/64/pgtable.h | 7 - 
arch/powerpc/include/asm/pgtable.h | 7 + arch/riscv/include/asm/pgtable.h | 10 --- arch/riscv/mm/init.c | 6 ++-- arch/s390/include/asm/pgtable.h | 10 ++- arch/s390/mm/pageattr.c | 2 +- arch/sh/include/asm/pgtable_32.h | 15 -- arch/sh/include/asm/pgtable_64.h | 12 arch/sparc/include/asm/pgalloc_64.h | 2 +- arch/sparc/include/asm/pgtable_32.h | 15 -- arch/sparc/include/asm/pgtable_64.h | 12 ++-- arch/sparc/mm/srmmu.c| 10 --- arch/um/include/asm/pgtable.h| 13 - arch/unicore32/include/asm/pgtable.h | 9 -- arch/x86/include/asm/pgtable.h | 19 - arch/x86/include/asm/pgtable_32.h| 11 arch/x86/include/asm/pgtable_64.h| 4 --- arch/xtensa/include/asm/pgtable.h| 10 +-- include/linux/pgtable.h | 29 43 files changed, 113 insertions(+), 382 deletions(-) diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h index 1c263922beb3..dac20d03b727 100644 --- a/arch/alpha/include/asm/pgtable.h +++ b/arch/alpha/include/asm/pgtable.h @@ -314,9 +314,7 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address) smp_read_barrier_depends(); /* see above */ return ret; } - -#define pte_offset_map(dir,addr) pte_offset_kernel((dir),(addr)) -#define pte_unmap(pte) do { } while (0) +#define pte_offset_kernel pte_offset_kernel extern pgd_t swapper_pg_dir[1024]; diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index 29137bcedd93..1146905f594b 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -248,9 +248,6 @@ extern char empty_zero_page[PAGE_SIZE]; #define ZERO_PAGE(vaddr) (virt_to_page(empty_zero_page)) -#define pte_unmap(pte) do { } while (0) -#define pte_unmap_nested(pte) do { } while (0) - #define set_pte(pteptr, pteval)((*(pteptr)) = (pteval)) #define set_pmd(pmdptr, pmdval)(*(pmdptr) = pmdval) @@ -282,18 +279,6 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep) /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/ #define pte_pfn(pte) (pte_val(pte) >> 
PAGE_SHIFT) -#define __pte_index(addr) (((addr) >> PAGE_SHIF
[PATCH 08/12] mm: pgtable: add shortcuts for accessing kernel PMD and PTE
From: Mike Rapoport The powerpc 32-bit implementation of pgtable has nice shortcuts for accessing kernel PMD and PTE for a given virtual address. Make this helpers available for all architectures. Signed-off-by: Mike Rapoport --- arch/arc/mm/highmem.c | 10 +--- arch/arm/mach-sa1100/assabet.c| 2 +- arch/arm/mm/highmem.c | 4 ++-- arch/arm/mm/ioremap.c | 31 arch/arm/mm/mm.h | 5 arch/arm/mm/mmu.c | 7 +- arch/hexagon/include/asm/fixmap.h | 4 arch/m68k/mm/motorola.c | 26 arch/microblaze/kernel/signal.c | 8 +-- arch/microblaze/mm/init.c | 9 --- arch/mips/include/asm/fixmap.h| 3 --- arch/mips/mm/c-r3k.c | 10 ++-- arch/mips/mm/c-r4k.c | 10 ++-- arch/mips/mm/c-tx39.c | 10 ++-- arch/mips/mm/highmem.c| 2 +- arch/nds32/include/asm/pgtable.h | 2 -- arch/nds32/mm/init.c | 13 ++ arch/nds32/mm/proc.c | 6 + arch/parisc/mm/fixmap.c | 6 + arch/powerpc/include/asm/pgtable.h| 19 --- arch/powerpc/mm/book3s32/mmu.c| 2 +- arch/powerpc/mm/book3s32/tlb.c| 4 ++-- arch/powerpc/mm/kasan/kasan_init_32.c | 8 +++ arch/powerpc/mm/nohash/40x.c | 4 ++-- arch/powerpc/mm/pgtable_32.c | 2 +- arch/s390/mm/pageattr.c | 10 +--- arch/sh/mm/cache-sh4.c| 8 +-- arch/sh/mm/kmap.c | 5 +--- arch/sparc/mm/highmem.c | 12 ++ arch/sparc/mm/init_64.c | 6 + arch/sparc/mm/io-unit.c | 10 ++-- arch/sparc/mm/iommu.c | 8 +-- arch/sparc/mm/srmmu.c | 34 +++ arch/um/kernel/mem.c | 10 +--- arch/um/kernel/trap.c | 8 +-- arch/unicore32/mm/mm.h| 10 arch/x86/mm/init_32.c | 26 +++- arch/xtensa/include/asm/fixmap.h | 8 --- arch/xtensa/mm/highmem.c | 2 +- arch/xtensa/mm/kasan_init.c | 10 ++-- arch/xtensa/mm/mmu.c | 5 +--- include/linux/pgtable.h | 18 ++ 42 files changed, 74 insertions(+), 323 deletions(-) diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c index d6b74883fd1f..1b9f473c6369 100644 --- a/arch/arc/mm/highmem.c +++ b/arch/arc/mm/highmem.c @@ -92,17 +92,9 @@ EXPORT_SYMBOL(kunmap_atomic_high); static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr) { - pgd_t *pgd_k; - p4d_t *p4d_k; - pud_t *pud_k; - 
pmd_t *pmd_k; + pmd_t *pmd_k = pmd_off_k(kvaddr); pte_t *pte_k; - pgd_k = pgd_offset_k(kvaddr); - p4d_k = p4d_offset(pgd_k, kvaddr); - pud_k = pud_offset(p4d_k, kvaddr); - pmd_k = pmd_offset(pud_k, kvaddr); - pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE); if (!pte_k) panic("%s: Failed to allocate %lu bytes align=0x%lx\n", diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 8e3f5fdb4883..aa265ede5730 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -632,7 +632,7 @@ static void __init map_sa1100_gpio_regs( void ) int prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_DOMAIN(DOMAIN_IO); pmd_t *pmd; - pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(virt), virt), virt), virt); + pmd = pmd_off_k(virt); *pmd = __pmd(phys | prot); flush_pmd_entry(pmd); } diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c index e013f6b81328..187fab227b50 100644 --- a/arch/arm/mm/highmem.c +++ b/arch/arm/mm/highmem.c @@ -18,7 +18,7 @@ static inline void set_fixmap_pte(int idx, pte_t pte) { unsigned long vaddr = __fix_to_virt(idx); - pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + pte_t *ptep = virt_to_kpte(vaddr); set_pte_ext(ptep, pte, 0); local_flush_tlb_kernel_page(vaddr); @@ -26,7 +26,7 @@ static inline void set_fixmap_pte(int idx, pte_t pte) static inline pte_t get_fixmap_pte(unsigned long vaddr) { - pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr); + pte_t *ptep = virt_to_kpte(vaddr); return *ptep; } diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c index 75529d76d28c..000e821b 100644 --- a/arch/arm/mm/ioremap.c +++ b/arch/arm/mm/ioremap.c @@ -141,16 +141,8 @@ void __check_vmalloc_seq(struct mm_struct *mm) static void unmap_area_sections(unsigned long virt, unsigned long size) { unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1)); - pgd_t *pgd; - p4d_t *p4d; -
[PATCH 07/12] x86/mm: simplify init_trampoline() and surrounding logic
From: Mike Rapoport There are three cases for the trampoline initialization: * 32-bit does nothing * 64-bit with kaslr disabled simply copies a PGD entry from the direct map to the trampoline PGD * 64-bit with kaslr enabled maps the real mode trampoline at PUD level These cases are currently differentiated by a bunch of ifdefs inside asm/include/pgtable.h and the case of 64-bits with kaslr on uses pgd_index() helper. Replacing the ifdefs with a static function in arch/x86/mm/init.c gives clearer code and allows moving pgd_index() to the generic implementation in include/linux/pgtable.h Signed-off-by: Mike Rapoport --- arch/x86/include/asm/kaslr.h | 2 ++ arch/x86/include/asm/pgtable.h | 15 +-- arch/x86/include/asm/setup.h | 9 + arch/x86/mm/init.c | 22 ++ arch/x86/mm/kaslr.c| 33 + 5 files changed, 35 insertions(+), 46 deletions(-) diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h index db7ba2feb947..0648190467ba 100644 --- a/arch/x86/include/asm/kaslr.h +++ b/arch/x86/include/asm/kaslr.h @@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose); #ifdef CONFIG_RANDOMIZE_MEMORY void kernel_randomize_memory(void); +void init_trampoline_kaslr(void); #else static inline void kernel_randomize_memory(void) { } +static inline void init_trampoline_kaslr(void) {} #endif /* CONFIG_RANDOMIZE_MEMORY */ #endif diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index d24f8e1f7250..6366136b0e46 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1070,27 +1070,14 @@ void init_mem_mapping(void); void early_alloc_pgt_buf(void); extern void memblock_find_dma_reserve(void); + #ifdef CONFIG_X86_64 -/* Realmode trampoline initialization. 
*/ extern pgd_t trampoline_pgd_entry; -static inline void __meminit init_trampoline_default(void) -{ - /* Default trampoline pgd value */ - trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; -} void __init poking_init(void); unsigned long init_memory_mapping(unsigned long start, unsigned long end, pgprot_t prot); - -# ifdef CONFIG_RANDOMIZE_MEMORY -void __meminit init_trampoline(void); -# else -# define init_trampoline init_trampoline_default -# endif -#else -static inline void init_trampoline(void) { } #endif /* local pte updates need not use xchg for locking */ diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h index ed8ec011a9fd..d95cacf210bb 100644 --- a/arch/x86/include/asm/setup.h +++ b/arch/x86/include/asm/setup.h @@ -78,6 +78,15 @@ static inline bool kaslr_enabled(void) return !!(boot_params.hdr.loadflags & KASLR_FLAG); } +/* + * Apply no randomization if KASLR was disabled at boot or if KASAN + * is enabled. KASAN shadow mappings rely on regions being PGD aligned. + */ +static inline bool kaslr_memory_enabled(void) +{ + return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); +} + static inline unsigned long kaslr_offset(void) { return (unsigned long)&_text - __START_KERNEL; diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c index 235dd0e35741..e225ebb25197 100644 --- a/arch/x86/mm/init.c +++ b/arch/x86/mm/init.c @@ -682,6 +682,28 @@ static void __init memory_map_bottom_up(unsigned long map_start, } } +/* + * The real mode trampoline, which is required for bootstrapping CPUs + * occupies only a small area under the low 1MB. See reserve_real_mode() + * for details. + * + * If KASLR is disabled the first PGD entry of the direct mapping is copied + * to map the real mode trampoline. + * + * If KASLR is enabled, copy only the PUD which covers the low 1MB + * area. This limits the randomization granularity to 1GB for both 4-level + * and 5-level paging. 
+ */ +static void __init init_trampoline(void) +{ +#ifdef CONFIG_X86_64 + if (!kaslr_memory_enabled()) + trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)]; + else + init_trampoline_kaslr(); +#endif +} + void __init init_mem_mapping(void) { unsigned long end; diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index faf02e1e1517..fb620fd9dae9 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -61,15 +61,6 @@ static inline unsigned long get_padding(struct kaslr_memory_region *region) return (region->size_tb << TB_SHIFT); } -/* - * Apply no randomization if KASLR was disabled at boot or if KASAN - * is enabled. KASAN shadow mappings rely on regions being PGD aligned. - */ -static inline bool kaslr_memory_enabled(void) -{ - return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN); -} - /* Initialize base and padding for each memory region randomized with KASLR */ void __init kernel_randomize_memory(void) { @@ -148,7 +139,7 @@ void __init kernel_randomize_memory(void)
[PATCH 06/12] m68k/mm: move {cache, nocache}_page() definitions close to their user
From: Mike Rapoport The cache_page() and nocache_page() functions are only used by the Motorola MMU variant for setting caching attributes for the page table pages. Move the definitions of these functions from arch/m68k/include/asm/motorola_pgtable.h closer to their usage in arch/m68k/mm/motorola.c and drop the unused definitions in arch/m68k/include/asm/mcf_pgtable.h. Signed-off-by: Mike Rapoport --- arch/m68k/include/asm/mcf_pgtable.h | 40 - arch/m68k/include/asm/motorola_pgtable.h | 44 arch/m68k/mm/motorola.c | 43 +++ 3 files changed, 43 insertions(+), 84 deletions(-) diff --git a/arch/m68k/include/asm/mcf_pgtable.h b/arch/m68k/include/asm/mcf_pgtable.h index 0031cd387b75..737e826294f3 100644 --- a/arch/m68k/include/asm/mcf_pgtable.h +++ b/arch/m68k/include/asm/mcf_pgtable.h @@ -328,46 +328,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD]; #define pte_offset_kernel(dir, address) \ ((pte_t *) __pmd_page(*(dir)) + __pte_offset(address)) -/* - * Disable caching for page at given kernel virtual address. - */ -static inline void nocache_page(void *vaddr) -{ - pgd_t *dir; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - unsigned long addr = (unsigned long) vaddr; - - dir = pgd_offset_k(addr); - p4dp = p4d_offset(dir, addr); - pudp = pud_offset(p4dp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - *ptep = pte_mknocache(*ptep); -} - -/* - * Enable caching for page at given kernel virtual address.
- */ -static inline void cache_page(void *vaddr) -{ - pgd_t *dir; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - unsigned long addr = (unsigned long) vaddr; - - dir = pgd_offset_k(addr); - p4dp = p4d_offset(dir, addr); - pudp = pud_offset(p4dp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - *ptep = pte_mkcache(*ptep); -} - /* * Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */ diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 9e5a3de21e15..e1594acf7c7e 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -227,50 +227,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmdp, unsigned long address) #define pte_offset_map(pmdp,address) ((pte_t *)__pmd_page(*pmdp) + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_unmap(pte) ((void)0) -/* Prior to calling these routines, the page should have been flushed - * from both the cache and ATC, or the CPU might not notice that the - * cache setting for the page has been changed. 
-jskov - */ -static inline void nocache_page(void *vaddr) -{ - unsigned long addr = (unsigned long)vaddr; - - if (CPU_IS_040_OR_060) { - pgd_t *dir; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - dir = pgd_offset_k(addr); - p4dp = p4d_offset(dir, addr); - pudp = pud_offset(p4dp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - *ptep = pte_mknocache(*ptep); - } -} - -static inline void cache_page(void *vaddr) -{ - unsigned long addr = (unsigned long)vaddr; - - if (CPU_IS_040_OR_060) { - pgd_t *dir; - p4d_t *p4dp; - pud_t *pudp; - pmd_t *pmdp; - pte_t *ptep; - - dir = pgd_offset_k(addr); - p4dp = p4d_offset(dir, addr); - pudp = pud_offset(p4dp, addr); - pmdp = pmd_offset(pudp, addr); - ptep = pte_offset_kernel(pmdp, addr); - *ptep = pte_mkcache(*ptep); - } -} - /* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */ #define __swp_type(x) (((x).val >> 4) & 0xff) #define __swp_offset(x)((x).val >> 12) diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c index 904c2a663977..8e5e74121a78 100644 --- a/arch/m68k/mm/motorola.c +++ b/arch/m68k/mm/motorola.c @@ -45,6 +45,49 @@ unsigned long mm_cachebits; EXPORT_SYMBOL(mm_cachebits); #endif +/* Prior to calling these routines, the page should have been flushed + * from both the cache and ATC, or the CPU might not notice that the + * cache setting for the page has been changed. -jskov + */ +static inline void nocache_page(void *vaddr) +{ + unsigned long addr = (unsigned long)vaddr; + + if (CPU_IS_040_OR_060) { + pgd_t *dir; + p4d_t *p4dp; + pud_t *pudp; + pmd_t *pmdp; + pte_t *ptep;
[PATCH 05/12] m68k/mm/motorola: move comment about page table allocation functions
From: Mike Rapoport The comment about page table allocation functions resides in include/asm/motorola_pgtable.h while the functions live in include/asm/motorola_pgalloc.h. Move the comment close to the code. Signed-off-by: Mike Rapoport --- arch/m68k/include/asm/motorola_pgalloc.h | 6 ++ arch/m68k/include/asm/motorola_pgtable.h | 6 -- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/m68k/include/asm/motorola_pgalloc.h b/arch/m68k/include/asm/motorola_pgalloc.h index c66e42917912..f3cb453a07b7 100644 --- a/arch/m68k/include/asm/motorola_pgalloc.h +++ b/arch/m68k/include/asm/motorola_pgalloc.h @@ -18,6 +18,12 @@ extern void init_pointer_table(void *table, int type); extern void *get_pointer_table(int type); extern int free_pointer_table(void *table, int type); +/* + * Allocate and free page tables. The xxx_kernel() versions are + * used to allocate a kernel page table - this turns on ASN bits + * if any. + */ + static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm) { return get_pointer_table(TABLE_PTE); diff --git a/arch/m68k/include/asm/motorola_pgtable.h b/arch/m68k/include/asm/motorola_pgtable.h index 48f19f0ab1e7..9e5a3de21e15 100644 --- a/arch/m68k/include/asm/motorola_pgtable.h +++ b/arch/m68k/include/asm/motorola_pgtable.h @@ -227,12 +227,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmdp, unsigned long address) #define pte_offset_map(pmdp,address) ((pte_t *)__pmd_page(*pmdp) + (((address) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_unmap(pte) ((void)0) -/* - * Allocate and free page tables. The xxx_kernel() versions are - * used to allocate a kernel page table - this turns on ASN bits - * if any. - */ - /* Prior to calling these routines, the page should have been flushed * from both the cache and ATC, or the CPU might not notice that the * cache setting for the page has been changed. -jskov -- 2.26.1
[PATCH 04/12] csky: replace definitions of __pXd_offset() with pXd_index()
From: Mike Rapoport All architectures use pXd_index() to get an entry in the page table page corresponding to a virtual address. Align csky with other architectures. Signed-off-by: Mike Rapoport --- arch/csky/include/asm/pgtable.h | 5 ++--- arch/csky/mm/fault.c| 2 +- arch/csky/mm/highmem.c | 2 +- arch/csky/mm/init.c | 6 +++--- 4 files changed, 7 insertions(+), 8 deletions(-) diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h index 9d2d16db237d..2eff4aea51b3 100644 --- a/arch/csky/include/asm/pgtable.h +++ b/arch/csky/include/asm/pgtable.h @@ -229,9 +229,8 @@ static inline pte_t pte_mkyoung(pte_t pte) return pte; } -#define __pgd_offset(address) pgd_index(address) -#define __pud_offset(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) -#define __pmd_offset(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) +#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1)) +#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(_mm, address) diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c index 4e6dc68f3258..4055d430c0c8 100644 --- a/arch/csky/mm/fault.c +++ b/arch/csky/mm/fault.c @@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write, * Do _not_ use "tsk" here. We might be inside * an interrupt in the middle of a task switch.. 
*/ - int offset = __pgd_offset(address); + int offset = pgd_index(address); pgd_t *pgd, *pgd_k; pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c index 3b3f622f5ae9..89ec32e602a1 100644 --- a/arch/csky/mm/highmem.c +++ b/arch/csky/mm/highmem.c @@ -92,7 +92,7 @@ static void __init kmap_pages_init(void) vaddr = PKMAP_BASE; fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir); - pgd = swapper_pg_dir + __pgd_offset(vaddr); + pgd = swapper_pg_dir + pgd_index(vaddr); pud = (pud_t *)pgd; pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c index eda2b4291485..af627128314f 100644 --- a/arch/csky/mm/init.c +++ b/arch/csky/mm/init.c @@ -157,9 +157,9 @@ void __init fixrange_init(unsigned long start, unsigned long end, unsigned long vaddr; vaddr = start; - i = __pgd_offset(vaddr); - j = __pud_offset(vaddr); - k = __pmd_offset(vaddr); + i = pgd_index(vaddr); + j = pud_index(vaddr); + k = pmd_index(vaddr); pgd = pgd_base + i; for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) { -- 2.26.1
[PATCH 03/12] mm: reorder includes after introduction of linux/pgtable.h
From: Mike Rapoport The replacement of <asm/pgtable.h> with <linux/pgtable.h> made the include of the latter in the middle of asm includes. Fix this up with the aid of the below script and manual adjustments here and there. import sys import re if len(sys.argv) is not 3: print "USAGE: %s <file> <header>" % (sys.argv[0]) sys.exit(1) hdr_to_move="#include <linux/%s>" % sys.argv[2] moved = False in_hdrs = False with open(sys.argv[1], "r") as f: lines = f.readlines() for _line in lines: line = _line.rstrip('\n') if line == hdr_to_move: continue if line.startswith("#include --- arch/alpha/kernel/proto.h | 2 -- arch/arc/mm/highmem.c | 2 +- arch/arc/mm/tlbex.S | 2 +- arch/arm/include/asm/efi.h| 1 - arch/arm/include/asm/fixmap.h | 2 +- arch/arm/kernel/head.S| 2 +- arch/arm/kernel/suspend.c | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- arch/arm/mach-integrator/core.c | 2 +- arch/arm/mach-keystone/platsmp.c | 2 +- arch/arm/mach-sa1100/hackkit.c| 2 +- arch/arm/mach-zynq/common.c | 2 +- arch/arm/mm/idmap.c | 2 +- arch/arm/mm/mm.h | 1 - arch/arm/mm/proc-arm1020.S| 2 +- arch/arm/mm/proc-arm1020e.S | 2 +- arch/arm/mm/proc-arm1022.S| 2 +- arch/arm/mm/proc-arm1026.S| 2 +- arch/arm/mm/proc-arm720.S | 2 +- arch/arm/mm/proc-arm740.S | 2 +- arch/arm/mm/proc-arm7tdmi.S | 2 +- arch/arm/mm/proc-arm920.S | 2 +- arch/arm/mm/proc-arm922.S | 2 +- arch/arm/mm/proc-arm925.S | 2 +- arch/arm/mm/proc-arm926.S | 2 +- arch/arm/mm/proc-arm940.S | 2 +- arch/arm/mm/proc-arm946.S | 2 +- arch/arm/mm/proc-arm9tdmi.S | 2 +- arch/arm/mm/proc-fa526.S | 2 +- arch/arm/mm/proc-feroceon.S | 2 +- arch/arm/mm/proc-mohawk.S | 2 +- arch/arm/mm/proc-sa110.S | 2 +- arch/arm/mm/proc-sa1100.S | 2 +- arch/arm/mm/proc-v6.S | 2 +- arch/arm/mm/proc-v7.S | 2 +- arch/arm/mm/proc-xsc3.S | 2 +- arch/arm/mm/proc-xscale.S | 2 +- arch/arm/mm/pv-fixup-asm.S| 2 +- arch/arm64/include/asm/io.h | 2 +- arch/arm64/include/asm/kvm_mmu.h | 2 +- arch/arm64/include/asm/mmu_context.h | 2 +- arch/arm64/include/asm/vmap_stack.h | 2 +- arch/arm64/kernel/acpi.c | 2 +- arch/arm64/kernel/head.S | 2 +- 
arch/arm64/kernel/kaslr.c | 2 +- arch/arm64/kernel/suspend.c | 2 +- arch/arm64/kernel/vmlinux.lds.S | 1 - arch/arm64/mm/proc.S | 2 +- arch/ia64/kernel/entry.S | 2 +- arch/ia64/kernel/head.S | 3 ++- arch/ia64/kernel/irq_ia64.c | 2 +- arch/ia64/kernel/ivt.S| 2 +- arch/ia64/kernel/kprobes.c| 2 +- arch/ia64/kernel/mca_asm.S| 2 +- arch/ia64/kernel/relocate_kernel.S| 4 +--- arch/ia64/kernel/setup.c | 2 +- arch/ia64/kernel/uncached.c | 2 +- arch/ia64/kernel/vmlinux.lds.S| 2 +- arch/m68k/68000/m68VZ328.c| 2 +- arch/m68k/include/asm/sun3xflop.h | 2 +- arch/m68k/kernel/head.S | 2 +- arch/microblaze/include/asm/pgalloc.h | 2 +- arch/microblaze/kernel/hw_exception_handler.S | 2 +- arch/microblaze/kernel/module.c | 2 +- arch/microblaze/kernel/setup.c| 2 +- arch/microblaze/mm/pgtable.c | 2 +- arch/mips/jazz/irq.c | 2 +- arch/mips/jazz/setup.c| 2 +- arch/mips/kvm/mips.c
[PATCH 01/12] mm: don't include asm/pgtable.h if linux/mm.h is already included
From: Mike Rapoport The linux/mm.h header includes <asm/pgtable.h> to allow inlining of the functions involving page table manipulations, e.g. pte_alloc() and pmd_alloc(). So, there is no point to explicitly include <asm/pgtable.h> in the files that include <linux/mm.h>. The include statements in such cases are removed with a simple loop: for f in $(git grep -l "include <linux/mm.h>") ; do sed -i -e '/include <asm\/pgtable.h>/ d' $f done Signed-off-by: Mike Rapoport --- arch/alpha/boot/bootp.c | 1 - arch/alpha/boot/bootpz.c | 1 - arch/alpha/boot/main.c| 1 - arch/alpha/include/asm/io.h | 1 - arch/alpha/kernel/process.c | 1 - arch/alpha/kernel/ptrace.c| 1 - arch/alpha/kernel/setup.c | 1 - arch/alpha/kernel/smp.c | 1 - arch/alpha/kernel/sys_alcor.c | 1 - arch/alpha/kernel/sys_cabriolet.c | 1 - arch/alpha/kernel/sys_dp264.c | 1 - arch/alpha/kernel/sys_eb64p.c | 1 - arch/alpha/kernel/sys_eiger.c | 1 - arch/alpha/kernel/sys_jensen.c| 1 - arch/alpha/kernel/sys_marvel.c| 1 - arch/alpha/kernel/sys_miata.c | 1 - arch/alpha/kernel/sys_mikasa.c| 1 - arch/alpha/kernel/sys_nautilus.c | 1 - arch/alpha/kernel/sys_noritake.c | 1 - arch/alpha/kernel/sys_rawhide.c | 1 - arch/alpha/kernel/sys_ruffian.c | 1 - arch/alpha/kernel/sys_rx164.c | 1 - arch/alpha/kernel/sys_sable.c | 1 - arch/alpha/kernel/sys_sio.c | 1 - arch/alpha/kernel/sys_sx164.c | 1 - arch/alpha/kernel/sys_takara.c| 1 - arch/alpha/kernel/sys_titan.c | 1 - arch/alpha/kernel/sys_wildfire.c | 1 - arch/alpha/mm/init.c | 1 - arch/arm/kernel/machine_kexec.c | 1 - arch/arm/kernel/module.c | 1 - arch/arm/kernel/ptrace.c | 1 - arch/arm/kernel/smp.c | 1 - arch/arm/mach-ebsa110/core.c | 1 - arch/arm/mach-footbridge/common.c | 1 - arch/arm/mach-imx/mm-imx21.c | 1 - arch/arm/mach-imx/mm-imx27.c | 1 - arch/arm/mach-imx/mm-imx3.c | 1 - arch/arm/mach-iop32x/i2c.c| 1 - arch/arm/mach-iop32x/iq31244.c| 1 - arch/arm/mach-iop32x/iq80321.c| 1 - arch/arm/mach-iop32x/n2100.c | 1 - arch/arm/mach-ixp4xx/common.c | 1 - arch/arm/mach-sa1100/assabet.c| 1 - arch/arm/mm/copypage-v4mc.c | 1 - arch/arm/mm/copypage-v6.c | 1 - 
arch/arm/mm/copypage-xscale.c | 1 - arch/arm/mm/dump.c| 1 - arch/arm/mm/fault-armv.c | 1 - arch/arm/mm/fault.c | 1 - arch/arm/mm/pageattr.c| 1 - arch/arm64/kernel/hibernate.c | 1 - arch/arm64/kernel/ptrace.c| 1 - arch/arm64/kernel/smp.c | 1 - arch/arm64/mm/dump.c | 1 - arch/arm64/mm/fault.c | 1 - arch/arm64/mm/kasan_init.c| 1 - arch/arm64/mm/pageattr.c | 1 - arch/csky/kernel/module.c | 1 - arch/csky/kernel/ptrace.c | 1 - arch/csky/mm/init.c | 1 - arch/csky/mm/tlb.c| 1 - arch/h8300/kernel/process.c | 1 - arch/h8300/kernel/setup.c | 1 - arch/h8300/kernel/signal.c| 1 - arch/h8300/mm/fault.c | 1 - arch/h8300/mm/init.c | 1 - arch/h8300/mm/memory.c| 1 - arch/hexagon/mm/vm_fault.c| 1 - arch/ia64/kernel/efi.c| 1 - arch/ia64/kernel/ptrace.c | 1 - arch/ia64/kernel/smp.c| 1 - arch/ia64/kernel/smpboot.c| 1 - arch/ia64/mm/contig.c | 1 - arch/ia64/mm/fault.c | 1 - arch/m68k/68000/timers.c | 1 - arch/m68k/amiga/config.c | 1 - arch/m68k/apollo/config.c | 1 - arch/m68k/atari/atasound.c| 1 - arch/m68k/atari/stram.c | 1 - arch/m68k/bvme6000/config.c | 1 - arch/m68k/kernel/process.c| 1 - arch/m68k/kernel/ptrace.c | 1 - arch/m68k/kernel/setup_no.c | 1 - arch/m68k/kernel/signal.c | 1 - arch/m68k/kernel/uboot.c
[PATCH 00/12] mm: consolidate definitions of page table accessors
From: Mike Rapoport Hi, The low level page table accessors (pXY_index(), pXY_offset()) are duplicated across all architectures and sometimes more than once. For instance, we have 31 definitions of pgd_offset() for 25 supported architectures. Most of these definitions are actually identical and typically it boils down to, e.g. static inline unsigned long pmd_index(unsigned long address) { return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); } static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address); } These definitions can be shared among 90% of the arches provided XYZ_SHIFT, PTRS_PER_XYZ and xyz_page_vaddr() are defined. For architectures that really need a custom version there is always the possibility to override the generic version with the usual ifdefs magic. These patches introduce include/linux/pgtable.h that replaces include/asm-generic/pgtable.h and add the definitions of the page table accessors to the new header. The patches are against v5.7-rc5-mmots-2020-05-11-15-47 Mike Rapoport (12): mm: don't include asm/pgtable.h if linux/mm.h is already included mm: introduce include/linux/pgtable.h mm: reorder includes after introduction of linux/pgtable.h csky: replace definitions of __pXd_offset() with pXd_index() m68k/mm/motorola: move comment about page table allocation functions m68k/mm: move {cache,nocache}_page() definitions close to their user x86/mm: simplify init_trampoline() and surrounding logic mm: pgtable: add shortcuts for accessing kernel PMD and PTE mm: consolidate pte_index() and pte_offset_*() definitions mm: consolidate pmd_index() and pmd_offset() definitions mm: consolidate pud_index() and pud_offset() definitions mm: consolidate pgd_index() and pgd_offset{_k}() definitions arch/alpha/boot/bootp.c | 1 - arch/alpha/boot/bootpz.c | 1 - arch/alpha/boot/main.c| 1 - arch/alpha/include/asm/io.h | 1 - arch/alpha/include/asm/pgtable.h | 16 +-- arch/alpha/kernel/process.c | 1 - 
arch/alpha/kernel/proto.h | 2 - arch/alpha/kernel/ptrace.c| 1 - arch/alpha/kernel/setup.c | 1 - arch/alpha/kernel/smp.c | 1 - arch/alpha/kernel/sys_alcor.c | 1 - arch/alpha/kernel/sys_cabriolet.c | 1 - arch/alpha/kernel/sys_dp264.c | 1 - arch/alpha/kernel/sys_eb64p.c | 1 - arch/alpha/kernel/sys_eiger.c | 1 - arch/alpha/kernel/sys_jensen.c| 1 - arch/alpha/kernel/sys_marvel.c| 1 - arch/alpha/kernel/sys_miata.c | 1 - arch/alpha/kernel/sys_mikasa.c| 1 - arch/alpha/kernel/sys_nautilus.c | 1 - arch/alpha/kernel/sys_noritake.c | 1 - arch/alpha/kernel/sys_rawhide.c | 1 - arch/alpha/kernel/sys_ruffian.c | 1 - arch/alpha/kernel/sys_rx164.c | 1 - arch/alpha/kernel/sys_sable.c | 1 - arch/alpha/kernel/sys_sio.c | 1 - arch/alpha/kernel/sys_sx164.c | 1 - arch/alpha/kernel/sys_takara.c| 1 - arch/alpha/kernel/sys_titan.c | 1 - arch/alpha/kernel/sys_wildfire.c | 1 - arch/alpha/mm/init.c | 1 - arch/arc/include/asm/pgtable.h| 24 arch/arc/mm/highmem.c | 12 +- arch/arc/mm/tlbex.S | 2 +- arch/arm/include/asm/efi.h| 1 - arch/arm/include/asm/fixmap.h | 2 +- arch/arm/include/asm/idmap.h | 2 +- arch/arm/include/asm/pgtable-2level.h | 1 + arch/arm/include/asm/pgtable-3level.h | 7 -- arch/arm/include/asm/pgtable-nommu.h | 3 - arch/arm/include/asm/pgtable.h| 25 arch/arm/kernel/head.S| 2 +- arch/arm/kernel/machine_kexec.c | 1 - arch/arm/kernel/module.c | 1 - arch/arm/kernel/ptrace.c | 1 - arch/arm/kernel/smp.c | 1 - arch/arm/kernel/suspend.c | 2 +- arch/arm/kernel/vmlinux.lds.S | 2 +- arch/arm/mach-ebsa110/core.c | 1 - arch/arm/mach-footbridge/common.c | 1 - arch/arm/mach-imx/mm-imx21.c | 1 - arch/arm/mach-imx/mm-imx27.c | 1 - arch/arm/mach-imx/mm-imx3.c | 1 - arch/arm/mach-integrator/core.c | 2 +- arch/arm/mach-iop32x/i2c.c| 1 - arch/arm/mach-iop32x/iq31244.c| 1 - arch/arm/mach-iop32x/iq80321.c|
Re: [PATCH V3 3/3] mm/hugetlb: Define a generic fallback for arch_clear_hugepage_flags()
On 5/7/20 8:07 PM, Anshuman Khandual wrote: > There are multiple similar definitions for arch_clear_hugepage_flags() on > various platforms. Lets just add it's generic fallback definition for > platforms that do not override. This help reduce code duplication. > > Cc: Russell King > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Tony Luck > Cc: Fenghua Yu > Cc: Thomas Bogendoerfer > Cc: "James E.J. Bottomley" > Cc: Helge Deller > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: Paul Walmsley > Cc: Palmer Dabbelt > Cc: Heiko Carstens > Cc: Vasily Gorbik > Cc: Christian Borntraeger > Cc: Yoshinori Sato > Cc: Rich Felker > Cc: "David S. Miller" > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: Borislav Petkov > Cc: "H. Peter Anvin" > Cc: Mike Kravetz > Cc: Andrew Morton > Cc: x...@kernel.org > Cc: linux-arm-ker...@lists.infradead.org > Cc: linux-i...@vger.kernel.org > Cc: linux-m...@vger.kernel.org > Cc: linux-par...@vger.kernel.org > Cc: linuxppc-dev@lists.ozlabs.org > Cc: linux-ri...@lists.infradead.org > Cc: linux-s...@vger.kernel.org > Cc: linux...@vger.kernel.org > Cc: sparcli...@vger.kernel.org > Cc: linux...@kvack.org > Cc: linux-a...@vger.kernel.org > Cc: linux-ker...@vger.kernel.org > Signed-off-by: Anshuman Khandual Thanks! Removing duplicate code is good. Acked-by: Mike Kravetz -- Mike Kravetz
Re: [PATCH V3 2/3] mm/hugetlb: Define a generic fallback for is_hugepage_only_range()
On 5/10/20 8:14 PM, Anshuman Khandual wrote: > On 05/09/2020 03:52 AM, Mike Kravetz wrote: >> On 5/7/20 8:07 PM, Anshuman Khandual wrote: >> >> Did you try building without CONFIG_HUGETLB_PAGE defined? I'm guessing > > Yes I did for multiple platforms (s390, arm64, ia64, x86, powerpc etc). > >> that you need a stub for is_hugepage_only_range(). Or, perhaps add this >> to asm-generic/hugetlb.h? >> > There is already a stub (include/linux/hugetlb.h) when !CONFIG_HUGETLB_PAGE. > Thanks! I missed that stub in the existing code. I like the removal of redundant code. Acked-by: Mike Kravetz -- Mike Kravetz