from:"Mike"

[PATCH v3 08/17] memblock: make for_each_memblock_type() iterator private

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

for_each_memblock_type() is not used outside mm/memblock.c, so move it there
from include/linux/memblock.h.

Signed-off-by: Mike Rapoport 
Reviewed-by: Baoquan He 
---
 include/linux/memblock.h | 5 -
 mm/memblock.c| 5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 9d925db0d355..550faf69fc1c 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -552,11 +552,6 @@ static inline unsigned long 
memblock_region_reserved_end_pfn(const struct memblo
 region < (memblock.memblock_type.regions + 
memblock.memblock_type.cnt);\
 region++)
 
-#define for_each_memblock_type(i, memblock_type, rgn)  \
-   for (i = 0, rgn = &memblock_type->regions[0];   \
-i < memblock_type->cnt;\
-i++, rgn = &memblock_type->regions[i])
-
 extern void *alloc_large_system_hash(const char *tablename,
 unsigned long bucketsize,
 unsigned long numentries,
diff --git a/mm/memblock.c b/mm/memblock.c
index 45f198750be9..59f3998ae5db 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -132,6 +132,11 @@ struct memblock_type physmem = {
 };
 #endif
 
+#define for_each_memblock_type(i, memblock_type, rgn)  \
+   for (i = 0, rgn = &memblock_type->regions[0];   \
+i < memblock_type->cnt;\
+i++, rgn = &memblock_type->regions[i])
+
 int memblock_debug __initdata_memblock;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
-- 
2.26.2

[PATCH v3 07/17] microblaze: drop unneeded NUMA and sparsemem initializations

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

microblaze supports neither NUMA nor SPARSEMEM, so there is no point in
calling the memblock_set_node() and sparse_memory_present_with_active_regions()
functions during microblaze memory initialization.

Remove these calls and the surrounding code.

Signed-off-by: Mike Rapoport 
---
 arch/microblaze/mm/init.c | 14 +-
 1 file changed, 1 insertion(+), 13 deletions(-)

diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 0880a003573d..49e0c241f9b1 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -105,9 +105,8 @@ static void __init paging_init(void)
 
 void __init setup_memory(void)
 {
-   struct memblock_region *reg;
-
 #ifndef CONFIG_MMU
+   struct memblock_region *reg;
u32 kernel_align_start, kernel_align_size;
 
/* Find main memory where is the kernel */
@@ -161,17 +160,6 @@ void __init setup_memory(void)
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
 
-   /* Add active regions with valid PFNs */
-   for_each_memblock(memory, reg) {
-   unsigned long start_pfn, end_pfn;
-
-   start_pfn = memblock_region_memory_base_pfn(reg);
-   end_pfn = memblock_region_memory_end_pfn(reg);
-   memblock_set_node(start_pfn << PAGE_SHIFT,
- (end_pfn - start_pfn) << PAGE_SHIFT,
- &memblock.memory, 0);
-   }
-
paging_init();
 }
 
-- 
2.26.2

[PATCH v3 06/17] riscv: drop unneeded node initialization

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

RISC-V does not (yet) support NUMA, and for UMA architectures node 0 is
used implicitly during early memory initialization.

There is no need to call memblock_set_node(); remove this call and the
surrounding code.

Signed-off-by: Mike Rapoport 
---
 arch/riscv/mm/init.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 787c75f751a5..0485cfaacc72 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -191,15 +191,6 @@ void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem();
memblock_allow_resize();
memblock_dump_all();
-
-   for_each_memblock(memory, reg) {
-   unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
-   unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
-
-   memblock_set_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn),
- &memblock.memory, 0);
-   }
 }
 
 #ifdef CONFIG_MMU
-- 
2.26.2

[PATCH v3 05/17] h8300, nds32, openrisc: simplify detection of memory extents

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

Instead of traversing memblock.memory regions to find memory_start and
memory_end, simply query memblock_{start,end}_of_DRAM().

Signed-off-by: Mike Rapoport 
Acked-by: Stafford Horne 
---
 arch/h8300/kernel/setup.c| 8 +++-
 arch/nds32/kernel/setup.c| 8 ++--
 arch/openrisc/kernel/setup.c | 9 ++---
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index 28ac88358a89..0281f92eea3d 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -74,17 +74,15 @@ static void __init bootmem_init(void)
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end)
panic("No memory!");
 
/* setup bootmem globals (we use no_bootmem, but mm still depends on 
this) */
min_low_pfn = PFN_UP(memory_start);
-   max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+   max_low_pfn = PFN_DOWN(memory_end);
max_pfn = max_low_pfn;
 
memblock_reserve(__pa(_stext), _end - _stext);
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index a066efbe53c0..c356e484dcab 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -249,12 +249,8 @@ static void __init setup_memory(void)
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   pr_info("%s: Memory: 0x%x-0x%x\n", __func__,
-   memory_start, memory_end);
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end) {
panic("No memory!");
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index b18e775f8be3..5a5940f7ebb1 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -48,17 +48,12 @@ static void __init setup_memory(void)
unsigned long ram_start_pfn;
unsigned long ram_end_pfn;
phys_addr_t memory_start, memory_end;
-   struct memblock_region *region;
 
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel, we assume its the only one */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
-  memory_start, memory_end);
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end) {
panic("No memory!");
-- 
2.26.2

[PATCH v3 04/17] arm64: numa: simplify dummy_numa_init()

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

dummy_numa_init() loops over memblock.memory and passes nid=0 to
numa_add_memblk() which essentially wraps memblock_set_node(). However,
memblock_set_node() can cope with the entire memory span itself, so the loop
over memblock.memory regions is redundant.

Using a single call to memblock_set_node() rather than a loop also fixes an
issue with a buggy ACPI firmware in which the SRAT table covers some but
not all of the memory in the EFI memory map.

Jonathan Cameron says:

  This issue can be easily triggered by having an SRAT table which fails
  to cover all elements of the EFI memory map.

  This firmware error is detected and a warning printed. e.g.
  "NUMA: Warning: invalid memblk node 64 [mem 0x24000-0x27fff]"
  At that point we fall back to dummy_numa_init().

  However, the failed ACPI init has left us with our memblocks all broken
  up as we split them when trying to assign them to NUMA nodes.

  We then iterate over the memblocks and add them to node 0.

  numa_add_memblk() calls memblock_set_node() which merges regions that
  were previously split up during the earlier attempt to add them to different
  nodes during parsing of SRAT.

  This means elements are moved in the memblock array and we can end up
  in a different memblock after the call to numa_add_memblk().
  Result is:

  Unable to handle kernel paging request at virtual address 3a40
  Mem abort info:
ESR = 0x9604
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
  Data abort info:
ISV = 0, ISS = 0x0004
CM = 0, WnR = 0
  [3a40] user address but active_mm is swapper
  Internal error: Oops: 9604 [#1] PREEMPT SMP

  ...

  Call trace:
sparse_init_nid+0x5c/0x2b0
sparse_init+0x138/0x170
bootmem_init+0x80/0xe0
setup_arch+0x2a0/0x5fc
start_kernel+0x8c/0x648

Replace the loop with a single call to memblock_set_node() covering the
entire memory.

Signed-off-by: Mike Rapoport 
Acked-by: Jonathan Cameron 
Acked-by: Catalin Marinas 
---
 arch/arm64/mm/numa.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 73f8b49d485c..8a97cd3d2dfe 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -423,19 +423,16 @@ static int __init numa_init(int (*init_func)(void))
  */
 static int __init dummy_numa_init(void)
 {
+   phys_addr_t start = memblock_start_of_DRAM();
+   phys_addr_t end = memblock_end_of_DRAM();
int ret;
-   struct memblock_region *mblk;
 
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
-   pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
-   memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
-
-   for_each_memblock(memory, mblk) {
-   ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
-   if (!ret)
-   continue;
+   pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
 
+   ret = numa_add_memblk(0, start, end);
+   if (ret) {
pr_err("NUMA init failed\n");
return ret;
}
-- 
2.26.2

[PATCH v3 03/17] arm, xtensa: simplify initialization of high memory pages

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

The function free_highpages() in both arm and xtensa essentially open-codes
the for_each_free_mem_range() loop to detect high memory pages that were not
reserved and that should be initialized and passed to the buddy allocator.

Replace the open-coded implementation of for_each_free_mem_range() with usage
of the memblock API to simplify the code.

Signed-off-by: Mike Rapoport 
Reviewed-by: Max Filippov   # xtensa
Tested-by: Max Filippov # xtensa
---
 arch/arm/mm/init.c| 48 +++--
 arch/xtensa/mm/init.c | 55 ---
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 000c1b48e973..50a5a30a78ff 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -347,61 +347,29 @@ static void __init free_unused_memmap(void)
 #endif
 }
 
-#ifdef CONFIG_HIGHMEM
-static inline void free_area_high(unsigned long pfn, unsigned long end)
-{
-   for (; pfn < end; pfn++)
-   free_highmem_page(pfn_to_page(pfn));
-}
-#endif
-
 static void __init free_highpages(void)
 {
 #ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
-   struct memblock_region *mem, *res;
+   phys_addr_t range_start, range_end;
+   u64 i;
 
/* set highmem page free */
-   for_each_memblock(memory, mem) {
-   unsigned long start = memblock_region_memory_base_pfn(mem);
-   unsigned long end = memblock_region_memory_end_pfn(mem);
+   for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+   &range_start, &range_end, NULL) {
+   unsigned long start = PHYS_PFN(range_start);
+   unsigned long end = PHYS_PFN(range_end);
 
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
 
-   if (memblock_is_nomap(mem))
-   continue;
-
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
 
-   /* Find and exclude any reserved regions */
-   for_each_memblock(reserved, res) {
-   unsigned long res_start, res_end;
-
-   res_start = memblock_region_reserved_base_pfn(res);
-   res_end = memblock_region_reserved_end_pfn(res);
-
-   if (res_end < start)
-   continue;
-   if (res_start < start)
-   res_start = start;
-   if (res_start > end)
-   res_start = end;
-   if (res_end > end)
-   res_end = end;
-   if (res_start != start)
-   free_area_high(start, res_start);
-   start = res_end;
-   if (start == end)
-   break;
-   }
-
-   /* And now free anything which remains */
-   if (start < end)
-   free_area_high(start, end);
+   for (; start < end; start++)
+   free_highmem_page(pfn_to_page(start));
}
 #endif
 }
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index a05b306cf371..ad9d59d93f39 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -79,67 +79,32 @@ void __init zones_init(void)
free_area_init(max_zone_pfn);
 }
 
-#ifdef CONFIG_HIGHMEM
-static void __init free_area_high(unsigned long pfn, unsigned long end)
-{
-   for (; pfn < end; pfn++)
-   free_highmem_page(pfn_to_page(pfn));
-}
-
 static void __init free_highpages(void)
 {
+#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
-   struct memblock_region *mem, *res;
+   phys_addr_t range_start, range_end;
+   u64 i;
 
-   reset_all_zones_managed_pages();
/* set highmem page free */
-   for_each_memblock(memory, mem) {
-   unsigned long start = memblock_region_memory_base_pfn(mem);
-   unsigned long end = memblock_region_memory_end_pfn(mem);
+   for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+   &range_start, &range_end, NULL) {
+   unsigned long start = PHYS_PFN(range_start);
+   unsigned long end = PHYS_PFN(range_end);
 
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
 
-   if (memblock_is_nomap(mem))
-   continue;
-
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
 
-   /* Find and exclude any reserved regions */
-   for_each_memblock(reserved, res) {
-

[PATCH v3 02/17] dma-contiguous: simplify cma_early_percent_memory()

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

The memory size calculation in cma_early_percent_memory() traverses
memblock.memory rather than simply calling memblock_phys_mem_size(). The
comment in that function suggests that at some point there should have been
a call to memblock_analyze() before memblock_phys_mem_size() could be used.
As of now, there is no memblock_analyze() at all and
memblock_phys_mem_size() can be used as soon as cold-plug memory is
registered with memblock.

Replace loop over memblock.memory with a call to memblock_phys_mem_size().

Signed-off-by: Mike Rapoport 
Reviewed-by: Christoph Hellwig 
Reviewed-by: Baoquan He 
---
 kernel/dma/contiguous.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index cff7e60968b9..0369fd5fda8f 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -73,16 +73,7 @@ early_param("cma", early_cma);
 
 static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
 {
-   struct memblock_region *reg;
-   unsigned long total_pages = 0;
-
-   /*
-* We cannot use memblock_phys_mem_size() here, because
-* memblock_analyze() has not been called yet.
-*/
-   for_each_memblock(memory, reg)
-   total_pages += memblock_region_memory_end_pfn(reg) -
-  memblock_region_memory_base_pfn(reg);
+   unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size());
 
return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
 }
-- 
2.26.2

[PATCH v3 01/17] KVM: PPC: Book3S HV: simplify kvm_cma_reserve()

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

The memory size calculation in kvm_cma_reserve() traverses memblock.memory
rather than simply calling memblock_phys_mem_size(). The comment in that
function suggests that at some point there should have been a call to
memblock_analyze() before memblock_phys_mem_size() could be used.
As of now, there is no memblock_analyze() at all and
memblock_phys_mem_size() can be used as soon as cold-plug memory is
registered with memblock.

Replace loop over memblock.memory with a call to memblock_phys_mem_size().

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 12 ++--
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c 
b/arch/powerpc/kvm/book3s_hv_builtin.c
index 073617ce83e0..8f58dd20b362 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -95,23 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
 void __init kvm_cma_reserve(void)
 {
unsigned long align_size;
-   struct memblock_region *reg;
-   phys_addr_t selected_size = 0;
+   phys_addr_t selected_size;
 
/*
 * We need CMA reservation only when we are in HV mode
 */
if (!cpu_has_feature(CPU_FTR_HVMODE))
return;
-   /*
-* We cannot use memblock_phys_mem_size() here, because
-* memblock_analyze() has not been called yet.
-*/
-   for_each_memblock(memory, reg)
-   selected_size += memblock_region_memory_end_pfn(reg) -
-memblock_region_memory_base_pfn(reg);
 
-   selected_size = (selected_size * kvm_cma_resv_ratio / 100) << 
PAGE_SHIFT;
+   selected_size = PAGE_ALIGN(memblock_phys_mem_size() * 
kvm_cma_resv_ratio / 100);
if (selected_size) {
pr_info("%s: reserving %ld MiB for global area\n", __func__,
 (unsigned long)selected_size / SZ_1M);
-- 
2.26.2

[PATCH v3 00/17] memblock: seasonal cleaning^w cleanup

2020-08-18 Thread Mike Rapoport

From: Mike Rapoport 

Hi,

These patches simplify several uses of memblock iterators and hide some of
the memblock implementation details from the rest of the system.

The patches are on top of v5.9-rc1

v3 changes:
* rebase on v5.9-rc1; as a result, this required some non-trivial changes
  in patches 10 and 16. I didn't add Baoquan's Reviewed-by to these
  patches, but I kept the tags from Thomas and Miguel
* Add Acked-by from Thomas and Miguel as there were changes in MIPS and
  only trivial changes in .clang-format
* Added Reviewed-by from Baoquan except for the patches 10 and 16
* Fixed misc build errors and warnings reported by kbuild bot
* Updated PowerPC KVM reservation size (patch 1), as per Daniel's comment

v2 changes:
* replace for_each_memblock() with two versions, one for memblock.memory
  and another one for memblock.reserved
* fix overzealous cleanup of powerpc fadump: keep the traversal over the
  memblocks, but use better suited iterators
* don't remove traversal over memblock.reserved in x86 numa cleanup but
  replace for_each_memblock() with new for_each_reserved_mem_region()
* simplify ramdisk and crash kernel allocations on x86
* drop more redundant and unused code: __next_reserved_mem_region() and
  memblock_mem_size()
* add description of numa initialization fix on arm64 (thanks Jonathan)
* add Acked and Reviewed tags

Mike Rapoport (17):
  KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
  dma-contiguous: simplify cma_early_percent_memory()
  arm, xtensa: simplify initialization of high memory pages
  arm64: numa: simplify dummy_numa_init()
  h8300, nds32, openrisc: simplify detection of memory extents
  riscv: drop unneeded node initialization
  microblaze: drop unneeded NUMA and sparsemem initializations
  memblock: make for_each_memblock_type() iterator private
  memblock: make memblock_debug and related functionality private
  memblock: reduce number of parameters in for_each_mem_range()
  arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()
  arch, drivers: replace for_each_memblock() with for_each_mem_range()
  x86/setup: simplify initrd relocation and reservation
  x86/setup: simplify reserve_crashkernel()
  memblock: remove unused memblock_mem_size()
  memblock: implement for_each_reserved_mem_region() using
__next_mem_region()
  memblock: use separate iterators for memory and reserved regions

 .clang-format|  5 +-
 arch/arm/kernel/setup.c  | 18 +++--
 arch/arm/mm/init.c   | 59 +++
 arch/arm/mm/mmu.c| 39 --
 arch/arm/mm/pmsa-v7.c| 23 +++---
 arch/arm/mm/pmsa-v8.c| 17 ++---
 arch/arm/xen/mm.c|  7 +-
 arch/arm64/kernel/machine_kexec_file.c   |  6 +-
 arch/arm64/kernel/setup.c|  4 +-
 arch/arm64/mm/init.c | 11 +--
 arch/arm64/mm/kasan_init.c   | 10 +--
 arch/arm64/mm/mmu.c  | 11 +--
 arch/arm64/mm/numa.c | 15 ++--
 arch/c6x/kernel/setup.c  |  9 ++-
 arch/h8300/kernel/setup.c|  8 +-
 arch/microblaze/mm/init.c| 21 ++
 arch/mips/cavium-octeon/dma-octeon.c | 12 +--
 arch/mips/kernel/setup.c | 31 
 arch/mips/netlogic/xlp/setup.c   |  2 +-
 arch/nds32/kernel/setup.c|  8 +-
 arch/openrisc/kernel/setup.c |  9 +--
 arch/openrisc/mm/init.c  |  8 +-
 arch/powerpc/kernel/fadump.c | 57 +++---
 arch/powerpc/kexec/file_load_64.c| 16 ++--
 arch/powerpc/kvm/book3s_hv_builtin.c | 12 +--
 arch/powerpc/mm/book3s64/hash_utils.c| 16 ++--
 arch/powerpc/mm/book3s64/radix_pgtable.c | 10 +--
 arch/powerpc/mm/kasan/kasan_init_32.c|  8 +-
 arch/powerpc/mm/mem.c| 33 
 arch/powerpc/mm/numa.c   |  7 +-
 arch/powerpc/mm/pgtable_32.c |  8 +-
 arch/riscv/mm/init.c | 36 +++--
 arch/riscv/mm/kasan_init.c   | 10 +--
 arch/s390/kernel/setup.c | 27 ---
 arch/s390/mm/page-states.c   |  6 +-
 arch/s390/mm/vmem.c  |  7 +-
 arch/sh/mm/init.c|  9 +--
 arch/sparc/mm/init_64.c  | 12 +--
 arch/x86/kernel/setup.c  | 56 +-
 arch/x86/mm/numa.c   |  2 +-
 arch/xtensa/mm/init.c| 55 +++---
 drivers/bus/mvebu-mbus.c | 12 +--
 drivers/irqchip/irq-gic-v3-its.c |  2 +-
 include/linux/memblock.h | 88 +-
 kernel/dma/contiguous.c  | 11 +--
 mm/memblock.c| 95 ++--
 mm/page_alloc.c  | 11 ++-
 mm/sparse.c  | 10 +--
 48 files changed, 387 insertions(+), 562 deletions(-)

-- 
2.26.2

Re: [PATCH v2 13/17] x86/setup: simplify initrd relocation and reservation

2020-08-05 Thread Mike Rapoport

On Wed, Aug 05, 2020 at 12:20:24PM +0800, Baoquan He wrote:
> On 08/02/20 at 07:35pm, Mike Rapoport wrote:
> > From: Mike Rapoport 
> > 
> > Currently, initrd image is reserved very early during setup and then it
> > might be relocated and re-reserved after the initial physical memory
> > mapping is created. The "late" reservation of memblock verifies that mapped
> > memory size exceeds the size of initrd, the checks whether the relocation
>   ~ then?

Right, thanks!

> > required and, if yes, relocates inirtd to a new memory allocated from
> > memblock and frees the old location.

[PATCH v2 17/17] memblock: use separate iterators for memory and reserved regions

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

for_each_memblock() is used to iterate over memblock.memory in
a few places that use data from memblock_region rather than the memory
ranges.

Introduce separate for_each_mem_region() and for_each_reserved_mem_region()
to improve encapsulation of memblock internals from its users.

Signed-off-by: Mike Rapoport 
---
 .clang-format  |  3 ++-
 arch/arm64/kernel/setup.c  |  2 +-
 arch/arm64/mm/numa.c   |  2 +-
 arch/mips/netlogic/xlp/setup.c |  2 +-
 arch/x86/mm/numa.c |  2 +-
 include/linux/memblock.h   | 19 ---
 mm/memblock.c  |  4 ++--
 mm/page_alloc.c|  8 
 8 files changed, 28 insertions(+), 14 deletions(-)

diff --git a/.clang-format b/.clang-format
index e28a849a1c58..cff71d345456 100644
--- a/.clang-format
+++ b/.clang-format
@@ -201,7 +201,7 @@ ForEachMacros:
   - 'for_each_matching_node'
   - 'for_each_matching_node_and_match'
   - 'for_each_member'
-  - 'for_each_memblock'
+  - 'for_each_mem_region'
   - 'for_each_memblock_type'
   - 'for_each_memcg_cache_index'
   - 'for_each_mem_pfn_range'
@@ -267,6 +267,7 @@ ForEachMacros:
   - 'for_each_property_of_node'
   - 'for_each_registered_fb'
   - 'for_each_reserved_mem_range'
+  - 'for_each_reserved_mem_region'
   - 'for_each_rtd_codec_dais'
   - 'for_each_rtd_codec_dais_rollback'
   - 'for_each_rtd_components'
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index f3aec7244aab..52ea2f1a7184 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -217,7 +217,7 @@ static void __init request_standard_resources(void)
if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
 
-   for_each_memblock(memory, region) {
+   for_each_mem_region(region) {
res = &standard_resources[i++];
if (memblock_is_nomap(region)) {
res->name  = "reserved";
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 0cbdbcc885fb..f121e42246a6 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -350,7 +350,7 @@ static int __init numa_register_nodes(void)
struct memblock_region *mblk;
 
/* Check that valid nid is set to memblks */
-   for_each_memblock(memory, mblk) {
+   for_each_mem_region(mblk) {
int mblk_nid = memblock_get_region_node(mblk);
 
if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index 1a0fc5b62ba4..6e3102bcd2f1 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -70,7 +70,7 @@ static void nlm_fixup_mem(void)
const int pref_backup = 512;
struct memblock_region *mem;
 
-   for_each_memblock(memory, mem) {
+   for_each_mem_region(mem) {
memblock_remove(mem->base + mem->size - pref_backup,
pref_backup);
}
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 8ee952038c80..fe6ea18d6923 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -516,7 +516,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
 *   memory ranges, because quirks such as trim_snb_memory()
 *   reserve specific pages for Sandy Bridge graphics. ]
 */
-   for_each_memblock(reserved, mb_region) {
+   for_each_reserved_mem_region(mb_region) {
int nid = memblock_get_region_node(mb_region);
 
if (nid != MAX_NUMNODES)
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 9e51b3fd4134..a6970e058bd7 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -522,9 +522,22 @@ static inline unsigned long 
memblock_region_reserved_end_pfn(const struct memblo
return PFN_UP(reg->base + reg->size);
 }
 
-#define for_each_memblock(memblock_type, region)   
\
-   for (region = memblock.memblock_type.regions;   
\
-region < (memblock.memblock_type.regions + 
memblock.memblock_type.cnt);\
+/**
+ * for_each_mem_region - iterate over registered memory regions
+ * @region: loop variable
+ */
+#define for_each_mem_region(region)\
+   for (region = memblock.memory.regions;  \
+region < (memblock.memory.regions + memblock.memory.cnt);  \
+region++)
+
+/**
+ * for_each_reserved_mem_region - iterate over reserved memory regions
+ * @region: loop variable
+ */
+#define for_each_reserved_mem_region(region)   \
+   for (region = memblock.reserved.regions;\
+region < (memblock.reserved.regions + memblock.reserved.cnt); \
 region++)
 
 extern void *alloc_large_system_hash(const char *tablen

[PATCH v2 16/17] memblock: implement for_each_reserved_mem_region() using __next_mem_region()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

Iteration over memblock.reserved with for_each_reserved_mem_region() used
__next_reserved_mem_region() that implemented a subset of
__next_mem_region().

Use __for_each_mem_range() and, essentially, __next_mem_region() with
appropriate parameters to reduce code duplication.

While on it, rename for_each_reserved_mem_region() to
for_each_reserved_mem_range() for consistency.

Signed-off-by: Mike Rapoport 
---
 .clang-format|  2 +-
 arch/arm64/kernel/setup.c|  2 +-
 drivers/irqchip/irq-gic-v3-its.c |  2 +-
 include/linux/memblock.h | 12 +++--
 mm/memblock.c| 46 +++-
 5 files changed, 17 insertions(+), 47 deletions(-)

diff --git a/.clang-format b/.clang-format
index 52ededab25ce..e28a849a1c58 100644
--- a/.clang-format
+++ b/.clang-format
@@ -266,7 +266,7 @@ ForEachMacros:
   - 'for_each_process_thread'
   - 'for_each_property_of_node'
   - 'for_each_registered_fb'
-  - 'for_each_reserved_mem_region'
+  - 'for_each_reserved_mem_range'
   - 'for_each_rtd_codec_dais'
   - 'for_each_rtd_codec_dais_rollback'
   - 'for_each_rtd_components'
diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 93b3844cf442..f3aec7244aab 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -257,7 +257,7 @@ static int __init reserve_memblock_reserved_regions(void)
if (!memblock_is_region_reserved(mem->start, mem_size))
continue;
 
-   for_each_reserved_mem_region(j, &r_start, &r_end) {
+   for_each_reserved_mem_range(j, &r_start, &r_end) {
resource_size_t start, end;
 
start = max(PFN_PHYS(PFN_DOWN(r_start)), mem->start);
diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c
index beac4caefad9..9971fd8cf6b6 100644
--- a/drivers/irqchip/irq-gic-v3-its.c
+++ b/drivers/irqchip/irq-gic-v3-its.c
@@ -2192,7 +2192,7 @@ static bool gic_check_reserved_range(phys_addr_t addr, 
unsigned long size)
 
addr_end = addr + size - 1;
 
-   for_each_reserved_mem_region(i, &start, &end) {
+   for_each_reserved_mem_range(i, &start, &end) {
if (addr >= start && addr_end <= end)
return true;
}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index ec2fd8f32a19..9e51b3fd4134 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -136,9 +136,6 @@ void __next_mem_range_rev(u64 *idx, int nid, enum 
memblock_flags flags,
  struct memblock_type *type_b, phys_addr_t *out_start,
  phys_addr_t *out_end, int *out_nid);
 
-void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start,
-   phys_addr_t *out_end);
-
 void __memblock_free_late(phys_addr_t base, phys_addr_t size);
 
 /**
@@ -193,7 +190,7 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t 
size);
 MEMBLOCK_NONE, p_start, p_end, NULL)
 
 /**
- * for_each_reserved_mem_region - iterate over all reserved memblock areas
+ * for_each_reserved_mem_range - iterate over all reserved memblock areas
  * @i: u64 used as loop variable
  * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
@@ -201,10 +198,9 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t 
size);
  * Walks over reserved areas of memblock. Available as soon as memblock
  * is initialized.
  */
-#define for_each_reserved_mem_region(i, p_start, p_end)
\
-   for (i = 0UL, __next_reserved_mem_region(&i, p_start, p_end);   \
-i != (u64)ULLONG_MAX;  \
-__next_reserved_mem_region(&i, p_start, p_end))
+#define for_each_reserved_mem_range(i, p_start, p_end) \
+   __for_each_mem_range(i, &memblock.reserved, NULL, NUMA_NO_NODE, \
+MEMBLOCK_NONE, p_start, p_end, NULL)
 
 static inline bool memblock_is_hotpluggable(struct memblock_region *m)
 {
diff --git a/mm/memblock.c b/mm/memblock.c
index 48d614352b25..dadf579f7c53 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -946,42 +946,16 @@ int __init_memblock memblock_clear_nomap(phys_addr_t 
base, phys_addr_t size)
return memblock_setclr_flag(base, size, 0, MEMBLOCK_NOMAP);
 }
 
-/**
- * __next_reserved_mem_region - next function for for_each_reserved_region()
- * @idx: pointer to u64 loop variable
- * @out_start: ptr to phys_addr_t for start address of the region, can be %NULL
- * @out_end: ptr to phys_addr_t for end address of the region, can be %NULL
- *
- * Iterate over all reserved memory regions.
- */
-void __init_memblock __next_reserved_mem_region(u64 *idx,
-  phys_addr_t *out_start,
-  phys_addr_t *out_end)
-{
-

[PATCH v2 15/17] memblock: remove unused memblock_mem_size()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

The only user of memblock_mem_size() was x86 setup code; it is gone now and
the memblock_mem_size() function can be removed.

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h |  1 -
 mm/memblock.c| 15 ---
 2 files changed, 16 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index d70c2835e913..ec2fd8f32a19 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -450,7 +450,6 @@ static inline bool memblock_bottom_up(void)
 
 phys_addr_t memblock_phys_mem_size(void);
 phys_addr_t memblock_reserved_size(void);
-phys_addr_t memblock_mem_size(unsigned long limit_pfn);
 phys_addr_t memblock_start_of_DRAM(void);
 phys_addr_t memblock_end_of_DRAM(void);
 void memblock_enforce_memory_limit(phys_addr_t memory_limit);
diff --git a/mm/memblock.c b/mm/memblock.c
index c1a4c8798973..48d614352b25 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1656,21 +1656,6 @@ phys_addr_t __init_memblock memblock_reserved_size(void)
return memblock.reserved.total_size;
 }
 
-phys_addr_t __init memblock_mem_size(unsigned long limit_pfn)
-{
-   unsigned long pages = 0;
-   unsigned long start_pfn, end_pfn;
-   int i;
-
-   for_each_mem_pfn_range(i, MAX_NUMNODES, _pfn, _pfn, NULL) {
-   start_pfn = min_t(unsigned long, start_pfn, limit_pfn);
-   end_pfn = min_t(unsigned long, end_pfn, limit_pfn);
-   pages += end_pfn - start_pfn;
-   }
-
-   return PFN_PHYS(pages);
-}
-
 /* lowest address */
 phys_addr_t __init_memblock memblock_start_of_DRAM(void)
 {
-- 
2.26.2

[PATCH v2 14/17] x86/setup: simplify reserve_crashkernel()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

* Replace magic numbers with defines
* Replace memblock_find_in_range() + memblock_reserve() with
  memblock_phys_alloc_range()
* Stop checking for low memory size in reserve_crashkernel_low(). The
  allocation from a limited range will fail anyway if there is not enough
  memory, so there is no need for an extra traversal of memblock.memory

Signed-off-by: Mike Rapoport 
---
 arch/x86/kernel/setup.c | 40 ++--
 1 file changed, 14 insertions(+), 26 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d8de4053c5e8..d7ced6982524 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -419,13 +419,13 @@ static int __init reserve_crashkernel_low(void)
 {
 #ifdef CONFIG_X86_64
unsigned long long base, low_base = 0, low_size = 0;
-   unsigned long total_low_mem;
+   unsigned long low_mem_limit;
int ret;
 
-   total_low_mem = memblock_mem_size(1UL << (32 - PAGE_SHIFT));
+   low_mem_limit = min(memblock_phys_mem_size(), CRASH_ADDR_LOW_MAX);
 
/* crashkernel=Y,low */
-   ret = parse_crashkernel_low(boot_command_line, total_low_mem, 
_size, );
+   ret = parse_crashkernel_low(boot_command_line, low_mem_limit, 
_size, );
if (ret) {
/*
 * two parts from kernel/dma/swiotlb.c:
@@ -443,23 +443,17 @@ static int __init reserve_crashkernel_low(void)
return 0;
}
 
-   low_base = memblock_find_in_range(0, 1ULL << 32, low_size, CRASH_ALIGN);
+   low_base = memblock_phys_alloc_range(low_size, CRASH_ALIGN, 0, 
CRASH_ADDR_LOW_MAX);
if (!low_base) {
pr_err("Cannot reserve %ldMB crashkernel low memory, please try 
smaller size.\n",
   (unsigned long)(low_size >> 20));
return -ENOMEM;
}
 
-   ret = memblock_reserve(low_base, low_size);
-   if (ret) {
-   pr_err("%s: Error reserving crashkernel low memblock.\n", 
__func__);
-   return ret;
-   }
-
-   pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (System 
low RAM: %ldMB)\n",
+   pr_info("Reserving %ldMB of low memory at %ldMB for crashkernel (low 
RAM limit: %ldMB)\n",
(unsigned long)(low_size >> 20),
(unsigned long)(low_base >> 20),
-   (unsigned long)(total_low_mem >> 20));
+   (unsigned long)(low_mem_limit >> 20));
 
crashk_low_res.start = low_base;
crashk_low_res.end   = low_base + low_size - 1;
@@ -503,13 +497,13 @@ static void __init reserve_crashkernel(void)
 * unless "crashkernel=size[KMG],high" is specified.
 */
if (!high)
-   crash_base = memblock_find_in_range(CRASH_ALIGN,
-   CRASH_ADDR_LOW_MAX,
-   crash_size, CRASH_ALIGN);
+   crash_base = memblock_phys_alloc_range(crash_size,
+   CRASH_ALIGN, CRASH_ALIGN,
+   CRASH_ADDR_LOW_MAX);
if (!crash_base)
-   crash_base = memblock_find_in_range(CRASH_ALIGN,
-   CRASH_ADDR_HIGH_MAX,
-   crash_size, CRASH_ALIGN);
+   crash_base = memblock_phys_alloc_range(crash_size,
+   CRASH_ALIGN, CRASH_ALIGN,
+   CRASH_ADDR_HIGH_MAX);
if (!crash_base) {
pr_info("crashkernel reservation failed - No suitable 
area found.\n");
return;
@@ -517,19 +511,13 @@ static void __init reserve_crashkernel(void)
} else {
unsigned long long start;
 
-   start = memblock_find_in_range(crash_base,
-  crash_base + crash_size,
-  crash_size, 1 << 20);
+   start = memblock_phys_alloc_range(crash_size, SZ_1M, crash_base,
+ crash_base + crash_size);
if (start != crash_base) {
pr_info("crashkernel reservation failed - memory is in 
use.\n");
return;
}
}
-   ret = memblock_reserve(crash_base, crash_size);
-   if (ret) {
-   pr_err("%s: Error reserving crashkernel memblock.\n", __func__);
-   return;
-   }
 
if (crash_base >= (1ULL << 32) && reserve_crashkernel_low()) {
memblock_free(crash_base, crash_size);
-- 
2.26.2

[PATCH v2 13/17] x86/setup: simplify initrd relocation and reservation

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

Currently, initrd image is reserved very early during setup and then it
might be relocated and re-reserved after the initial physical memory
mapping is created. The "late" reservation of memblock verifies that mapped
memory size exceeds the size of initrd, then checks whether the relocation
is required and, if yes, relocates initrd to a new memory area allocated
from memblock and frees the old location.

The check for memory size is excessive as memblock allocation will fail
anyway if there is not enough memory. Besides, there is no point in allocating
memory from memblock using memblock_find_in_range() + memblock_reserve()
when memblock_phys_alloc_range() already provides the required functionality.

Remove the redundant check and simplify memblock allocation.

Signed-off-by: Mike Rapoport 
---
 arch/x86/kernel/setup.c | 16 +++-
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index a3767e74c758..d8de4053c5e8 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -262,16 +262,12 @@ static void __init relocate_initrd(void)
u64 area_size = PAGE_ALIGN(ramdisk_size);
 
/* We need to move the initrd down into directly mapped mem */
-   relocated_ramdisk = memblock_find_in_range(0, PFN_PHYS(max_pfn_mapped),
-  area_size, PAGE_SIZE);
-
+   relocated_ramdisk = memblock_phys_alloc_range(area_size, PAGE_SIZE, 0,
+ PFN_PHYS(max_pfn_mapped));
if (!relocated_ramdisk)
panic("Cannot find place for new RAMDISK of size %lld\n",
  ramdisk_size);
 
-   /* Note: this includes all the mem currently occupied by
-  the initrd, we rely on that fact to keep the data intact. */
-   memblock_reserve(relocated_ramdisk, area_size);
initrd_start = relocated_ramdisk + PAGE_OFFSET;
initrd_end   = initrd_start + ramdisk_size;
printk(KERN_INFO "Allocated new RAMDISK: [mem %#010llx-%#010llx]\n",
@@ -298,13 +294,13 @@ static void __init early_reserve_initrd(void)
 
memblock_reserve(ramdisk_image, ramdisk_end - ramdisk_image);
 }
+
 static void __init reserve_initrd(void)
 {
/* Assume only end is not page aligned */
u64 ramdisk_image = get_ramdisk_image();
u64 ramdisk_size  = get_ramdisk_size();
u64 ramdisk_end   = PAGE_ALIGN(ramdisk_image + ramdisk_size);
-   u64 mapped_size;
 
if (!boot_params.hdr.type_of_loader ||
!ramdisk_image || !ramdisk_size)
@@ -312,12 +308,6 @@ static void __init reserve_initrd(void)
 
initrd_start = 0;
 
-   mapped_size = memblock_mem_size(max_pfn_mapped);
-   if (ramdisk_size >= (mapped_size>>1))
-   panic("initrd too large to handle, "
-  "disabling initrd (%lld needed, %lld available)\n",
-  ramdisk_size, mapped_size>>1);
-
printk(KERN_INFO "RAMDISK: [mem %#010llx-%#010llx]\n", ramdisk_image,
ramdisk_end - 1);
 
-- 
2.26.2

[PATCH v2 12/17] arch, drivers: replace for_each_memblock() with for_each_mem_range()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

There are several occurrences of the following pattern:

for_each_memblock(memory, reg) {
start = __pfn_to_phys(memblock_region_memory_base_pfn(reg));
end = __pfn_to_phys(memblock_region_memory_end_pfn(reg));

/* do something with start and end */
}

Using for_each_mem_range() iterator is more appropriate in such cases and
allows simpler and cleaner code.

Signed-off-by: Mike Rapoport 
---
 arch/arm/kernel/setup.c  | 18 ++---
 arch/arm/mm/mmu.c| 39 ++
 arch/arm/mm/pmsa-v7.c| 20 +-
 arch/arm/mm/pmsa-v8.c| 17 
 arch/arm/xen/mm.c|  7 ++--
 arch/arm64/mm/kasan_init.c   | 10 ++---
 arch/arm64/mm/mmu.c  | 11 ++
 arch/c6x/kernel/setup.c  |  9 +++--
 arch/microblaze/mm/init.c|  9 +++--
 arch/mips/cavium-octeon/dma-octeon.c | 12 +++---
 arch/mips/kernel/setup.c | 31 +++
 arch/openrisc/mm/init.c  |  8 ++--
 arch/powerpc/kernel/fadump.c | 50 +++-
 arch/powerpc/mm/book3s64/hash_utils.c| 16 
 arch/powerpc/mm/book3s64/radix_pgtable.c | 11 +++---
 arch/powerpc/mm/kasan/kasan_init_32.c|  8 ++--
 arch/powerpc/mm/mem.c| 16 +---
 arch/powerpc/mm/pgtable_32.c |  8 ++--
 arch/riscv/mm/init.c | 25 +---
 arch/riscv/mm/kasan_init.c   | 10 ++---
 arch/s390/kernel/setup.c | 27 -
 arch/s390/mm/vmem.c  | 16 
 arch/sparc/mm/init_64.c  | 12 ++
 drivers/bus/mvebu-mbus.c | 12 +++---
 drivers/s390/char/zcore.c|  9 +++--
 25 files changed, 200 insertions(+), 211 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index d8e18cdd96d3..3f65d0ac9f63 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -843,19 +843,25 @@ early_param("mem", early_mem);
 
 static void __init request_standard_resources(const struct machine_desc *mdesc)
 {
-   struct memblock_region *region;
+   phys_addr_t start, end, res_end;
struct resource *res;
+   u64 i;
 
kernel_code.start   = virt_to_phys(_text);
kernel_code.end = virt_to_phys(__init_begin - 1);
kernel_data.start   = virt_to_phys(_sdata);
kernel_data.end = virt_to_phys(_end - 1);
 
-   for_each_memblock(memory, region) {
-   phys_addr_t start = 
__pfn_to_phys(memblock_region_memory_base_pfn(region));
-   phys_addr_t end = 
__pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+   for_each_mem_range(i, , ) {
unsigned long boot_alias_start;
 
+   /*
+* In memblock, end points to the first byte after the
+* range while in resources, end points to the last byte in
+* the range.
+*/
+   res_end = end - 1;
+
/*
 * Some systems have a special memory alias which is only
 * used for booting.  We need to advertise this region to
@@ -869,7 +875,7 @@ static void __init request_standard_resources(const struct 
machine_desc *mdesc)
  __func__, sizeof(*res));
res->name = "System RAM (boot alias)";
res->start = boot_alias_start;
-   res->end = phys_to_idmap(end);
+   res->end = phys_to_idmap(res_end);
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(_resource, res);
}
@@ -880,7 +886,7 @@ static void __init request_standard_resources(const struct 
machine_desc *mdesc)
  sizeof(*res));
res->name  = "System RAM";
res->start = start;
-   res->end = end;
+   res->end = res_end;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
request_resource(_resource, res);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 628028bfbb92..a149d9cb4fdb 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1155,9 +1155,8 @@ phys_addr_t arm_lowmem_limit __initdata = 0;
 
 void __init adjust_lowmem_bounds(void)
 {
-   phys_addr_t memblock_limit = 0;
-   u64 vmalloc_limit;
-   struct memblock_region *reg;
+   phys_addr_t block_start, block_end, memblock_limit = 0;
+   u64 vmalloc_limit, i;
phys_addr_t lowmem_limit = 0;
 
/*
@@ -1173,26 +1172,18 @@ void __init adjust_lowmem_bounds(void)
 * The first usable region must be PMD aligned. Mark its start
 * as MEMB

[PATCH v2 11/17] arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

There are several occurrences of the following pattern:

for_each_memblock(memory, reg) {
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);

/* do something with start_pfn and end_pfn */
}

Rather than iterate over all memblock.memory regions and each time query
for their start and end PFNs, use for_each_mem_pfn_range() iterator to get
simpler and clearer code.

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c   | 11 ---
 arch/arm64/mm/init.c | 11 ---
 arch/powerpc/kernel/fadump.c | 11 ++-
 arch/powerpc/mm/mem.c| 15 ---
 arch/powerpc/mm/numa.c   |  7 ++-
 arch/s390/mm/page-states.c   |  6 ++
 arch/sh/mm/init.c|  9 +++--
 mm/memblock.c|  6 ++
 mm/sparse.c  | 10 --
 9 files changed, 35 insertions(+), 51 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 626af348eb8f..d630573277d1 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -304,16 +304,14 @@ free_memmap(unsigned long start_pfn, unsigned long 
end_pfn)
  */
 static void __init free_unused_memmap(void)
 {
-   unsigned long start, prev_end = 0;
-   struct memblock_region *reg;
+   unsigned long start, end, prev_end = 0;
+   int i;
 
/*
 * This relies on each bank being in address order.
 * The banks are sorted previously in bootmem_init().
 */
-   for_each_memblock(memory, reg) {
-   start = memblock_region_memory_base_pfn(reg);
-
+   for_each_mem_pfn_range(i, MAX_NUMNODES, , , NULL) {
 #ifdef CONFIG_SPARSEMEM
/*
 * Take care not to free memmap entries that don't exist
@@ -341,8 +339,7 @@ static void __init free_unused_memmap(void)
 * memmap entries are valid from the bank end aligned to
 * MAX_ORDER_NR_PAGES.
 */
-   prev_end = ALIGN(memblock_region_memory_end_pfn(reg),
-MAX_ORDER_NR_PAGES);
+   prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
 
 #ifdef CONFIG_SPARSEMEM
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1e93cfc7c47a..291b5805457d 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -473,12 +473,10 @@ static inline void free_memmap(unsigned long start_pfn, 
unsigned long end_pfn)
  */
 static void __init free_unused_memmap(void)
 {
-   unsigned long start, prev_end = 0;
-   struct memblock_region *reg;
-
-   for_each_memblock(memory, reg) {
-   start = __phys_to_pfn(reg->base);
+   unsigned long start, end, prev_end = 0;
+   int i;
 
+   for_each_mem_pfn_range(i, MAX_NUMNODES, , , NULL) {
 #ifdef CONFIG_SPARSEMEM
/*
 * Take care not to free memmap entries that don't exist due
@@ -498,8 +496,7 @@ static void __init free_unused_memmap(void)
 * memmap entries are valid from the bank end aligned to
 * MAX_ORDER_NR_PAGES.
 */
-   prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
-MAX_ORDER_NR_PAGES);
+   prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
 
 #ifdef CONFIG_SPARSEMEM
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 78ab9a6ee6ac..fc85cbc66839 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1216,14 +1216,15 @@ static void fadump_free_reserved_memory(unsigned long 
start_pfn,
  */
 static void fadump_release_reserved_area(u64 start, u64 end)
 {
-   u64 tstart, tend, spfn, epfn;
-   struct memblock_region *reg;
+   u64 tstart, tend, spfn, epfn, reg_spfn, reg_epfn, i;
 
spfn = PHYS_PFN(start);
epfn = PHYS_PFN(end);
-   for_each_memblock(memory, reg) {
-   tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
-   tend   = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
+
+   for_each_mem_pfn_range(i, MAX_NUMNODES, _spfn, _epfn, NULL) {
+   tstart = max_t(u64, spfn, reg_spfn);
+   tend   = min_t(u64, epfn, reg_epfn);
+
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c2c11eb8dcfc..1364dd532107 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -192,15 +192,16 @@ void __init initmem_init(void)
 /* mark pages that don't exist as nosave */
 static int __init mark_nonram_nosave(void)
 {
-   struct memblock_region *reg, *prev = NULL;
+   unsigned long spfn, epfn, prev = 0;
+   int i;
 
-   for_each_memblock(memory, reg) {
-   if (prev &&
-   memblock_region

[PATCH v2 10/17] memblock: reduce number of parameters in for_each_mem_range()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

Currently for_each_mem_range() iterator is the most generic way to traverse
memblock regions. As such, it has 8 parameters and is hardly convenient
for users. Most users choose to utilize one of its wrappers and the only
user that actually needs most of the parameters outside memblock is s390
crash dump implementation.

To avoid yet another naming for memblock iterators, rename the existing
for_each_mem_range() to __for_each_mem_range() and add a new
for_each_mem_range() wrapper with only index, start and end parameters.

The new wrapper nicely fits into init_unavailable_mem() and will be used in
upcoming changes to simplify memblock traversals.

Signed-off-by: Mike Rapoport 
Reviewed-by: Baoquan He 
---
 .clang-format  |  1 +
 arch/arm64/kernel/machine_kexec_file.c |  6 ++
 arch/s390/kernel/crash_dump.c  |  8 
 include/linux/memblock.h   | 18 ++
 mm/page_alloc.c|  3 +--
 5 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/.clang-format b/.clang-format
index a0a96088c74f..52ededab25ce 100644
--- a/.clang-format
+++ b/.clang-format
@@ -205,6 +205,7 @@ ForEachMacros:
   - 'for_each_memblock_type'
   - 'for_each_memcg_cache_index'
   - 'for_each_mem_pfn_range'
+  - '__for_each_mem_range'
   - 'for_each_mem_range'
   - 'for_each_mem_range_rev'
   - 'for_each_migratetype_order'
diff --git a/arch/arm64/kernel/machine_kexec_file.c 
b/arch/arm64/kernel/machine_kexec_file.c
index 361a1143e09e..5b0e67b93cdc 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -215,8 +215,7 @@ static int prepare_elf_headers(void **addr, unsigned long 
*sz)
phys_addr_t start, end;
 
nr_ranges = 1; /* for exclusion of crashkernel region */
-   for_each_mem_range(i, , NULL, NUMA_NO_NODE,
-   MEMBLOCK_NONE, , , NULL)
+   for_each_mem_range(i, , )
nr_ranges++;
 
cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
@@ -225,8 +224,7 @@ static int prepare_elf_headers(void **addr, unsigned long 
*sz)
 
cmem->max_nr_ranges = nr_ranges;
cmem->nr_ranges = 0;
-   for_each_mem_range(i, , NULL, NUMA_NO_NODE,
-   MEMBLOCK_NONE, , , NULL) {
+   for_each_mem_range(i, , ) {
cmem->ranges[cmem->nr_ranges].start = start;
cmem->ranges[cmem->nr_ranges].end = end - 1;
cmem->nr_ranges++;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f96a5857bbfd..e28085c725ff 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -549,8 +549,8 @@ static int get_mem_chunk_cnt(void)
int cnt = 0;
u64 idx;
 
-   for_each_mem_range(idx, , _type, NUMA_NO_NODE,
-  MEMBLOCK_NONE, NULL, NULL, NULL)
+   __for_each_mem_range(idx, , _type, NUMA_NO_NODE,
+MEMBLOCK_NONE, NULL, NULL, NULL)
cnt++;
return cnt;
 }
@@ -563,8 +563,8 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
phys_addr_t start, end;
u64 idx;
 
-   for_each_mem_range(idx, , _type, NUMA_NO_NODE,
-  MEMBLOCK_NONE, , , NULL) {
+   __for_each_mem_range(idx, , _type, NUMA_NO_NODE,
+MEMBLOCK_NONE, , , NULL) {
phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
phdr->p_offset = start;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e6a23b3db696..d70c2835e913 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -142,7 +142,7 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t 
*out_start,
 void __memblock_free_late(phys_addr_t base, phys_addr_t size);
 
 /**
- * for_each_mem_range - iterate through memblock areas from type_a and not
+ * __for_each_mem_range - iterate through memblock areas from type_a and not
  * included in type_b. Or just type_a if type_b is NULL.
  * @i: u64 used as loop variable
  * @type_a: ptr to memblock_type to iterate
@@ -153,7 +153,7 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t 
size);
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
  */
-#define for_each_mem_range(i, type_a, type_b, nid, flags,  \
+#define __for_each_mem_range(i, type_a, type_b, nid, flags,\
   p_start, p_end, p_nid)   \
for (i = 0, __next_mem_range(, nid, flags, type_a, type_b,\
 p_start, p_end, p_nid);\
@@ -182,6 +182,16 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t 
size);
 __next_mem_range_re

[PATCH v2 09/17] memblock: make memblock_debug and related functionality private

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

The only user of memblock_dbg() outside memblock was s390 setup code and it
is converted to use pr_debug() instead.
This allows us to stop exposing memblock_debug and memblock_dbg() to the rest
of the kernel.

Signed-off-by: Mike Rapoport 
Reviewed-by: Baoquan He 
---
 arch/s390/kernel/setup.c |  4 ++--
 include/linux/memblock.h | 12 +---
 mm/memblock.c| 13 +++--
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 07aa15ba43b3..8b284cf6e199 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -776,8 +776,8 @@ static void __init memblock_add_mem_detect_info(void)
unsigned long start, end;
int i;
 
-   memblock_dbg("physmem info source: %s (%hhd)\n",
-get_mem_info_source(), mem_detect.info_source);
+   pr_debug("physmem info source: %s (%hhd)\n",
+get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
for_each_mem_detect_block(i, , ) {
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 220b5f0dad42..e6a23b3db696 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -90,7 +90,6 @@ struct memblock {
 };
 
 extern struct memblock memblock;
-extern int memblock_debug;
 
 #ifndef CONFIG_ARCH_KEEP_MEMBLOCK
 #define __init_memblock __meminit
@@ -102,9 +101,6 @@ void memblock_discard(void);
 static inline void memblock_discard(void) {}
 #endif
 
-#define memblock_dbg(fmt, ...) \
-   if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
@@ -456,13 +452,7 @@ bool memblock_is_region_memory(phys_addr_t base, 
phys_addr_t size);
 bool memblock_is_reserved(phys_addr_t addr);
 bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
 
-extern void __memblock_dump_all(void);
-
-static inline void memblock_dump_all(void)
-{
-   if (memblock_debug)
-   __memblock_dump_all();
-}
+void memblock_dump_all(void);
 
 /**
  * memblock_set_current_limit - Set the current allocation limit to allow
diff --git a/mm/memblock.c b/mm/memblock.c
index a5b9b3df81fc..824938849f6d 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -134,7 +134,10 @@ struct memblock memblock __initdata_memblock = {
 i < memblock_type->cnt;\
 i++, rgn = _type->regions[i])
 
-int memblock_debug __initdata_memblock;
+#define memblock_dbg(fmt, ...) \
+   if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+
+static int memblock_debug __initdata_memblock;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
@@ -1919,7 +1922,7 @@ static void __init_memblock memblock_dump(struct 
memblock_type *type)
}
 }
 
-void __init_memblock __memblock_dump_all(void)
+static void __init_memblock __memblock_dump_all(void)
 {
pr_info("MEMBLOCK configuration:\n");
pr_info(" memory size = %pa reserved size = %pa\n",
@@ -1933,6 +1936,12 @@ void __init_memblock __memblock_dump_all(void)
 #endif
 }
 
+void __init_memblock memblock_dump_all(void)
+{
+   if (memblock_debug)
+   __memblock_dump_all();
+}
+
 void __init memblock_allow_resize(void)
 {
memblock_can_resize = 1;
-- 
2.26.2

[PATCH v2 08/17] memblock: make for_each_memblock_type() iterator private

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

for_each_memblock_type() is not used outside mm/memblock.c, move it there
from include/linux/memblock.h

Signed-off-by: Mike Rapoport 
Reviewed-by: Baoquan He 
---
 include/linux/memblock.h | 5 -
 mm/memblock.c| 5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 017fae833d4a..220b5f0dad42 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -532,11 +532,6 @@ static inline unsigned long 
memblock_region_reserved_end_pfn(const struct memblo
 region < (memblock.memblock_type.regions + 
memblock.memblock_type.cnt);\
 region++)
 
-#define for_each_memblock_type(i, memblock_type, rgn)  \
-   for (i = 0, rgn = _type->regions[0];   \
-i < memblock_type->cnt;\
-i++, rgn = _type->regions[i])
-
 extern void *alloc_large_system_hash(const char *tablename,
 unsigned long bucketsize,
 unsigned long numentries,
diff --git a/mm/memblock.c b/mm/memblock.c
index 39aceafc57f6..a5b9b3df81fc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -129,6 +129,11 @@ struct memblock memblock __initdata_memblock = {
.current_limit  = MEMBLOCK_ALLOC_ANYWHERE,
 };
 
+#define for_each_memblock_type(i, memblock_type, rgn)  \
+   for (i = 0, rgn = _type->regions[0];   \
+i < memblock_type->cnt;\
+i++, rgn = _type->regions[i])
+
 int memblock_debug __initdata_memblock;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
-- 
2.26.2

[PATCH v2 07/17] microblaze: drop unneeded NUMA and sparsemem initializations

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

microblaze supports neither NUMA nor SPARSEMEM, so there is no point in
calling the memblock_set_node() and sparse_memory_present_with_active_regions()
functions during microblaze memory initialization.

Remove these calls and the surrounding code.

Signed-off-by: Mike Rapoport 
---
 arch/microblaze/mm/init.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 521b59ba716c..49e0c241f9b1 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -105,9 +105,8 @@ static void __init paging_init(void)
 
 void __init setup_memory(void)
 {
-   struct memblock_region *reg;
-
 #ifndef CONFIG_MMU
+   struct memblock_region *reg;
u32 kernel_align_start, kernel_align_size;
 
/* Find main memory where is the kernel */
@@ -161,20 +160,6 @@ void __init setup_memory(void)
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
 
-   /* Add active regions with valid PFNs */
-   for_each_memblock(memory, reg) {
-   unsigned long start_pfn, end_pfn;
-
-   start_pfn = memblock_region_memory_base_pfn(reg);
-   end_pfn = memblock_region_memory_end_pfn(reg);
-   memblock_set_node(start_pfn << PAGE_SHIFT,
- (end_pfn - start_pfn) << PAGE_SHIFT,
- , 0);
-   }
-
-   /* XXX need to clip this if using highmem? */
-   sparse_memory_present_with_active_regions(0);
-
paging_init();
 }
 
-- 
2.26.2

[PATCH v2 06/17] riscv: drop unneeded node initialization

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

RISC-V does not (yet) support NUMA, and for UMA architectures node 0 is
used implicitly during early memory initialization.

There is no need to call memblock_set_node(), remove this call and the
surrounding code.

Signed-off-by: Mike Rapoport 
---
 arch/riscv/mm/init.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 79e9d55bdf1a..7440ba2cdaaa 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -191,15 +191,6 @@ void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem();
memblock_allow_resize();
memblock_dump_all();
-
-   for_each_memblock(memory, reg) {
-   unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
-   unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
-
-   memblock_set_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn),
- , 0);
-   }
 }
 
 #ifdef CONFIG_MMU
-- 
2.26.2

[PATCH v2 05/17] h8300, nds32, openrisc: simplify detection of memory extents

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

Instead of traversing memblock.memory regions to find memory_start and
memory_end, simply query memblock_{start,end}_of_DRAM().

Signed-off-by: Mike Rapoport 
Acked-by: Stafford Horne 
---
 arch/h8300/kernel/setup.c| 8 +++-
 arch/nds32/kernel/setup.c| 8 ++--
 arch/openrisc/kernel/setup.c | 9 ++---
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index 28ac88358a89..0281f92eea3d 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -74,17 +74,15 @@ static void __init bootmem_init(void)
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end)
panic("No memory!");
 
/* setup bootmem globals (we use no_bootmem, but mm still depends on 
this) */
min_low_pfn = PFN_UP(memory_start);
-   max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+   max_low_pfn = PFN_DOWN(memory_end);
max_pfn = max_low_pfn;
 
memblock_reserve(__pa(_stext), _end - _stext);
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index a066efbe53c0..c356e484dcab 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -249,12 +249,8 @@ static void __init setup_memory(void)
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   pr_info("%s: Memory: 0x%x-0x%x\n", __func__,
-   memory_start, memory_end);
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end) {
panic("No memory!");
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 8aa438e1f51f..c5706153d3b6 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -48,17 +48,12 @@ static void __init setup_memory(void)
unsigned long ram_start_pfn;
unsigned long ram_end_pfn;
phys_addr_t memory_start, memory_end;
-   struct memblock_region *region;
 
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel, we assume its the only one */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
-  memory_start, memory_end);
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end) {
panic("No memory!");
-- 
2.26.2

[PATCH v2 04/17] arm64: numa: simplify dummy_numa_init()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

dummy_numa_init() loops over memblock.memory and passes nid=0 to
numa_add_memblk() which essentially wraps memblock_set_node(). However,
memblock_set_node() can cope with the entire memory span itself, so the loop
over memblock.memory regions is redundant.

Using a single call to memblock_set_node() rather than a loop also fixes an
issue with a buggy ACPI firmware in which the SRAT table covers some but
not all of the memory in the EFI memory map.

Jonathan Cameron says:

  This issue can be easily triggered by having an SRAT table which fails
  to cover all elements of the EFI memory map.

  This firmware error is detected and a warning printed. e.g.
  "NUMA: Warning: invalid memblk node 64 [mem 0x24000-0x27fff]"
  At that point we fall back to dummy_numa_init().

  However, the failed ACPI init has left us with our memblocks all broken
  up as we split them when trying to assign them to NUMA nodes.

  We then iterate over the memblocks and add them to node 0.

  numa_add_memblk() calls memblock_set_node() which merges regions that
  were previously split up during the earlier attempt to add them to different
  nodes during parsing of SRAT.

  This means elements are moved in the memblock array and we can end up
  in a different memblock after the call to numa_add_memblk().
  Result is:

  Unable to handle kernel paging request at virtual address 3a40
  Mem abort info:
ESR = 0x9604
EC = 0x25: DABT (current EL), IL = 32 bits
SET = 0, FnV = 0
EA = 0, S1PTW = 0
  Data abort info:
ISV = 0, ISS = 0x0004
CM = 0, WnR = 0
  [3a40] user address but active_mm is swapper
  Internal error: Oops: 9604 [#1] PREEMPT SMP

  ...

  Call trace:
sparse_init_nid+0x5c/0x2b0
sparse_init+0x138/0x170
bootmem_init+0x80/0xe0
setup_arch+0x2a0/0x5fc
start_kernel+0x8c/0x648

Replace the loop with a single call to memblock_set_node() covering the
entire memory.

Signed-off-by: Mike Rapoport 
Acked-by: Jonathan Cameron 
Acked-by: Catalin Marinas 
---
 arch/arm64/mm/numa.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index aafcee3e3f7e..0cbdbcc885fb 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -423,19 +423,16 @@ static int __init numa_init(int (*init_func)(void))
  */
 static int __init dummy_numa_init(void)
 {
+   phys_addr_t start = memblock_start_of_DRAM();
+   phys_addr_t end = memblock_end_of_DRAM();
int ret;
-   struct memblock_region *mblk;
 
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
-   pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
-   memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
-
-   for_each_memblock(memory, mblk) {
-   ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
-   if (!ret)
-   continue;
+   pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
 
+   ret = numa_add_memblk(0, start, end);
+   if (ret) {
pr_err("NUMA init failed\n");
return ret;
}
-- 
2.26.2

[PATCH v2 03/17] arm, xtensa: simplify initialization of high memory pages

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

The function free_highpages() in both arm and xtensa essentially open-code
for_each_free_mem_range() loop to detect high memory pages that were not
reserved and that should be initialized and passed to the buddy allocator.

Replace open-coded implementation of for_each_free_mem_range() with usage
of memblock API to simplify the code.

Signed-off-by: Mike Rapoport 
Reviewed-by: Max Filippov   # xtensa
Tested-by: Max Filippov # xtensa
---
 arch/arm/mm/init.c| 48 +++--
 arch/xtensa/mm/init.c | 55 ---
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 01e18e43b174..626af348eb8f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -352,61 +352,29 @@ static void __init free_unused_memmap(void)
 #endif
 }
 
-#ifdef CONFIG_HIGHMEM
-static inline void free_area_high(unsigned long pfn, unsigned long end)
-{
-   for (; pfn < end; pfn++)
-   free_highmem_page(pfn_to_page(pfn));
-}
-#endif
-
 static void __init free_highpages(void)
 {
 #ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
-   struct memblock_region *mem, *res;
+   phys_addr_t range_start, range_end;
+   u64 i;
 
/* set highmem page free */
-   for_each_memblock(memory, mem) {
-   unsigned long start = memblock_region_memory_base_pfn(mem);
-   unsigned long end = memblock_region_memory_end_pfn(mem);
+   for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+   _start, _end, NULL) {
+   unsigned long start = PHYS_PFN(range_start);
+   unsigned long end = PHYS_PFN(range_end);
 
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
 
-   if (memblock_is_nomap(mem))
-   continue;
-
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
 
-   /* Find and exclude any reserved regions */
-   for_each_memblock(reserved, res) {
-   unsigned long res_start, res_end;
-
-   res_start = memblock_region_reserved_base_pfn(res);
-   res_end = memblock_region_reserved_end_pfn(res);
-
-   if (res_end < start)
-   continue;
-   if (res_start < start)
-   res_start = start;
-   if (res_start > end)
-   res_start = end;
-   if (res_end > end)
-   res_end = end;
-   if (res_start != start)
-   free_area_high(start, res_start);
-   start = res_end;
-   if (start == end)
-   break;
-   }
-
-   /* And now free anything which remains */
-   if (start < end)
-   free_area_high(start, end);
+   for (; start < end; start++)
+   free_highmem_page(pfn_to_page(start));
}
 #endif
 }
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index a05b306cf371..ad9d59d93f39 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -79,67 +79,32 @@ void __init zones_init(void)
free_area_init(max_zone_pfn);
 }
 
-#ifdef CONFIG_HIGHMEM
-static void __init free_area_high(unsigned long pfn, unsigned long end)
-{
-   for (; pfn < end; pfn++)
-   free_highmem_page(pfn_to_page(pfn));
-}
-
 static void __init free_highpages(void)
 {
+#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
-   struct memblock_region *mem, *res;
+   phys_addr_t range_start, range_end;
+   u64 i;
 
-   reset_all_zones_managed_pages();
/* set highmem page free */
-   for_each_memblock(memory, mem) {
-   unsigned long start = memblock_region_memory_base_pfn(mem);
-   unsigned long end = memblock_region_memory_end_pfn(mem);
+   for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+   _start, _end, NULL) {
+   unsigned long start = PHYS_PFN(range_start);
+   unsigned long end = PHYS_PFN(range_end);
 
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
 
-   if (memblock_is_nomap(mem))
-   continue;
-
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
 
-   /* Find and exclude any reserved regions */
-   for_each_memblock(reserved, res) {
-

[PATCH v2 02/17] dma-contiguous: simplify cma_early_percent_memory()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

The memory size calculation in cma_early_percent_memory() traverses
memblock.memory rather than simply call memblock_phys_mem_size(). The
comment in that function suggests that at some point there should have been
call to memblock_analyze() before memblock_phys_mem_size() could be used.
As of now, there is no memblock_analyze() at all and
memblock_phys_mem_size() can be used as soon as cold-plug memory is
registered with memblock.

Replace loop over memblock.memory with a call to memblock_phys_mem_size().

Signed-off-by: Mike Rapoport 
Reviewed-by: Christoph Hellwig 
---
 kernel/dma/contiguous.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 15bc5026c485..1992afd8ca7b 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -73,16 +73,7 @@ early_param("cma", early_cma);
 
 static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
 {
-   struct memblock_region *reg;
-   unsigned long total_pages = 0;
-
-   /*
-* We cannot use memblock_phys_mem_size() here, because
-* memblock_analyze() has not been called yet.
-*/
-   for_each_memblock(memory, reg)
-   total_pages += memblock_region_memory_end_pfn(reg) -
-  memblock_region_memory_base_pfn(reg);
+   unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size());
 
return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
 }
-- 
2.26.2

[PATCH v2 01/17] KVM: PPC: Book3S HV: simplify kvm_cma_reserve()

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

The memory size calculation in kvm_cma_reserve() traverses memblock.memory
rather than simply call memblock_phys_mem_size(). The comment in that
function suggests that at some point there should have been call to
memblock_analyze() before memblock_phys_mem_size() could be used.
As of now, there is no memblock_analyze() at all and
memblock_phys_mem_size() can be used as soon as cold-plug memory is
registered with memblock.

Replace loop over memblock.memory with a call to memblock_phys_mem_size().

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c 
b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7cd3cf3d366b..56ab0d28de2a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -95,22 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
 void __init kvm_cma_reserve(void)
 {
unsigned long align_size;
-   struct memblock_region *reg;
-   phys_addr_t selected_size = 0;
+   phys_addr_t selected_size;
 
/*
 * We need CMA reservation only when we are in HV mode
 */
if (!cpu_has_feature(CPU_FTR_HVMODE))
return;
-   /*
-* We cannot use memblock_phys_mem_size() here, because
-* memblock_analyze() has not been called yet.
-*/
-   for_each_memblock(memory, reg)
-   selected_size += memblock_region_memory_end_pfn(reg) -
-memblock_region_memory_base_pfn(reg);
 
+   selected_size = PHYS_PFN(memblock_phys_mem_size());
selected_size = (selected_size * kvm_cma_resv_ratio / 100) << 
PAGE_SHIFT;
if (selected_size) {
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
-- 
2.26.2

[PATCH v2 00/17] memblock: seasonal cleaning^w cleanup

2020-08-02 Thread Mike Rapoport

From: Mike Rapoport 

Hi,

These patches simplify several uses of memblock iterators and hide some of
the memblock implementation details from the rest of the system.

The patches are on top of v5.8-rc7 + cherry-pick of "mm/sparse: cleanup the
code surrounding memory_present()" [1] from mmotm tree.

v2 changes:
* replace for_each_memblock() with two versions, one for memblock.memory
  and another one for memblock.reserved
* fix overzealous cleanup of powerpc fadump: keep the traversal over the
  memblocks, but use better suited iterators
* don't remove traversal over memblock.reserved in x86 numa cleanup but
  replace for_each_memblock() with new for_each_reserved_mem_region()
* simplify ramdisk and crash kernel allocations on x86
* drop more redundant and unused code: __next_reserved_mem_region() and
  memblock_mem_size()
* add description of numa initialization fix on arm64 (thanks Jonathan)
* add Acked and Reviewed tags

[1] http://lkml.kernel.org/r/20200712083130.22919-1-r...@kernel.org 

Mike Rapoport (17):
  KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
  dma-contiguous: simplify cma_early_percent_memory()
  arm, xtensa: simplify initialization of high memory pages
  arm64: numa: simplify dummy_numa_init()
  h8300, nds32, openrisc: simplify detection of memory extents
  riscv: drop unneeded node initialization
  mircoblaze: drop unneeded NUMA and sparsemem initializations
  memblock: make for_each_memblock_type() iterator private
  memblock: make memblock_debug and related functionality private
  memblock: reduce number of parameters in for_each_mem_range()
  arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()
  arch, drivers: replace for_each_membock() with for_each_mem_range()
  x86/setup: simplify initrd relocation and reservation
  x86/setup: simplify reserve_crashkernel()
  memblock: remove unused memblock_mem_size()
  memblock: implement for_each_reserved_mem_region() using __next_mem_region()
  memblock: use separate iterators for memory and reserved regions

 .clang-format|  4 +-
 arch/arm/kernel/setup.c  | 18 +++--
 arch/arm/mm/init.c   | 59 
 arch/arm/mm/mmu.c| 39 ---
 arch/arm/mm/pmsa-v7.c| 20 +++---
 arch/arm/mm/pmsa-v8.c| 17 +++--
 arch/arm/xen/mm.c|  7 +-
 arch/arm64/kernel/machine_kexec_file.c   |  6 +-
 arch/arm64/kernel/setup.c|  4 +-
 arch/arm64/mm/init.c | 11 ++-
 arch/arm64/mm/kasan_init.c   | 10 +--
 arch/arm64/mm/mmu.c  | 11 +--
 arch/arm64/mm/numa.c | 15 ++---
 arch/c6x/kernel/setup.c  |  9 +--
 arch/h8300/kernel/setup.c|  8 +--
 arch/microblaze/mm/init.c| 24 ++-
 arch/mips/cavium-octeon/dma-octeon.c | 12 ++--
 arch/mips/kernel/setup.c | 31 +
 arch/mips/netlogic/xlp/setup.c   |  2 +-
 arch/nds32/kernel/setup.c|  8 +--
 arch/openrisc/kernel/setup.c |  9 +--
 arch/openrisc/mm/init.c  |  8 ++-
 arch/powerpc/kernel/fadump.c | 57 
 arch/powerpc/kvm/book3s_hv_builtin.c | 11 +--
 arch/powerpc/mm/book3s64/hash_utils.c| 16 ++---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 11 ++-
 arch/powerpc/mm/kasan/kasan_init_32.c|  8 +--
 arch/powerpc/mm/mem.c| 33 +
 arch/powerpc/mm/numa.c   |  7 +-
 arch/powerpc/mm/pgtable_32.c |  8 +--
 arch/riscv/mm/init.c | 34 +++---
 arch/riscv/mm/kasan_init.c   | 10 +--
 arch/s390/kernel/crash_dump.c|  8 +--
 arch/s390/kernel/setup.c | 31 +
 arch/s390/mm/page-states.c   |  6 +-
 arch/s390/mm/vmem.c  | 16 +++--
 arch/sh/mm/init.c|  9 +--
 arch/sparc/mm/init_64.c  | 12 ++--
 arch/x86/kernel/setup.c  | 56 +---
 arch/x86/mm/numa.c   |  2 +-
 arch/xtensa/mm/init.c| 55 +++
 drivers/bus/mvebu-mbus.c | 12 ++--
 drivers/irqchip/irq-gic-v3-its.c |  2 +-
 drivers/s390/char/zcore.c|  9 +--
 include/linux/memblock.h | 65 +-
 kernel/dma/contiguous.c  | 11 +--
 mm/memblock.c| 85 
 mm/page_alloc.c  | 11 ++-
 mm/sparse.c  | 10 ++-
 49 files changed, 366 insertions(+), 561 deletions(-)

-- 
2.26.2

Re: [PATCH 06/15] powerpc: fadamp: simplify fadump_reserve_crash_area()

2020-08-01 Thread Mike Rapoport

On Thu, Jul 30, 2020 at 10:15:13PM +1000, Michael Ellerman wrote:
> Mike Rapoport  writes:
> > From: Mike Rapoport 
> >
> > fadump_reserve_crash_area() reserves memory from a specified base address
> > till the end of the RAM.
> >
> > Replace iteration through the memblock.memory with a single call to
> > memblock_reserve() with appropriate  that will take care of proper memory
>  ^
>  parameters?
> > reservation.
> >
> > Signed-off-by: Mike Rapoport 
> > ---
> >  arch/powerpc/kernel/fadump.c | 20 +---
> >  1 file changed, 1 insertion(+), 19 deletions(-)
> 
> I think this looks OK to me, but I don't have a setup to test it easily.
> I've added Hari to Cc who might be able to.
> 
> But I'll give you an ack in the hope that it works :)

Actually, I did some digging in the git log and the traversal was added
there on purpose by the commit b71a693d3db3 ("powerpc/fadump: exclude
memory holes while reserving memory in second kernel")
Presuming this is still required I'm going to drop this patch and will
simply replace for_each_memblock() with for_each_mem_range() in v2.
 
> Acked-by: Michael Ellerman 
> 
> 
> > diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
> > index 78ab9a6ee6ac..2446a61e3c25 100644
> > --- a/arch/powerpc/kernel/fadump.c
> > +++ b/arch/powerpc/kernel/fadump.c
> > @@ -1658,25 +1658,7 @@ int __init fadump_reserve_mem(void)
> >  /* Preserve everything above the base address */
> >  static void __init fadump_reserve_crash_area(u64 base)
> >  {
> > -   struct memblock_region *reg;
> > -   u64 mstart, msize;
> > -
> > -   for_each_memblock(memory, reg) {
> > -   mstart = reg->base;
> > -   msize  = reg->size;
> > -
> > -   if ((mstart + msize) < base)
> > -   continue;
> > -
> > -   if (mstart < base) {
> > -   msize -= (base - mstart);
> > -   mstart = base;
> > -   }
> > -
> > -   pr_info("Reserving %lluMB of memory at %#016llx for preserving 
> > crash data",
> > -   (msize >> 20), mstart);
> > -   memblock_reserve(mstart, msize);
> > -   }
> > +   memblock_reserve(base, memblock_end_of_DRAM() - base);
> >  }
> >  
> >  unsigned long __init arch_reserved_kernel_pages(void)
> > -- 
> > 2.26.2

-- 
Sincerely yours,
Mike.

Re: [PATCH 14/15] x86/numa: remove redundant iteration over memblock.reserved

2020-07-28 Thread Mike Rapoport

On Tue, Jul 28, 2020 at 07:02:54PM +0800, Baoquan He wrote:
> On 07/28/20 at 08:11am, Mike Rapoport wrote:
> > From: Mike Rapoport 
> > 
> > numa_clear_kernel_node_hotplug() function first traverses numa_meminfo
> > regions to set node ID in memblock.reserved and than traverses
> > memblock.reserved to update reserved_nodemask to include node IDs that were
> > set in the first loop.
> > 
> > Remove redundant traversal over memblock.reserved and update
> > reserved_nodemask while iterating over numa_meminfo.
> > 
> > Signed-off-by: Mike Rapoport 
> > ---
> >  arch/x86/mm/numa.c | 26 ++
> >  1 file changed, 10 insertions(+), 16 deletions(-)
> > 
> > diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
> > index 8ee952038c80..4078abd33938 100644
> > --- a/arch/x86/mm/numa.c
> > +++ b/arch/x86/mm/numa.c
> > @@ -498,31 +498,25 @@ static void __init 
> > numa_clear_kernel_node_hotplug(void)
> >  * and use those ranges to set the nid in memblock.reserved.
> >  * This will split up the memblock regions along node
> >  * boundaries and will set the node IDs as well.
> > +*
> > +* The nid will also be set in reserved_nodemask which is later
> > +* used to clear MEMBLOCK_HOTPLUG flag.
> > +*
> > +* [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
> > +*   numa_meminfo might not include all memblock.reserved
> > +*   memory ranges, because quirks such as trim_snb_memory()
> > +*   reserve specific pages for Sandy Bridge graphics.
> > +*   These ranges will remain with nid == MAX_NUMNODES. ]
> >  */
> > for (i = 0; i < numa_meminfo.nr_blks; i++) {
> > struct numa_memblk *mb = numa_meminfo.blk + i;
> > int ret;
> >  
> > ret = memblock_set_node(mb->start, mb->end - mb->start, 
> > , mb->nid);
> > +   node_set(mb->nid, reserved_nodemask);
> 
> Really? This will set all node id into reserved_nodemask. But in the
> current code, it's setting nid into memblock reserved region which
> interleaves with numa_memoinfo, then get those nid and set it in
> reserved_nodemask. This is so different, with my understanding. Please
> correct me if I am wrong.

You are right, I've missed the intersections of numa_meminfo with
memblock.reserved.

x86 interaction with memblock is so, hmm, interesting...
 
> Thanks
> Baoquan
> 
> > WARN_ON_ONCE(ret);
> > }
> >  
> > -   /*
> > -* Now go over all reserved memblock regions, to construct a
> > -* node mask of all kernel reserved memory areas.
> > -*
> > -* [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
> > -*   numa_meminfo might not include all memblock.reserved
> > -*   memory ranges, because quirks such as trim_snb_memory()
> > -*   reserve specific pages for Sandy Bridge graphics. ]
> > -*/
> > -   for_each_memblock(reserved, mb_region) {
> > -   int nid = memblock_get_region_node(mb_region);
> > -
> > -   if (nid != MAX_NUMNODES)
> > -   node_set(nid, reserved_nodemask);
> > -   }
> > -
> > /*
> >  * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
> >  * belonging to the reserved node mask.
> > -- 
> > 2.26.2
> > 
> > 
> 

-- 
Sincerely yours,
Mike.

Re: [PATCH 14/15] x86/numa: remove redundant iteration over memblock.reserved

2020-07-28 Thread Mike Rapoport

On Tue, Jul 28, 2020 at 12:44:40PM +0200, Ingo Molnar wrote:
> 
> * Mike Rapoport  wrote:
> 
> > From: Mike Rapoport 
> > 
> > numa_clear_kernel_node_hotplug() function first traverses numa_meminfo
> > regions to set node ID in memblock.reserved and than traverses
> > memblock.reserved to update reserved_nodemask to include node IDs that were
> > set in the first loop.
> > 
> > Remove redundant traversal over memblock.reserved and update
> > reserved_nodemask while iterating over numa_meminfo.
> > 
> > Signed-off-by: Mike Rapoport 
> > ---
> >  arch/x86/mm/numa.c | 26 ++
> >  1 file changed, 10 insertions(+), 16 deletions(-)
> 
> I suspect you'd like to carry this in the -mm tree?

Yes.
 
> Acked-by: Ingo Molnar 

Thanks!

> Thanks,
> 
>   Ingo

-- 
Sincerely yours,
Mike.

[PATCH 15/15] memblock: remove 'type' parameter from for_each_memblock()

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

for_each_memblock() is used exclusively to iterate over memblock.memory in
a few places that use data from memblock_region rather than the memory
ranges.

Remove type parameter from the for_each_memblock() iterator to improve
encapsulation of memblock internals from its users.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/kernel/setup.c  |  2 +-
 arch/arm64/mm/numa.c   |  2 +-
 arch/mips/netlogic/xlp/setup.c |  2 +-
 include/linux/memblock.h   | 10 +++---
 mm/memblock.c  |  4 ++--
 mm/page_alloc.c|  8 
 6 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 93b3844cf442..23da7908cbed 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -217,7 +217,7 @@ static void __init request_standard_resources(void)
if (!standard_resources)
panic("%s: Failed to allocate %zu bytes\n", __func__, res_size);
 
-   for_each_memblock(memory, region) {
+   for_each_memblock(region) {
res = _resources[i++];
if (memblock_is_nomap(region)) {
res->name  = "reserved";
diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index 0cbdbcc885fb..08721d2c0b79 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -350,7 +350,7 @@ static int __init numa_register_nodes(void)
struct memblock_region *mblk;
 
/* Check that valid nid is set to memblks */
-   for_each_memblock(memory, mblk) {
+   for_each_memblock(mblk) {
int mblk_nid = memblock_get_region_node(mblk);
 
if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
diff --git a/arch/mips/netlogic/xlp/setup.c b/arch/mips/netlogic/xlp/setup.c
index 1a0fc5b62ba4..e69d9fc468cf 100644
--- a/arch/mips/netlogic/xlp/setup.c
+++ b/arch/mips/netlogic/xlp/setup.c
@@ -70,7 +70,7 @@ static void nlm_fixup_mem(void)
const int pref_backup = 512;
struct memblock_region *mem;
 
-   for_each_memblock(memory, mem) {
+   for_each_memblock(mem) {
memblock_remove(mem->base + mem->size - pref_backup,
pref_backup);
}
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index d70c2835e913..c901cb8ecf92 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -527,9 +527,13 @@ static inline unsigned long 
memblock_region_reserved_end_pfn(const struct memblo
return PFN_UP(reg->base + reg->size);
 }
 
-#define for_each_memblock(memblock_type, region)   
\
-   for (region = memblock.memblock_type.regions;   
\
-region < (memblock.memblock_type.regions + 
memblock.memblock_type.cnt);\
+/**
+ * for_each_memblock - itereate over registered memory regions
+ * @region: loop variable
+ */
+#define for_each_memblock(region)  \
+   for (region = memblock.memory.regions;  \
+region < (memblock.memory.regions + memblock.memory.cnt);  \
 region++)
 
 extern void *alloc_large_system_hash(const char *tablename,
diff --git a/mm/memblock.c b/mm/memblock.c
index 2ad5e6e47215..550bb72cf6cb 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1694,7 +1694,7 @@ static phys_addr_t __init_memblock 
__find_max_addr(phys_addr_t limit)
 * the memory memblock regions, if the @limit exceeds the total size
 * of those regions, max_addr will keep original value PHYS_ADDR_MAX
 */
-   for_each_memblock(memory, r) {
+   for_each_memblock(r) {
if (limit <= r->size) {
max_addr = r->base + limit;
break;
@@ -1864,7 +1864,7 @@ void __init_memblock memblock_trim_memory(phys_addr_t 
align)
phys_addr_t start, end, orig_start, orig_end;
struct memblock_region *r;
 
-   for_each_memblock(memory, r) {
+   for_each_memblock(r) {
orig_start = r->base;
orig_end = r->base + r->size;
start = round_up(orig_start, align);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 95af111d69d3..8a19f46dc86e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5927,7 +5927,7 @@ overlap_memmap_init(unsigned long zone, unsigned long 
*pfn)
 
if (mirrored_kernelcore && zone == ZONE_MOVABLE) {
if (!r || *pfn >= memblock_region_memory_end_pfn(r)) {
-   for_each_memblock(memory, r) {
+   for_each_memblock(r) {
if (*pfn < memblock_region_memory_end_pfn(r))
break;
}
@@ -6528,7 +6528,7 @@ static unsigned long __init zone_absent_pages_in_node(int 
nid,

[PATCH 14/15] x86/numa: remove redundant iteration over memblock.reserved

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

numa_clear_kernel_node_hotplug() function first traverses numa_meminfo
regions to set node ID in memblock.reserved and then traverses
memblock.reserved to update reserved_nodemask to include node IDs that were
set in the first loop.

Remove redundant traversal over memblock.reserved and update
reserved_nodemask while iterating over numa_meminfo.

Signed-off-by: Mike Rapoport 
---
 arch/x86/mm/numa.c | 26 ++
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index 8ee952038c80..4078abd33938 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -498,31 +498,25 @@ static void __init numa_clear_kernel_node_hotplug(void)
 * and use those ranges to set the nid in memblock.reserved.
 * This will split up the memblock regions along node
 * boundaries and will set the node IDs as well.
+*
+* The nid will also be set in reserved_nodemask which is later
+* used to clear MEMBLOCK_HOTPLUG flag.
+*
+* [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
+*   numa_meminfo might not include all memblock.reserved
+*   memory ranges, because quirks such as trim_snb_memory()
+*   reserve specific pages for Sandy Bridge graphics.
+*   These ranges will remain with nid == MAX_NUMNODES. ]
 */
for (i = 0; i < numa_meminfo.nr_blks; i++) {
struct numa_memblk *mb = numa_meminfo.blk + i;
int ret;
 
ret = memblock_set_node(mb->start, mb->end - mb->start, 
, mb->nid);
+   node_set(mb->nid, reserved_nodemask);
WARN_ON_ONCE(ret);
}
 
-   /*
-* Now go over all reserved memblock regions, to construct a
-* node mask of all kernel reserved memory areas.
-*
-* [ Note, when booting with mem=nn[kMG] or in a kdump kernel,
-*   numa_meminfo might not include all memblock.reserved
-*   memory ranges, because quirks such as trim_snb_memory()
-*   reserve specific pages for Sandy Bridge graphics. ]
-*/
-   for_each_memblock(reserved, mb_region) {
-   int nid = memblock_get_region_node(mb_region);
-
-   if (nid != MAX_NUMNODES)
-   node_set(nid, reserved_nodemask);
-   }
-
/*
 * Finally, clear the MEMBLOCK_HOTPLUG flag for all memory
 * belonging to the reserved node mask.
-- 
2.26.2

[PATCH 13/15] arch, drivers: replace for_each_membock() with for_each_mem_range()

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

There are several occurrences of the following pattern:

for_each_memblock(memory, reg) {
start = __pfn_to_phys(memblock_region_memory_base_pfn(reg));
end = __pfn_to_phys(memblock_region_memory_end_pfn(reg));

/* do something with start and end */
}

Using for_each_mem_range() iterator is more appropriate in such cases and
allows simpler and cleaner code.

Signed-off-by: Mike Rapoport 
---
 arch/arm/kernel/setup.c  | 18 +++
 arch/arm/mm/mmu.c| 39 
 arch/arm/mm/pmsa-v7.c| 20 ++--
 arch/arm/mm/pmsa-v8.c| 17 +--
 arch/arm/xen/mm.c|  7 +++--
 arch/arm64/mm/kasan_init.c   |  8 ++---
 arch/arm64/mm/mmu.c  | 11 ++-
 arch/c6x/kernel/setup.c  |  9 +++---
 arch/microblaze/mm/init.c|  9 +++---
 arch/mips/cavium-octeon/dma-octeon.c | 12 
 arch/mips/kernel/setup.c | 31 +--
 arch/openrisc/mm/init.c  |  8 +++--
 arch/powerpc/kernel/fadump.c | 27 +++-
 arch/powerpc/mm/book3s64/hash_utils.c| 16 +-
 arch/powerpc/mm/book3s64/radix_pgtable.c | 11 +++
 arch/powerpc/mm/kasan/kasan_init_32.c|  8 ++---
 arch/powerpc/mm/mem.c| 16 ++
 arch/powerpc/mm/pgtable_32.c |  8 ++---
 arch/riscv/mm/init.c | 24 ++-
 arch/riscv/mm/kasan_init.c   | 10 +++---
 arch/s390/kernel/setup.c | 27 ++--
 arch/s390/mm/vmem.c  | 16 +-
 arch/sparc/mm/init_64.c  | 12 +++-
 drivers/bus/mvebu-mbus.c | 12 
 drivers/s390/char/zcore.c|  9 +++---
 25 files changed, 187 insertions(+), 198 deletions(-)

diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c
index d8e18cdd96d3..3f65d0ac9f63 100644
--- a/arch/arm/kernel/setup.c
+++ b/arch/arm/kernel/setup.c
@@ -843,19 +843,25 @@ early_param("mem", early_mem);
 
 static void __init request_standard_resources(const struct machine_desc *mdesc)
 {
-   struct memblock_region *region;
+   phys_addr_t start, end, res_end;
struct resource *res;
+   u64 i;
 
kernel_code.start   = virt_to_phys(_text);
kernel_code.end = virt_to_phys(__init_begin - 1);
kernel_data.start   = virt_to_phys(_sdata);
kernel_data.end = virt_to_phys(_end - 1);
 
-   for_each_memblock(memory, region) {
-   phys_addr_t start = 
__pfn_to_phys(memblock_region_memory_base_pfn(region));
-   phys_addr_t end = 
__pfn_to_phys(memblock_region_memory_end_pfn(region)) - 1;
+   for_each_mem_range(i, , ) {
unsigned long boot_alias_start;
 
+   /*
+* In memblock, end points to the first byte after the
+* range while in resourses, end points to the last byte in
+* the range.
+*/
+   res_end = end - 1;
+
/*
 * Some systems have a special memory alias which is only
 * used for booting.  We need to advertise this region to
@@ -869,7 +875,7 @@ static void __init request_standard_resources(const struct 
machine_desc *mdesc)
  __func__, sizeof(*res));
res->name = "System RAM (boot alias)";
res->start = boot_alias_start;
-   res->end = phys_to_idmap(end);
+   res->end = phys_to_idmap(res_end);
res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
request_resource(_resource, res);
}
@@ -880,7 +886,7 @@ static void __init request_standard_resources(const struct 
machine_desc *mdesc)
  sizeof(*res));
res->name  = "System RAM";
res->start = start;
-   res->end = end;
+   res->end = res_end;
res->flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
request_resource(_resource, res);
diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c
index 628028bfbb92..a149d9cb4fdb 100644
--- a/arch/arm/mm/mmu.c
+++ b/arch/arm/mm/mmu.c
@@ -1155,9 +1155,8 @@ phys_addr_t arm_lowmem_limit __initdata = 0;
 
 void __init adjust_lowmem_bounds(void)
 {
-   phys_addr_t memblock_limit = 0;
-   u64 vmalloc_limit;
-   struct memblock_region *reg;
+   phys_addr_t block_start, block_end, memblock_limit = 0;
+   u64 vmalloc_limit, i;
phys_addr_t lowmem_limit = 0;
 
/*
@@ -1173,26 +1172,18 @@ void __init adjust_lowmem_bounds(void)
 * The first usable region must be PMD alig

[PATCH 12/15] arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

There are several occurrences of the following pattern:

for_each_memblock(memory, reg) {
start_pfn = memblock_region_memory_base_pfn(reg);
end_pfn = memblock_region_memory_end_pfn(reg);

/* do something with start_pfn and end_pfn */
}

Rather than iterate over all memblock.memory regions and each time query
for their start and end PFNs, use for_each_mem_pfn_range() iterator to get
simpler and clearer code.

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c   | 11 ---
 arch/arm64/mm/init.c | 11 ---
 arch/powerpc/kernel/fadump.c | 11 ++-
 arch/powerpc/mm/mem.c| 15 ---
 arch/powerpc/mm/numa.c   |  7 ++-
 arch/s390/mm/page-states.c   |  6 ++
 arch/sh/mm/init.c|  9 +++--
 mm/memblock.c|  6 ++
 mm/sparse.c  | 10 --
 9 files changed, 35 insertions(+), 51 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 626af348eb8f..bb56668b4f54 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -304,16 +304,14 @@ free_memmap(unsigned long start_pfn, unsigned long 
end_pfn)
  */
 static void __init free_unused_memmap(void)
 {
-   unsigned long start, prev_end = 0;
-   struct memblock_region *reg;
+   unsigned long start, end, prev_end = 0;
+   int i;
 
/*
 * This relies on each bank being in address order.
 * The banks are sorted previously in bootmem_init().
 */
-   for_each_memblock(memory, reg) {
-   start = memblock_region_memory_base_pfn(reg);
-
+   for_each_mem_pfn_range(i, NUMA_NO_NODE, , , NULL) {
 #ifdef CONFIG_SPARSEMEM
/*
 * Take care not to free memmap entries that don't exist
@@ -341,8 +339,7 @@ static void __init free_unused_memmap(void)
 * memmap entries are valid from the bank end aligned to
 * MAX_ORDER_NR_PAGES.
 */
-   prev_end = ALIGN(memblock_region_memory_end_pfn(reg),
-MAX_ORDER_NR_PAGES);
+   prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
 
 #ifdef CONFIG_SPARSEMEM
diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 1e93cfc7c47a..271a8ea32482 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -473,12 +473,10 @@ static inline void free_memmap(unsigned long start_pfn, 
unsigned long end_pfn)
  */
 static void __init free_unused_memmap(void)
 {
-   unsigned long start, prev_end = 0;
-   struct memblock_region *reg;
-
-   for_each_memblock(memory, reg) {
-   start = __phys_to_pfn(reg->base);
+   unsigned long start, end, prev_end = 0;
+   int i;
 
+   for_each_mem_pfn_range(i, NUMA_NO_NODE, , , NULL) {
 #ifdef CONFIG_SPARSEMEM
/*
 * Take care not to free memmap entries that don't exist due
@@ -498,8 +496,7 @@ static void __init free_unused_memmap(void)
 * memmap entries are valid from the bank end aligned to
 * MAX_ORDER_NR_PAGES.
 */
-   prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
-MAX_ORDER_NR_PAGES);
+   prev_end = ALIGN(end, MAX_ORDER_NR_PAGES);
}
 
 #ifdef CONFIG_SPARSEMEM
diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 2446a61e3c25..fdbafe417139 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1216,14 +1216,15 @@ static void fadump_free_reserved_memory(unsigned long 
start_pfn,
  */
 static void fadump_release_reserved_area(u64 start, u64 end)
 {
-   u64 tstart, tend, spfn, epfn;
-   struct memblock_region *reg;
+   u64 tstart, tend, spfn, epfn, reg_spfn, reg_epfn, i;
 
spfn = PHYS_PFN(start);
epfn = PHYS_PFN(end);
-   for_each_memblock(memory, reg) {
-   tstart = max_t(u64, spfn, memblock_region_memory_base_pfn(reg));
-   tend   = min_t(u64, epfn, memblock_region_memory_end_pfn(reg));
+
+   for_each_mem_pfn_range(i, NUMA_NO_NODE, &reg_spfn, &reg_epfn, NULL) {
+   tstart = max_t(u64, spfn, reg_spfn);
+   tend   = min_t(u64, epfn, reg_epfn);
+
if (tstart < tend) {
fadump_free_reserved_memory(tstart, tend);
 
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index c2c11eb8dcfc..38d1acd7c8ef 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -192,15 +192,16 @@ void __init initmem_init(void)
 /* mark pages that don't exist as nosave */
 static int __init mark_nonram_nosave(void)
 {
-   struct memblock_region *reg, *prev = NULL;
+   unsigned long spfn, epfn, prev = 0;
+   int i;
 
-   for_each_memblock(memory, reg) {
-   if (prev &&
-   memblock_region

[PATCH 11/15] memblock: reduce number of parameters in for_each_mem_range()

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

Currently the for_each_mem_range() iterator is the most generic way to
traverse memblock regions. As such, it has 8 parameters and is hardly
convenient for users. Most users choose to utilize one of its wrappers and
the only user that actually needs most of the parameters outside memblock is
the s390 crash dump implementation.

To avoid yet another naming for memblock iterators, rename the existing
for_each_mem_range() to __for_each_mem_range() and add a new
for_each_mem_range() wrapper with only index, start and end parameters.

The new wrapper nicely fits into init_unavailable_mem() and will be used in
upcoming changes to simplify memblock traversals.

Signed-off-by: Mike Rapoport 
---
 .clang-format  |  1 +
 arch/arm64/kernel/machine_kexec_file.c |  6 ++
 arch/s390/kernel/crash_dump.c  |  8 
 include/linux/memblock.h   | 18 ++
 mm/page_alloc.c|  3 +--
 5 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/.clang-format b/.clang-format
index a0a96088c74f..52ededab25ce 100644
--- a/.clang-format
+++ b/.clang-format
@@ -205,6 +205,7 @@ ForEachMacros:
   - 'for_each_memblock_type'
   - 'for_each_memcg_cache_index'
   - 'for_each_mem_pfn_range'
+  - '__for_each_mem_range'
   - 'for_each_mem_range'
   - 'for_each_mem_range_rev'
   - 'for_each_migratetype_order'
diff --git a/arch/arm64/kernel/machine_kexec_file.c 
b/arch/arm64/kernel/machine_kexec_file.c
index 361a1143e09e..5b0e67b93cdc 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -215,8 +215,7 @@ static int prepare_elf_headers(void **addr, unsigned long 
*sz)
phys_addr_t start, end;
 
nr_ranges = 1; /* for exclusion of crashkernel region */
-   for_each_mem_range(i, , NULL, NUMA_NO_NODE,
-   MEMBLOCK_NONE, , , NULL)
+   for_each_mem_range(i, &start, &end)
nr_ranges++;
 
cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
@@ -225,8 +224,7 @@ static int prepare_elf_headers(void **addr, unsigned long 
*sz)
 
cmem->max_nr_ranges = nr_ranges;
cmem->nr_ranges = 0;
-   for_each_mem_range(i, , NULL, NUMA_NO_NODE,
-   MEMBLOCK_NONE, , , NULL) {
+   for_each_mem_range(i, &start, &end) {
cmem->ranges[cmem->nr_ranges].start = start;
cmem->ranges[cmem->nr_ranges].end = end - 1;
cmem->nr_ranges++;
diff --git a/arch/s390/kernel/crash_dump.c b/arch/s390/kernel/crash_dump.c
index f96a5857bbfd..e28085c725ff 100644
--- a/arch/s390/kernel/crash_dump.c
+++ b/arch/s390/kernel/crash_dump.c
@@ -549,8 +549,8 @@ static int get_mem_chunk_cnt(void)
int cnt = 0;
u64 idx;
 
-   for_each_mem_range(idx, , _type, NUMA_NO_NODE,
-  MEMBLOCK_NONE, NULL, NULL, NULL)
+   __for_each_mem_range(idx, , _type, NUMA_NO_NODE,
+MEMBLOCK_NONE, NULL, NULL, NULL)
cnt++;
return cnt;
 }
@@ -563,8 +563,8 @@ static void loads_init(Elf64_Phdr *phdr, u64 loads_offset)
phys_addr_t start, end;
u64 idx;
 
-   for_each_mem_range(idx, , _type, NUMA_NO_NODE,
-  MEMBLOCK_NONE, , , NULL) {
+   __for_each_mem_range(idx, , _type, NUMA_NO_NODE,
+MEMBLOCK_NONE, , , NULL) {
phdr->p_filesz = end - start;
phdr->p_type = PT_LOAD;
phdr->p_offset = start;
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index e6a23b3db696..d70c2835e913 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -142,7 +142,7 @@ void __next_reserved_mem_region(u64 *idx, phys_addr_t 
*out_start,
 void __memblock_free_late(phys_addr_t base, phys_addr_t size);
 
 /**
- * for_each_mem_range - iterate through memblock areas from type_a and not
+ * __for_each_mem_range - iterate through memblock areas from type_a and not
  * included in type_b. Or just type_a if type_b is NULL.
  * @i: u64 used as loop variable
  * @type_a: ptr to memblock_type to iterate
@@ -153,7 +153,7 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t 
size);
  * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL
  * @p_nid: ptr to int for nid of the range, can be %NULL
  */
-#define for_each_mem_range(i, type_a, type_b, nid, flags,  \
+#define __for_each_mem_range(i, type_a, type_b, nid, flags,\
   p_start, p_end, p_nid)   \
for (i = 0, __next_mem_range(, nid, flags, type_a, type_b,\
 p_start, p_end, p_nid);\
@@ -182,6 +182,16 @@ void __memblock_free_late(phys_addr_t base, phys_addr_t 
size);
 __next_mem_range_rev(, nid, flags, type_a, type_b,   \

[PATCH 10/15] memblock: make memblock_debug and related functionality private

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

The only user of memblock_dbg() outside memblock was the s390 setup code,
and it is converted to use pr_debug() instead.
This makes it possible to stop exposing memblock_debug and memblock_dbg() to
the rest of the kernel.

Signed-off-by: Mike Rapoport 
---
 arch/s390/kernel/setup.c |  4 ++--
 include/linux/memblock.h | 12 +---
 mm/memblock.c| 13 +++--
 3 files changed, 14 insertions(+), 15 deletions(-)

diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c
index 07aa15ba43b3..8b284cf6e199 100644
--- a/arch/s390/kernel/setup.c
+++ b/arch/s390/kernel/setup.c
@@ -776,8 +776,8 @@ static void __init memblock_add_mem_detect_info(void)
unsigned long start, end;
int i;
 
-   memblock_dbg("physmem info source: %s (%hhd)\n",
-get_mem_info_source(), mem_detect.info_source);
+   pr_debug("physmem info source: %s (%hhd)\n",
+get_mem_info_source(), mem_detect.info_source);
/* keep memblock lists close to the kernel */
memblock_set_bottom_up(true);
for_each_mem_detect_block(i, , ) {
diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 220b5f0dad42..e6a23b3db696 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -90,7 +90,6 @@ struct memblock {
 };
 
 extern struct memblock memblock;
-extern int memblock_debug;
 
 #ifndef CONFIG_ARCH_KEEP_MEMBLOCK
 #define __init_memblock __meminit
@@ -102,9 +101,6 @@ void memblock_discard(void);
 static inline void memblock_discard(void) {}
 #endif
 
-#define memblock_dbg(fmt, ...) \
-   if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
-
 phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end,
   phys_addr_t size, phys_addr_t align);
 void memblock_allow_resize(void);
@@ -456,13 +452,7 @@ bool memblock_is_region_memory(phys_addr_t base, 
phys_addr_t size);
 bool memblock_is_reserved(phys_addr_t addr);
 bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size);
 
-extern void __memblock_dump_all(void);
-
-static inline void memblock_dump_all(void)
-{
-   if (memblock_debug)
-   __memblock_dump_all();
-}
+void memblock_dump_all(void);
 
 /**
  * memblock_set_current_limit - Set the current allocation limit to allow
diff --git a/mm/memblock.c b/mm/memblock.c
index a5b9b3df81fc..824938849f6d 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -134,7 +134,10 @@ struct memblock memblock __initdata_memblock = {
 i < memblock_type->cnt;\
 i++, rgn = _type->regions[i])
 
-int memblock_debug __initdata_memblock;
+#define memblock_dbg(fmt, ...) \
+   if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__)
+
+static int memblock_debug __initdata_memblock;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
 static int memblock_memory_in_slab __initdata_memblock = 0;
@@ -1919,7 +1922,7 @@ static void __init_memblock memblock_dump(struct 
memblock_type *type)
}
 }
 
-void __init_memblock __memblock_dump_all(void)
+static void __init_memblock __memblock_dump_all(void)
 {
pr_info("MEMBLOCK configuration:\n");
pr_info(" memory size = %pa reserved size = %pa\n",
@@ -1933,6 +1936,12 @@ void __init_memblock __memblock_dump_all(void)
 #endif
 }
 
+void __init_memblock memblock_dump_all(void)
+{
+   if (memblock_debug)
+   __memblock_dump_all();
+}
+
 void __init memblock_allow_resize(void)
 {
memblock_can_resize = 1;
-- 
2.26.2

[PATCH 09/15] memblock: make for_each_memblock_type() iterator private

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

for_each_memblock_type() is not used outside mm/memblock.c, move it there
from include/linux/memblock.h

Signed-off-by: Mike Rapoport 
---
 include/linux/memblock.h | 5 -
 mm/memblock.c| 5 +
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/include/linux/memblock.h b/include/linux/memblock.h
index 017fae833d4a..220b5f0dad42 100644
--- a/include/linux/memblock.h
+++ b/include/linux/memblock.h
@@ -532,11 +532,6 @@ static inline unsigned long 
memblock_region_reserved_end_pfn(const struct memblo
 region < (memblock.memblock_type.regions + 
memblock.memblock_type.cnt);\
 region++)
 
-#define for_each_memblock_type(i, memblock_type, rgn)  \
-   for (i = 0, rgn = &memblock_type->regions[0];   \
-i < memblock_type->cnt;\
-i++, rgn = &memblock_type->regions[i])
-
 extern void *alloc_large_system_hash(const char *tablename,
 unsigned long bucketsize,
 unsigned long numentries,
diff --git a/mm/memblock.c b/mm/memblock.c
index 39aceafc57f6..a5b9b3df81fc 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -129,6 +129,11 @@ struct memblock memblock __initdata_memblock = {
.current_limit  = MEMBLOCK_ALLOC_ANYWHERE,
 };
 
+#define for_each_memblock_type(i, memblock_type, rgn)  \
+   for (i = 0, rgn = &memblock_type->regions[0];   \
+i < memblock_type->cnt;\
+i++, rgn = &memblock_type->regions[i])
+
 int memblock_debug __initdata_memblock;
 static bool system_has_some_mirror __initdata_memblock = false;
 static int memblock_can_resize __initdata_memblock;
-- 
2.26.2

[PATCH 08/15] microblaze: drop unneeded NUMA and sparsemem initializations

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

microblaze supports neither NUMA nor SPARSEMEM, so there is no point in
calling memblock_set_node() and sparse_memory_present_with_active_regions()
during microblaze memory initialization.

Remove these calls and the surrounding code.

Signed-off-by: Mike Rapoport 
---
 arch/microblaze/mm/init.c | 17 +
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/arch/microblaze/mm/init.c b/arch/microblaze/mm/init.c
index 521b59ba716c..49e0c241f9b1 100644
--- a/arch/microblaze/mm/init.c
+++ b/arch/microblaze/mm/init.c
@@ -105,9 +105,8 @@ static void __init paging_init(void)
 
 void __init setup_memory(void)
 {
-   struct memblock_region *reg;
-
 #ifndef CONFIG_MMU
+   struct memblock_region *reg;
u32 kernel_align_start, kernel_align_size;
 
/* Find main memory where is the kernel */
@@ -161,20 +160,6 @@ void __init setup_memory(void)
pr_info("%s: max_low_pfn: %#lx\n", __func__, max_low_pfn);
pr_info("%s: max_pfn: %#lx\n", __func__, max_pfn);
 
-   /* Add active regions with valid PFNs */
-   for_each_memblock(memory, reg) {
-   unsigned long start_pfn, end_pfn;
-
-   start_pfn = memblock_region_memory_base_pfn(reg);
-   end_pfn = memblock_region_memory_end_pfn(reg);
-   memblock_set_node(start_pfn << PAGE_SHIFT,
- (end_pfn - start_pfn) << PAGE_SHIFT,
- , 0);
-   }
-
-   /* XXX need to clip this if using highmem? */
-   sparse_memory_present_with_active_regions(0);
-
paging_init();
 }
 
-- 
2.26.2

[PATCH 07/15] riscv: drop unneeded node initialization

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

RISC-V does not (yet) support NUMA, and for UMA architectures node 0 is
used implicitly during early memory initialization.

There is no need to call memblock_set_node(), remove this call and the
surrounding code.

Signed-off-by: Mike Rapoport 
---
 arch/riscv/mm/init.c | 9 -
 1 file changed, 9 deletions(-)

diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c
index 79e9d55bdf1a..7440ba2cdaaa 100644
--- a/arch/riscv/mm/init.c
+++ b/arch/riscv/mm/init.c
@@ -191,15 +191,6 @@ void __init setup_bootmem(void)
early_init_fdt_scan_reserved_mem();
memblock_allow_resize();
memblock_dump_all();
-
-   for_each_memblock(memory, reg) {
-   unsigned long start_pfn = memblock_region_memory_base_pfn(reg);
-   unsigned long end_pfn = memblock_region_memory_end_pfn(reg);
-
-   memblock_set_node(PFN_PHYS(start_pfn),
- PFN_PHYS(end_pfn - start_pfn),
- , 0);
-   }
 }
 
 #ifdef CONFIG_MMU
-- 
2.26.2

[PATCH 06/15] powerpc: fadump: simplify fadump_reserve_crash_area()

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

fadump_reserve_crash_area() reserves memory from a specified base address
till the end of the RAM.

Replace iteration through memblock.memory with a single call to
memblock_reserve() with appropriate arguments, which will take care of
proper memory reservation.

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kernel/fadump.c | 20 +---
 1 file changed, 1 insertion(+), 19 deletions(-)

diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c
index 78ab9a6ee6ac..2446a61e3c25 100644
--- a/arch/powerpc/kernel/fadump.c
+++ b/arch/powerpc/kernel/fadump.c
@@ -1658,25 +1658,7 @@ int __init fadump_reserve_mem(void)
 /* Preserve everything above the base address */
 static void __init fadump_reserve_crash_area(u64 base)
 {
-   struct memblock_region *reg;
-   u64 mstart, msize;
-
-   for_each_memblock(memory, reg) {
-   mstart = reg->base;
-   msize  = reg->size;
-
-   if ((mstart + msize) < base)
-   continue;
-
-   if (mstart < base) {
-   msize -= (base - mstart);
-   mstart = base;
-   }
-
-   pr_info("Reserving %lluMB of memory at %#016llx for preserving 
crash data",
-   (msize >> 20), mstart);
-   memblock_reserve(mstart, msize);
-   }
+   memblock_reserve(base, memblock_end_of_DRAM() - base);
 }
 
 unsigned long __init arch_reserved_kernel_pages(void)
-- 
2.26.2

[PATCH 05/15] h8300, nds32, openrisc: simplify detection of memory extents

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

Instead of traversing memblock.memory regions to find memory_start and
memory_end, simply query memblock_{start,end}_of_DRAM().

Signed-off-by: Mike Rapoport 
---
 arch/h8300/kernel/setup.c| 8 +++-
 arch/nds32/kernel/setup.c| 8 ++--
 arch/openrisc/kernel/setup.c | 9 ++---
 3 files changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/h8300/kernel/setup.c b/arch/h8300/kernel/setup.c
index 28ac88358a89..0281f92eea3d 100644
--- a/arch/h8300/kernel/setup.c
+++ b/arch/h8300/kernel/setup.c
@@ -74,17 +74,15 @@ static void __init bootmem_init(void)
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end)
panic("No memory!");
 
/* setup bootmem globals (we use no_bootmem, but mm still depends on 
this) */
min_low_pfn = PFN_UP(memory_start);
-   max_low_pfn = PFN_DOWN(memblock_end_of_DRAM());
+   max_low_pfn = PFN_DOWN(memory_end);
max_pfn = max_low_pfn;
 
memblock_reserve(__pa(_stext), _end - _stext);
diff --git a/arch/nds32/kernel/setup.c b/arch/nds32/kernel/setup.c
index a066efbe53c0..c356e484dcab 100644
--- a/arch/nds32/kernel/setup.c
+++ b/arch/nds32/kernel/setup.c
@@ -249,12 +249,8 @@ static void __init setup_memory(void)
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   pr_info("%s: Memory: 0x%x-0x%x\n", __func__,
-   memory_start, memory_end);
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end) {
panic("No memory!");
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index 8aa438e1f51f..c5706153d3b6 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -48,17 +48,12 @@ static void __init setup_memory(void)
unsigned long ram_start_pfn;
unsigned long ram_end_pfn;
phys_addr_t memory_start, memory_end;
-   struct memblock_region *region;
 
memory_end = memory_start = 0;
 
/* Find main memory where is the kernel, we assume its the only one */
-   for_each_memblock(memory, region) {
-   memory_start = region->base;
-   memory_end = region->base + region->size;
-   printk(KERN_INFO "%s: Memory: 0x%x-0x%x\n", __func__,
-  memory_start, memory_end);
-   }
+   memory_start = memblock_start_of_DRAM();
+   memory_end = memblock_end_of_DRAM();
 
if (!memory_end) {
panic("No memory!");
-- 
2.26.2

[PATCH 04/15] arm64: numa: simplify dummy_numa_init()

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

dummy_numa_init() loops over memblock.memory and passes nid=0 to
numa_add_memblk() which essentially wraps memblock_set_node(). However,
memblock_set_node() can cope with entire memory span itself, so the loop
over memblock.memory regions is redundant.

Replace the loop with a single call to memblock_set_node() to the entire
memory.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/mm/numa.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/mm/numa.c b/arch/arm64/mm/numa.c
index aafcee3e3f7e..0cbdbcc885fb 100644
--- a/arch/arm64/mm/numa.c
+++ b/arch/arm64/mm/numa.c
@@ -423,19 +423,16 @@ static int __init numa_init(int (*init_func)(void))
  */
 static int __init dummy_numa_init(void)
 {
+   phys_addr_t start = memblock_start_of_DRAM();
+   phys_addr_t end = memblock_end_of_DRAM();
int ret;
-   struct memblock_region *mblk;
 
if (numa_off)
pr_info("NUMA disabled\n"); /* Forced off on command line. */
-   pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n",
-   memblock_start_of_DRAM(), memblock_end_of_DRAM() - 1);
-
-   for_each_memblock(memory, mblk) {
-   ret = numa_add_memblk(0, mblk->base, mblk->base + mblk->size);
-   if (!ret)
-   continue;
+   pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
 
+   ret = numa_add_memblk(0, start, end);
+   if (ret) {
pr_err("NUMA init failed\n");
return ret;
}
-- 
2.26.2

[PATCH 03/15] arm, xtensa: simplify initialization of high memory pages

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

The function free_highpages() in both arm and xtensa essentially open-codes
a for_each_free_mem_range() loop to detect high memory pages that were not
reserved and that should be initialized and passed to the buddy allocator.

Replace open-coded implementation of for_each_free_mem_range() with usage
of memblock API to simplify the code.

Signed-off-by: Mike Rapoport 
---
 arch/arm/mm/init.c| 48 +++--
 arch/xtensa/mm/init.c | 55 ---
 2 files changed, 18 insertions(+), 85 deletions(-)

diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 01e18e43b174..626af348eb8f 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -352,61 +352,29 @@ static void __init free_unused_memmap(void)
 #endif
 }
 
-#ifdef CONFIG_HIGHMEM
-static inline void free_area_high(unsigned long pfn, unsigned long end)
-{
-   for (; pfn < end; pfn++)
-   free_highmem_page(pfn_to_page(pfn));
-}
-#endif
-
 static void __init free_highpages(void)
 {
 #ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
-   struct memblock_region *mem, *res;
+   phys_addr_t range_start, range_end;
+   u64 i;
 
/* set highmem page free */
-   for_each_memblock(memory, mem) {
-   unsigned long start = memblock_region_memory_base_pfn(mem);
-   unsigned long end = memblock_region_memory_end_pfn(mem);
+   for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+   _start, _end, NULL) {
+   unsigned long start = PHYS_PFN(range_start);
+   unsigned long end = PHYS_PFN(range_end);
 
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
 
-   if (memblock_is_nomap(mem))
-   continue;
-
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
 
-   /* Find and exclude any reserved regions */
-   for_each_memblock(reserved, res) {
-   unsigned long res_start, res_end;
-
-   res_start = memblock_region_reserved_base_pfn(res);
-   res_end = memblock_region_reserved_end_pfn(res);
-
-   if (res_end < start)
-   continue;
-   if (res_start < start)
-   res_start = start;
-   if (res_start > end)
-   res_start = end;
-   if (res_end > end)
-   res_end = end;
-   if (res_start != start)
-   free_area_high(start, res_start);
-   start = res_end;
-   if (start == end)
-   break;
-   }
-
-   /* And now free anything which remains */
-   if (start < end)
-   free_area_high(start, end);
+   for (; start < end; start++)
+   free_highmem_page(pfn_to_page(start));
}
 #endif
 }
diff --git a/arch/xtensa/mm/init.c b/arch/xtensa/mm/init.c
index a05b306cf371..ad9d59d93f39 100644
--- a/arch/xtensa/mm/init.c
+++ b/arch/xtensa/mm/init.c
@@ -79,67 +79,32 @@ void __init zones_init(void)
free_area_init(max_zone_pfn);
 }
 
-#ifdef CONFIG_HIGHMEM
-static void __init free_area_high(unsigned long pfn, unsigned long end)
-{
-   for (; pfn < end; pfn++)
-   free_highmem_page(pfn_to_page(pfn));
-}
-
 static void __init free_highpages(void)
 {
+#ifdef CONFIG_HIGHMEM
unsigned long max_low = max_low_pfn;
-   struct memblock_region *mem, *res;
+   phys_addr_t range_start, range_end;
+   u64 i;
 
-   reset_all_zones_managed_pages();
/* set highmem page free */
-   for_each_memblock(memory, mem) {
-   unsigned long start = memblock_region_memory_base_pfn(mem);
-   unsigned long end = memblock_region_memory_end_pfn(mem);
+   for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE,
+   _start, _end, NULL) {
+   unsigned long start = PHYS_PFN(range_start);
+   unsigned long end = PHYS_PFN(range_end);
 
/* Ignore complete lowmem entries */
if (end <= max_low)
continue;
 
-   if (memblock_is_nomap(mem))
-   continue;
-
/* Truncate partial highmem entries */
if (start < max_low)
start = max_low;
 
-   /* Find and exclude any reserved regions */
-   for_each_memblock(reserved, res) {
-   unsigned long res_start, res_end;
-
-   res_start = m

[PATCH 02/15] dma-contiguous: simplify cma_early_percent_memory()

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

The memory size calculation in cma_early_percent_memory() traverses
memblock.memory rather than simply calling memblock_phys_mem_size(). The
comment in that function suggests that at some point there should have been
a call to memblock_analyze() before memblock_phys_mem_size() could be used.
As of now, there is no memblock_analyze() at all and
memblock_phys_mem_size() can be used as soon as cold-plug memory is
registered with memblock.

Replace loop over memblock.memory with a call to memblock_phys_mem_size().

Signed-off-by: Mike Rapoport 
---
 kernel/dma/contiguous.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/kernel/dma/contiguous.c b/kernel/dma/contiguous.c
index 15bc5026c485..1992afd8ca7b 100644
--- a/kernel/dma/contiguous.c
+++ b/kernel/dma/contiguous.c
@@ -73,16 +73,7 @@ early_param("cma", early_cma);
 
 static phys_addr_t __init __maybe_unused cma_early_percent_memory(void)
 {
-   struct memblock_region *reg;
-   unsigned long total_pages = 0;
-
-   /*
-* We cannot use memblock_phys_mem_size() here, because
-* memblock_analyze() has not been called yet.
-*/
-   for_each_memblock(memory, reg)
-   total_pages += memblock_region_memory_end_pfn(reg) -
-  memblock_region_memory_base_pfn(reg);
+   unsigned long total_pages = PHYS_PFN(memblock_phys_mem_size());
 
return (total_pages * CONFIG_CMA_SIZE_PERCENTAGE / 100) << PAGE_SHIFT;
 }
-- 
2.26.2

[PATCH 01/15] KVM: PPC: Book3S HV: simplify kvm_cma_reserve()

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

The memory size calculation in kvm_cma_reserve() traverses memblock.memory
rather than simply calling memblock_phys_mem_size(). The comment in that
function suggests that at some point there should have been a call to
memblock_analyze() before memblock_phys_mem_size() could be used.
As of now, there is no memblock_analyze() at all and
memblock_phys_mem_size() can be used as soon as cold-plug memory is
registered with memblock.

Replace loop over memblock.memory with a call to memblock_phys_mem_size().

Signed-off-by: Mike Rapoport 
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 11 ++-
 1 file changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c 
b/arch/powerpc/kvm/book3s_hv_builtin.c
index 7cd3cf3d366b..56ab0d28de2a 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -95,22 +95,15 @@ EXPORT_SYMBOL_GPL(kvm_free_hpt_cma);
 void __init kvm_cma_reserve(void)
 {
unsigned long align_size;
-   struct memblock_region *reg;
-   phys_addr_t selected_size = 0;
+   phys_addr_t selected_size;
 
/*
 * We need CMA reservation only when we are in HV mode
 */
if (!cpu_has_feature(CPU_FTR_HVMODE))
return;
-   /*
-* We cannot use memblock_phys_mem_size() here, because
-* memblock_analyze() has not been called yet.
-*/
-   for_each_memblock(memory, reg)
-   selected_size += memblock_region_memory_end_pfn(reg) -
-memblock_region_memory_base_pfn(reg);
 
+   selected_size = PHYS_PFN(memblock_phys_mem_size());
selected_size = (selected_size * kvm_cma_resv_ratio / 100) << 
PAGE_SHIFT;
if (selected_size) {
pr_debug("%s: reserving %ld MiB for global area\n", __func__,
-- 
2.26.2

[PATCH 00/15] memblock: seasonal cleaning^w cleanup

2020-07-27 Thread Mike Rapoport

From: Mike Rapoport 

Hi,

These patches simplify several uses of memblock iterators and hide some of
the memblock implementation details from the rest of the system.

The patches are on top of v5.8-rc7 + cherry-pick of "mm/sparse: cleanup the
code surrounding memory_present()" [1] from mmotm tree.

[1] http://lkml.kernel.org/r/20200712083130.22919-1-r...@kernel.org 

Mike Rapoport (15):
  KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
  dma-contiguous: simplify cma_early_percent_memory()
  arm, xtensa: simplify initialization of high memory pages
  arm64: numa: simplify dummy_numa_init()
  h8300, nds32, openrisc: simplify detection of memory extents
  powerpc: fadamp: simplify fadump_reserve_crash_area()
  riscv: drop unneeded node initialization
  mircoblaze: drop unneeded NUMA and sparsemem initializations
  memblock: make for_each_memblock_type() iterator private
  memblock: make memblock_debug and related functionality private
  memblock: reduce number of parameters in for_each_mem_range()
  arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()
  arch, drivers: replace for_each_membock() with for_each_mem_range()
  x86/numa: remove redundant iteration over memblock.reserved
  memblock: remove 'type' parameter from for_each_memblock()

 .clang-format|  1 +
 arch/arm/kernel/setup.c  | 18 +---
 arch/arm/mm/init.c   | 59 +---
 arch/arm/mm/mmu.c| 39 ++--
 arch/arm/mm/pmsa-v7.c| 20 
 arch/arm/mm/pmsa-v8.c| 17 ---
 arch/arm/xen/mm.c|  7 +--
 arch/arm64/kernel/machine_kexec_file.c   |  6 +--
 arch/arm64/kernel/setup.c|  2 +-
 arch/arm64/mm/init.c | 11 ++---
 arch/arm64/mm/kasan_init.c   |  8 ++--
 arch/arm64/mm/mmu.c  | 11 ++---
 arch/arm64/mm/numa.c | 15 +++---
 arch/c6x/kernel/setup.c  |  9 ++--
 arch/h8300/kernel/setup.c|  8 ++--
 arch/microblaze/mm/init.c| 24 ++
 arch/mips/cavium-octeon/dma-octeon.c | 12 ++---
 arch/mips/kernel/setup.c | 31 ++---
 arch/mips/netlogic/xlp/setup.c   |  2 +-
 arch/nds32/kernel/setup.c|  8 +---
 arch/openrisc/kernel/setup.c |  9 +---
 arch/openrisc/mm/init.c  |  8 ++--
 arch/powerpc/kernel/fadump.c | 58 ---
 arch/powerpc/kvm/book3s_hv_builtin.c | 11 +
 arch/powerpc/mm/book3s64/hash_utils.c| 16 +++
 arch/powerpc/mm/book3s64/radix_pgtable.c | 11 ++---
 arch/powerpc/mm/kasan/kasan_init_32.c|  8 ++--
 arch/powerpc/mm/mem.c| 33 +++--
 arch/powerpc/mm/numa.c   |  7 +--
 arch/powerpc/mm/pgtable_32.c |  8 ++--
 arch/riscv/mm/init.c | 33 -
 arch/riscv/mm/kasan_init.c   | 10 ++--
 arch/s390/kernel/crash_dump.c|  8 ++--
 arch/s390/kernel/setup.c | 31 -
 arch/s390/mm/page-states.c   |  6 +--
 arch/s390/mm/vmem.c  | 16 ---
 arch/sh/mm/init.c|  9 ++--
 arch/sparc/mm/init_64.c  | 12 ++---
 arch/x86/mm/numa.c   | 26 ---
 arch/xtensa/mm/init.c| 55 --
 drivers/bus/mvebu-mbus.c | 12 ++---
 drivers/s390/char/zcore.c|  9 ++--
 include/linux/memblock.h | 45 +-
 kernel/dma/contiguous.c  | 11 +
 mm/memblock.c| 28 +++
 mm/page_alloc.c  | 11 ++---
 mm/sparse.c  | 10 ++--
 47 files changed, 324 insertions(+), 485 deletions(-)

-- 
2.26.2

Re: [PATCH 20/20] Documentation: vm/memory-model: eliminate duplicated word

2020-07-07 Thread Mike Rapoport

On Tue, Jul 07, 2020 at 11:04:13AM -0700, Randy Dunlap wrote:
> Drop the doubled word "the".
> 
> Signed-off-by: Randy Dunlap 
> Cc: Jonathan Corbet 
> Cc: linux-...@vger.kernel.org
> Cc: Andrew Morton 
> Cc: linux...@kvack.org

Reviewed-by: Mike Rapoport 

> ---
>  Documentation/vm/memory-model.rst |2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> --- linux-next-20200701.orig/Documentation/vm/memory-model.rst
> +++ linux-next-20200701/Documentation/vm/memory-model.rst
> @@ -159,7 +159,7 @@ frame. Inside a section, the PFN is the
>  The sparse vmemmap uses a virtually mapped memory map to optimize
>  pfn_to_page and page_to_pfn operations. There is a global `struct
>  page *vmemmap` pointer that points to a virtually contiguous array of
> -`struct page` objects. A PFN is an index to that array and the the
> +`struct page` objects. A PFN is an index to that array and the
>  offset of the `struct page` from `vmemmap` is the PFN of that
>  page.
>  

-- 
Sincerely yours,
Mike.

Re: [PATCH 0/8] mm: cleanup usage of <asm/pgalloc.h>

2020-07-02 Thread Mike Rapoport

Gentle ping.

On Sat, Jun 27, 2020 at 05:34:45PM +0300, Mike Rapoport wrote:
> From: Mike Rapoport 
> 
> Hi,
> 
> Most architectures have very similar versions of pXd_alloc_one() and
> pXd_free_one() for intermediate levels of page table.
> These patches add generic versions of these functions in
> <asm-generic/pgalloc.h> and enable use of the generic functions where
> appropriate.
> 
> In addition, functions declared and defined in <asm/pgalloc.h> headers
> are used mostly by core mm and early mm initialization in arch and there is
> no actual reason to have the <asm/pgalloc.h> included all over the place.
> The first patch in this series removes unneeded includes of <asm/pgalloc.h>
> 
> In the end it didn't work out as neatly as I hoped and moving
> pXd_alloc_track() definitions to <asm-generic/pgalloc.h> would require
> unnecessary changes to arches that have custom page table allocations, so
> I've decided to move lib/ioremap.c to mm/ and make pgalloc-track.h local to
> mm/.
> 
> Joerg Roedel (1):
>   mm: move p?d_alloc_track to separate header file
> 
> Mike Rapoport (7):
>   mm: remove unneeded includes of 
>   opeinrisc: switch to generic version of pte allocation
>   xtensa: switch to generic version of pte allocation
>   asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()
>   asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()
>   asm-generic: pgalloc: provide generic pgd_free()
>   mm: move lib/ioremap.c to mm/
> 
>  arch/alpha/include/asm/pgalloc.h | 21 +
>  arch/alpha/include/asm/tlbflush.h|  1 -
>  arch/alpha/kernel/core_irongate.c|  1 -
>  arch/alpha/kernel/core_marvel.c  |  1 -
>  arch/alpha/kernel/core_titan.c   |  1 -
>  arch/alpha/kernel/machvec_impl.h |  2 -
>  arch/alpha/kernel/smp.c  |  1 -
>  arch/alpha/mm/numa.c |  1 -
>  arch/arc/mm/fault.c  |  1 -
>  arch/arc/mm/init.c   |  1 -
>  arch/arm/include/asm/pgalloc.h   | 12 +--
>  arch/arm/include/asm/tlb.h   |  1 -
>  arch/arm/kernel/machine_kexec.c  |  1 -
>  arch/arm/kernel/smp.c|  1 -
>  arch/arm/kernel/suspend.c|  1 -
>  arch/arm/mach-omap2/omap-mpuss-lowpower.c|  1 -
>  arch/arm/mm/hugetlbpage.c|  1 -
>  arch/arm/mm/mmu.c|  1 +
>  arch/arm64/include/asm/pgalloc.h | 39 +-
>  arch/arm64/kernel/smp.c  |  1 -
>  arch/arm64/mm/hugetlbpage.c  |  1 -
>  arch/arm64/mm/ioremap.c  |  1 -
>  arch/arm64/mm/mmu.c  |  1 +
>  arch/csky/include/asm/pgalloc.h  |  7 +-
>  arch/csky/kernel/smp.c   |  1 -
>  arch/hexagon/include/asm/pgalloc.h   |  7 +-
>  arch/ia64/include/asm/pgalloc.h  | 24 --
>  arch/ia64/include/asm/tlb.h  |  1 -
>  arch/ia64/kernel/process.c   |  1 -
>  arch/ia64/kernel/smp.c   |  1 -
>  arch/ia64/kernel/smpboot.c   |  1 -
>  arch/ia64/mm/contig.c|  1 -
>  arch/ia64/mm/discontig.c |  1 -
>  arch/ia64/mm/hugetlbpage.c   |  1 -
>  arch/ia64/mm/tlb.c   |  1 -
>  arch/m68k/include/asm/mmu_context.h  |  2 +-
>  arch/m68k/include/asm/sun3_pgalloc.h |  7 +-
>  arch/m68k/kernel/dma.c   |  2 +-
>  arch/m68k/kernel/traps.c |  3 +-
>  arch/m68k/mm/cache.c |  2 +-
>  arch/m68k/mm/fault.c |  1 -
>  arch/m68k/mm/kmap.c  |  2 +-
>  arch/m68k/mm/mcfmmu.c|  1 +
>  arch/m68k/mm/memory.c|  1 -
>  arch/m68k/sun3x/dvma.c   |  2 +-
>  arch/microblaze/include/asm/pgalloc.h|  6 --
>  arch/microblaze/include/asm/tlbflush.h   |  1 -
>  arch/microblaze/kernel/process.c |  1 -
>  arch/microblaze/kernel/signal.c  |  1 -
>  arch/mips/include/asm/pgalloc.h  | 19 +
>  arch/mips/sgi-ip32/ip32-memory.c |  1 -
>  arch/nds32/mm/mm-nds32.c |  2 +
>  arch/nios2/include/asm/pgalloc.h |  7 +-
>  arch/openrisc/include/asm/pgalloc.h  | 33 +---
>  arch/openrisc/include/asm/tlbflush.h |  1 -
>  arch/openrisc/kernel/or32_ksyms.c|  1 -
>  arch/parisc/include/asm/mmu_context.h|  1 -
>  arch/parisc/include/asm/pgalloc.h| 12 +--
>  arch/parisc/kernel/cache.c   |  1 -
>  arch/parisc/kernel/p

Re: [PATCH 10/20] dm: stop using ->queuedata

2020-07-01 Thread Mike Snitzer

On Wed, Jul 01 2020 at  4:59am -0400,
Christoph Hellwig  wrote:

> Instead of setting up the queuedata as well just use one private data
> field.
> 
> Signed-off-by: Christoph Hellwig 

Acked-by: Mike Snitzer

Re: [PATCH 4/8] asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()

2020-06-29 Thread Mike Rapoport

On Sat, Jun 27, 2020 at 08:03:04PM +0100, Matthew Wilcox wrote:
> On Sat, Jun 27, 2020 at 05:34:49PM +0300, Mike Rapoport wrote:
> > More elaborate versions on arm64 and x86 account memory for the user page
> > tables and call to pgtable_pmd_page_ctor() as the part of PMD page
> > initialization.
> > 
> > Move the arm64 version to include/asm-generic/pgalloc.h and use the generic
> > version on several architectures.
> > 
> > The pgtable_pmd_page_ctor() is a NOP when ARCH_ENABLE_SPLIT_PMD_PTLOCK is
> > not enabled, so there is no functional change for most architectures except
> > of the addition of __GFP_ACCOUNT for allocation of user page tables.
> 
> Thanks for including this line; it reminded me that we're not setting
> the PageTable flag on the page, nor accounting it to the zone page stats.
> Hope you don't mind me tagging a patch to do that on as 9/8.

We also never set the PageTable flag for early page tables and for the page
tables allocated directly with get_free_page(), e.g. PTI, KASAN.

> We could also do with a pud_page_[cd]tor and maybe even p4d/pgd versions.
> But that brings me to the next question -- could/should some of this
> be moved over to asm-generic/pgalloc.h?  The ctor/dtor aren't called
> from anywhere else, and there's value to reducing the total amount of
> code in mm.h, but then there's also value to keeping all the ifdef
> ARCH_ENABLE_SPLIT_PMD_PTLOCK code together too.  So I'm a bit torn.
> What do you think?

-- 
Sincerely yours,
Mike.

Re: [PATCH 4/8] asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()

2020-06-28 Thread Mike Rapoport

On Sat, Jun 27, 2020 at 08:03:04PM +0100, Matthew Wilcox wrote:
> On Sat, Jun 27, 2020 at 05:34:49PM +0300, Mike Rapoport wrote:
> > More elaborate versions on arm64 and x86 account memory for the user page
> > tables and call to pgtable_pmd_page_ctor() as the part of PMD page
> > initialization.
> > 
> > Move the arm64 version to include/asm-generic/pgalloc.h and use the generic
> > version on several architectures.
> > 
> > The pgtable_pmd_page_ctor() is a NOP when ARCH_ENABLE_SPLIT_PMD_PTLOCK is
> > not enabled, so there is no functional change for most architectures except
> > of the addition of __GFP_ACCOUNT for allocation of user page tables.
> 
> Thanks for including this line; it reminded me that we're not setting
> the PageTable flag on the page, nor accounting it to the zone page stats.
> Hope you don't mind me tagging a patch to do that on as 9/8.
> 
> We could also do with a pud_page_[cd]tor and maybe even p4d/pgd versions.
> But that brings me to the next question -- could/should some of this
> be moved over to asm-generic/pgalloc.h?  The ctor/dtor aren't called
> from anywhere else, and there's value to reducing the total amount of
> code in mm.h, but then there's also value to keeping all the ifdef
> ARCH_ENABLE_SPLIT_PMD_PTLOCK code together too.  So I'm a bit torn.
> What do you think?

There are architectures that don't use asm-generic/pgalloc.h but rather
have their own, sometimes completely different, versions of these
functions.

I've tried adding linux/pgalloc.h, but I've ended up with contradicting
needs to include asm/pgalloc.h before the generic code for some
architectures or after the generic code for others :)

I think let's leave it in mm.h for now, maybe after several more cleanups
we could do better.

-- 
Sincerely yours,
Mike.

Re: [PATCH 9/8] mm: Account PMD tables like PTE tables

2020-06-28 Thread Mike Rapoport

On Sat, Jun 27, 2020 at 07:46:42PM +0100, Matthew Wilcox wrote:
> We account the PTE level of the page tables to the process in order to
> make smarter OOM decisions and help diagnose why memory is fragmented.
> For these same reasons, we should account pages allocated for PMDs.
> With larger process address spaces and ASLR, the number of PMDs in use
> is higher than it used to be so the inaccuracy is starting to matter.
> 
> Signed-off-by: Matthew Wilcox (Oracle) 

Reviewed-by: Mike Rapoport 

> ---
>  include/linux/mm.h | 24 
>  1 file changed, 20 insertions(+), 4 deletions(-)
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index dc7b87310c10..b283e25fcffa 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2271,7 +2271,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct 
> *mm, pmd_t *pmd)
>   return ptlock_ptr(pmd_to_page(pmd));
>  }
>  
> -static inline bool pgtable_pmd_page_ctor(struct page *page)
> +static inline bool pmd_ptlock_init(struct page *page)
>  {
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>   page->pmd_huge_pte = NULL;
> @@ -2279,7 +2279,7 @@ static inline bool pgtable_pmd_page_ctor(struct page 
> *page)
>   return ptlock_init(page);
>  }
>  
> -static inline void pgtable_pmd_page_dtor(struct page *page)
> +static inline void pmd_ptlock_free(struct page *page)
>  {
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>   VM_BUG_ON_PAGE(page->pmd_huge_pte, page);
> @@ -2296,8 +2296,8 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct 
> *mm, pmd_t *pmd)
>   return >page_table_lock;
>  }
>  
> -static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; }
> -static inline void pgtable_pmd_page_dtor(struct page *page) {}
> +static inline bool pmd_ptlock_init(struct page *page) { return true; }
> +static inline void pmd_ptlock_free(struct page *page) {}
>  
>  #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte)
>  
> @@ -2310,6 +2310,22 @@ static inline spinlock_t *pmd_lock(struct mm_struct 
> *mm, pmd_t *pmd)
>   return ptl;
>  }
>  
> +static inline bool pgtable_pmd_page_ctor(struct page *page)
> +{
> + if (!pmd_ptlock_init(page))
> + return false;
> + __SetPageTable(page);
> + inc_zone_page_state(page, NR_PAGETABLE);
> + return true;
> +}
> +
> +static inline void pgtable_pmd_page_dtor(struct page *page)
> +{
> + pmd_ptlock_free(page);
> + __ClearPageTable(page);
> + dec_zone_page_state(page, NR_PAGETABLE);
> +}
> +
>  /*
>   * No scalability reason to split PUD locks yet, but follow the same pattern
>   * as the PMD locks to make it easier if we decide to.  The VM should not be
> -- 
> 2.27.0
> 

-- 
Sincerely yours,
Mike.

[PATCH 8/8] mm: move p?d_alloc_track to separate header file

2020-06-27 Thread Mike Rapoport

From: Joerg Roedel 

The functions are only used in two source files, both residing in the mm/
subdirectory, so there is no need for them to be in the global <linux/mm.h>
header.  Move them to the new mm/pgalloc-track.h header and include it only
where needed.

[rppt: mv include/linux/pgalloc-track.h mm/]

Link: http://lkml.kernel.org/r/20200609120533.25867-1-j...@8bytes.org
Signed-off-by: Joerg Roedel 
Cc: Peter Zijlstra (Intel) 
Cc: Andy Lutomirski 
Cc: Abdul Haleem 
Cc: Satheesh Rajendran 
Cc: Stephen Rothwell 
Cc: Steven Rostedt (VMware) 
Cc: Mike Rapoport 
Cc: Christophe Leroy 
Signed-off-by: Mike Rapoport 
---
 include/linux/mm.h | 45 
 mm/ioremap.c   |  2 ++
 mm/pgalloc-track.h | 51 ++
 mm/vmalloc.c   |  1 +
 4 files changed, 54 insertions(+), 45 deletions(-)
 create mode 100644 mm/pgalloc-track.h

diff --git a/include/linux/mm.h b/include/linux/mm.h
index dc7b87310c10..5e878a3c7c57 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2093,51 +2093,11 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
p4d_t *p4d,
NULL : pud_offset(p4d, address);
 }
 
-static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-
-{
-   if (unlikely(pgd_none(*pgd))) {
-   if (__p4d_alloc(mm, pgd, address))
-   return NULL;
-   *mod_mask |= PGTBL_PGD_MODIFIED;
-   }
-
-   return p4d_offset(pgd, address);
-}
-
-static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-{
-   if (unlikely(p4d_none(*p4d))) {
-   if (__pud_alloc(mm, p4d, address))
-   return NULL;
-   *mod_mask |= PGTBL_P4D_MODIFIED;
-   }
-
-   return pud_offset(p4d, address);
-}
-
 static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long 
address)
 {
return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
NULL: pmd_offset(pud, address);
 }
-
-static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
-unsigned long address,
-pgtbl_mod_mask *mod_mask)
-{
-   if (unlikely(pud_none(*pud))) {
-   if (__pmd_alloc(mm, pud, address))
-   return NULL;
-   *mod_mask |= PGTBL_PUD_MODIFIED;
-   }
-
-   return pmd_offset(pud, address);
-}
 #endif /* CONFIG_MMU */
 
 #if USE_SPLIT_PTE_PTLOCKS
@@ -2253,11 +2213,6 @@ static inline void pgtable_pte_page_dtor(struct page 
*page)
((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
NULL: pte_offset_kernel(pmd, address))
 
-#define pte_alloc_kernel_track(pmd, address, mask) \
-   ((unlikely(pmd_none(*(pmd))) && \
- (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
-   NULL: pte_offset_kernel(pmd, address))
-
 #if USE_SPLIT_PMD_PTLOCKS
 
 static struct page *pmd_to_page(pmd_t *pmd)
diff --git a/mm/ioremap.c b/mm/ioremap.c
index 5ee3526f71b8..5fa1ab41d152 100644
--- a/mm/ioremap.c
+++ b/mm/ioremap.c
@@ -13,6 +13,8 @@
 #include 
 #include 
 
+#include "pgalloc-track.h"
+
 #ifdef CONFIG_HAVE_ARCH_HUGE_VMAP
 static int __read_mostly ioremap_p4d_capable;
 static int __read_mostly ioremap_pud_capable;
diff --git a/mm/pgalloc-track.h b/mm/pgalloc-track.h
new file mode 100644
index ..1dcc865029a2
--- /dev/null
+++ b/mm/pgalloc-track.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_PGALLLC_TRACK_H
+#define _LINUX_PGALLLC_TRACK_H
+
+#if defined(CONFIG_MMU)
+static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
+unsigned long address,
+pgtbl_mod_mask *mod_mask)
+{
+   if (unlikely(pgd_none(*pgd))) {
+   if (__p4d_alloc(mm, pgd, address))
+   return NULL;
+   *mod_mask |= PGTBL_PGD_MODIFIED;
+   }
+
+   return p4d_offset(pgd, address);
+}
+
+static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
+unsigned long address,
+pgtbl_mod_mask *mod_mask)
+{
+   if (unlikely(p4d_none(*p4d))) {
+   if (__pud_alloc(mm, p4d, address))
+   return NULL;
+   *mod_mask |= PGTBL_P4D_MODIFIED;
+   }
+
+   return pud_offset(p4d, address);
+}
+
+static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
+unsigned long address,
+

[PATCH 6/8] asm-generic: pgalloc: provide generic pgd_free()

2020-06-27 Thread Mike Rapoport

From: Mike Rapoport 

Most architectures define pgd_free() as a wrapper for free_page().

Provide a generic version in asm-generic/pgalloc.h and enable its use for
most architectures.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgalloc.h  | 6 --
 arch/arm/include/asm/pgalloc.h| 1 +
 arch/arm64/include/asm/pgalloc.h  | 1 +
 arch/csky/include/asm/pgalloc.h   | 7 +--
 arch/hexagon/include/asm/pgalloc.h| 7 +--
 arch/ia64/include/asm/pgalloc.h   | 5 -
 arch/m68k/include/asm/sun3_pgalloc.h  | 7 +--
 arch/microblaze/include/asm/pgalloc.h | 6 --
 arch/mips/include/asm/pgalloc.h   | 5 -
 arch/nds32/mm/mm-nds32.c  | 2 ++
 arch/nios2/include/asm/pgalloc.h  | 7 +--
 arch/parisc/include/asm/pgalloc.h | 1 +
 arch/riscv/include/asm/pgalloc.h  | 5 -
 arch/sh/include/asm/pgalloc.h | 1 +
 arch/um/include/asm/pgalloc.h | 1 -
 arch/um/kernel/mem.c  | 5 -
 arch/x86/include/asm/pgalloc.h| 1 +
 arch/xtensa/include/asm/pgalloc.h | 5 -
 include/asm-generic/pgalloc.h | 7 +++
 19 files changed, 18 insertions(+), 62 deletions(-)

diff --git a/arch/alpha/include/asm/pgalloc.h b/arch/alpha/include/asm/pgalloc.h
index 4834cd52e9d0..9c6a24fe493d 100644
--- a/arch/alpha/include/asm/pgalloc.h
+++ b/arch/alpha/include/asm/pgalloc.h
@@ -34,10 +34,4 @@ pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd)
 
 extern pgd_t *pgd_alloc(struct mm_struct *mm);
 
-static inline void
-pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_page((unsigned long)pgd);
-}
-
 #endif /* _ALPHA_PGALLOC_H */
diff --git a/arch/arm/include/asm/pgalloc.h b/arch/arm/include/asm/pgalloc.h
index c5bdfd404ea5..15f4674715f8 100644
--- a/arch/arm/include/asm/pgalloc.h
+++ b/arch/arm/include/asm/pgalloc.h
@@ -65,6 +65,7 @@ static inline void clean_pte_table(pte_t *pte)
 
 #define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
 #define __HAVE_ARCH_PTE_ALLOC_ONE
+#define __HAVE_ARCH_PGD_FREE
 #include 
 
 static inline pte_t *
diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 0965945b595d..3c6a7f5988b1 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -13,6 +13,7 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PGD_FREE
 #include 
 
 #define PGD_SIZE   (PTRS_PER_PGD * sizeof(pgd_t))
diff --git a/arch/csky/include/asm/pgalloc.h b/arch/csky/include/asm/pgalloc.h
index c7c1ed27e348..d58d8146b729 100644
--- a/arch/csky/include/asm/pgalloc.h
+++ b/arch/csky/include/asm/pgalloc.h
@@ -9,7 +9,7 @@
 #include 
 
 #define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
-#include/* for pte_{alloc,free}_one */
+#include 
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
pte_t *pte)
@@ -42,11 +42,6 @@ static inline pte_t *pte_alloc_one_kernel(struct mm_struct 
*mm)
return pte;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_pages((unsigned long)pgd, PGD_ORDER);
-}
-
 static inline pgd_t *pgd_alloc(struct mm_struct *mm)
 {
pgd_t *ret;
diff --git a/arch/hexagon/include/asm/pgalloc.h 
b/arch/hexagon/include/asm/pgalloc.h
index cc9be514a676..f0c47e6a7427 100644
--- a/arch/hexagon/include/asm/pgalloc.h
+++ b/arch/hexagon/include/asm/pgalloc.h
@@ -11,7 +11,7 @@
 #include 
 #include 
 
-#include/* for pte_{alloc,free}_one */
+#include 
 
 extern unsigned long long kmap_generation;
 
@@ -41,11 +41,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return pgd;
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_page((unsigned long) pgd);
-}
-
 static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd,
pgtable_t pte)
 {
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 06f80358e20f..9601cfe83c94 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -29,11 +29,6 @@ static inline pgd_t *pgd_alloc(struct mm_struct *mm)
return (pgd_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
 }
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_page((unsigned long)pgd);
-}
-
 #if CONFIG_PGTABLE_LEVELS == 4
 static inline void
 p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t * pud)
diff --git a/arch/m68k/include/asm/sun3_pgalloc.h 
b/arch/m68k/include/asm/sun3_pgalloc.h
index 11b95dadf7c0..000f64869b91 100644
--- a/arch/m68k/include/asm/sun3_pgalloc.h
+++ b/arch/m68k/include/asm/sun3_pgalloc.h
@@ -13,7 +13,7 @@
 
 #include 
 
-#include/* for pte_{alloc,free}_one */
+#include 
 
 extern const char bad_pmd_string[];
 
@@ -40,11 +40,6 @@ static inline void pmd_populate(struct mm_struct *mm, pmd_t 
*pmd, pgtable_t page
  */
 #define pmd_free(mm, x)do { } while (0)
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd

[PATCH 5/8] asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()

2020-06-27 Thread Mike Rapoport

From: Mike Rapoport 

Several architectures define pud_alloc_one() as a wrapper for
__get_free_page() and pud_free() as a wrapper for free_page().

Provide a generic implementation in asm-generic/pgalloc.h and use it where
appropriate.

Signed-off-by: Mike Rapoport 
---
 arch/arm64/include/asm/pgalloc.h | 11 ---
 arch/ia64/include/asm/pgalloc.h  |  9 -
 arch/mips/include/asm/pgalloc.h  |  6 +-
 arch/x86/include/asm/pgalloc.h   | 15 ---
 include/asm-generic/pgalloc.h| 30 ++
 5 files changed, 31 insertions(+), 40 deletions(-)

diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h
index 7246d0a662e1..0965945b595d 100644
--- a/arch/arm64/include/asm/pgalloc.h
+++ b/arch/arm64/include/asm/pgalloc.h
@@ -37,17 +37,6 @@ static inline void __pud_populate(pud_t *pudp, phys_addr_t 
pmdp, pudval_t prot)
 
 #if CONFIG_PGTABLE_LEVELS > 3
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-   return (pud_t *)__get_free_page(GFP_PGTABLE_USER);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pudp)
-{
-   BUG_ON((unsigned long)pudp & (PAGE_SIZE-1));
-   free_page((unsigned long)pudp);
-}
-
 static inline void __p4d_populate(p4d_t *p4dp, phys_addr_t pudp, p4dval_t prot)
 {
set_p4d(p4dp, __p4d(__phys_to_p4d_val(pudp) | prot));
diff --git a/arch/ia64/include/asm/pgalloc.h b/arch/ia64/include/asm/pgalloc.h
index 5da1fc76477b..06f80358e20f 100644
--- a/arch/ia64/include/asm/pgalloc.h
+++ b/arch/ia64/include/asm/pgalloc.h
@@ -41,15 +41,6 @@ p4d_populate(struct mm_struct *mm, p4d_t * p4d_entry, pud_t 
* pud)
p4d_val(*p4d_entry) = __pa(pud);
 }
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-   return (pud_t *)__get_free_page(GFP_KERNEL | __GFP_ZERO);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-   free_page((unsigned long)pud);
-}
 #define __pud_free_tlb(tlb, pud, address)  pud_free((tlb)->mm, pud)
 #endif /* CONFIG_PGTABLE_LEVELS == 4 */
 
diff --git a/arch/mips/include/asm/pgalloc.h b/arch/mips/include/asm/pgalloc.h
index eed1b3e8c642..e5a840910ce0 100644
--- a/arch/mips/include/asm/pgalloc.h
+++ b/arch/mips/include/asm/pgalloc.h
@@ -14,6 +14,7 @@
 #include 
 
 #define __HAVE_ARCH_PMD_ALLOC_ONE
+#define __HAVE_ARCH_PUD_ALLOC_ONE
 #include 
 
 static inline void pmd_populate_kernel(struct mm_struct *mm, pmd_t *pmd,
@@ -87,11 +88,6 @@ static inline pud_t *pud_alloc_one(struct mm_struct *mm, 
unsigned long address)
return pud;
 }
 
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-   free_pages((unsigned long)pud, PUD_ORDER);
-}
-
 static inline void p4d_populate(struct mm_struct *mm, p4d_t *p4d, pud_t *pud)
 {
set_p4d(p4d, __p4d((unsigned long)pud));
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 25feaa117c40..3d1085a14347 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -123,21 +123,6 @@ static inline void p4d_populate_safe(struct mm_struct *mm, 
p4d_t *p4d, pud_t *pu
set_p4d_safe(p4d, __p4d(_PAGE_TABLE | __pa(pud)));
 }
 
-static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
-{
-   gfp_t gfp = GFP_KERNEL_ACCOUNT;
-
-   if (mm == _mm)
-   gfp &= ~__GFP_ACCOUNT;
-   return (pud_t *)get_zeroed_page(gfp);
-}
-
-static inline void pud_free(struct mm_struct *mm, pud_t *pud)
-{
-   BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
-   free_page((unsigned long)pud);
-}
-
 extern void ___pud_free_tlb(struct mmu_gather *tlb, pud_t *pud);
 
 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
diff --git a/include/asm-generic/pgalloc.h b/include/asm-generic/pgalloc.h
index 1bc027891a00..d361574aaadf 100644
--- a/include/asm-generic/pgalloc.h
+++ b/include/asm-generic/pgalloc.h
@@ -145,6 +145,36 @@ static inline void pmd_free(struct mm_struct *mm, pmd_t 
*pmd)
 
 #endif /* CONFIG_PGTABLE_LEVELS > 2 */
 
+#if CONFIG_PGTABLE_LEVELS > 3
+
+#ifndef __HAVE_ARCH_PUD_FREE
+/**
+ * pud_alloc_one - allocate a page for PUD-level page table
+ * @mm: the mm_struct of the current context
+ *
+ * Allocates a page using %GFP_PGTABLE_USER for user context and
+ * %GFP_PGTABLE_KERNEL for kernel context.
+ *
+ * Return: pointer to the allocated memory or %NULL on error
+ */
+static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr)
+{
+   gfp_t gfp = GFP_PGTABLE_USER;
+
+   if (mm == _mm)
+   gfp = GFP_PGTABLE_KERNEL;
+   return (pud_t *)get_zeroed_page(gfp);
+}
+#endif
+
+static inline void pud_free(struct mm_struct *mm, pud_t *pud)
+{
+   BUG_ON((unsigned long)pud & (PAGE_SIZE-1));
+   free_page((unsigned long)pud);
+}
+
+#endif /* CONFIG_PGTABLE_LEVELS > 3 */
+
 #endif /* CONFIG_MMU */
 
 #endif /* __ASM_GENERIC_PGALLOC_H */
-- 
2.26.2

[PATCH 3/8] xtensa: switch to generic version of pte allocation

2020-06-27 Thread Mike Rapoport

From: Mike Rapoport 

xtensa clears PTEs during allocation of the page tables and pte_clear()
sets the PTE to a non-zero value. Splitting ptes_clear() helper out of
pte_alloc_one() and pte_alloc_one_kernel() allows reuse of base generic
allocation methods (__pte_alloc_one() and __pte_alloc_one_kernel()) and the
common GFP mask for page table allocations.

The pte_free() and pte_free_kernel() implementations on xtensa are
identical to the generic ones and can be dropped.

Signed-off-by: Mike Rapoport 
---
 arch/xtensa/include/asm/pgalloc.h | 41 ++-
 1 file changed, 19 insertions(+), 22 deletions(-)

diff --git a/arch/xtensa/include/asm/pgalloc.h 
b/arch/xtensa/include/asm/pgalloc.h
index 1d38f0e755ba..60ee94b42850 100644
--- a/arch/xtensa/include/asm/pgalloc.h
+++ b/arch/xtensa/include/asm/pgalloc.h
@@ -8,9 +8,14 @@
 #ifndef _XTENSA_PGALLOC_H
 #define _XTENSA_PGALLOC_H
 
+#ifdef CONFIG_MMU
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#define __HAVE_ARCH_PTE_ALLOC_ONE
+#include 
+
 /*
  * Allocating and freeing a pmd is trivial: the 1-entry pmd is
  * inside the pgd, so has no extra memory associated with it.
@@ -33,45 +38,37 @@ static inline void pgd_free(struct mm_struct *mm, pgd_t 
*pgd)
free_page((unsigned long)pgd);
 }
 
+static inline void ptes_clear(pte_t *ptep)
+{
+   int i;
+
+   for (i = 0; i < PTRS_PER_PTE; i++)
+   pte_clear(NULL, 0, ptep + i);
+}
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
pte_t *ptep;
-   int i;
 
-   ptep = (pte_t *)__get_free_page(GFP_KERNEL);
+   ptep = (pte_t *)__pte_alloc_one_kernel(mm);
if (!ptep)
return NULL;
-   for (i = 0; i < 1024; i++)
-   pte_clear(NULL, 0, ptep + i);
+   ptes_clear(ptep);
return ptep;
 }
 
 static inline pgtable_t pte_alloc_one(struct mm_struct *mm)
 {
-   pte_t *pte;
struct page *page;
 
-   pte = pte_alloc_one_kernel(mm);
-   if (!pte)
-   return NULL;
-   page = virt_to_page(pte);
-   if (!pgtable_pte_page_ctor(page)) {
-   __free_page(page);
+   page = __pte_alloc_one(mm, GFP_PGTABLE_USER);
+   if (!page)
return NULL;
-   }
+   ptes_clear(page_address(page));
return page;
 }
 
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, pgtable_t pte)
-{
-   pgtable_pte_page_dtor(pte);
-   __free_page(pte);
-}
 #define pmd_pgtable(pmd) pmd_page(pmd)
+#endif CONFIG_MMU
 
 #endif /* _XTENSA_PGALLOC_H */
-- 
2.26.2

[PATCH 2/8] opeinrisc: switch to generic version of pte allocation

2020-06-27 Thread Mike Rapoport

From: Mike Rapoport 

Replace pte_alloc_one(), pte_free() and pte_free_kernel() with the generic
implementation. The only actual functional change is the addition of
__GFP_ACCOUNT for the allocation of the user page tables.

The pte_alloc_one_kernel() is kept back because its implementation on
openrisc is different than the generic one.

Signed-off-by: Mike Rapoport 
---
 arch/openrisc/include/asm/pgalloc.h | 33 +++--
 1 file changed, 3 insertions(+), 30 deletions(-)

diff --git a/arch/openrisc/include/asm/pgalloc.h 
b/arch/openrisc/include/asm/pgalloc.h
index da12a4c38c4b..88820299ecc4 100644
--- a/arch/openrisc/include/asm/pgalloc.h
+++ b/arch/openrisc/include/asm/pgalloc.h
@@ -20,6 +20,9 @@
 #include 
 #include 
 
+#define __HAVE_ARCH_PTE_ALLOC_ONE_KERNEL
+#include <asm-generic/pgalloc.h>
+
 extern int mem_init_done;
 
 #define pmd_populate_kernel(mm, pmd, pte) \
@@ -61,38 +64,8 @@ extern inline pgd_t *pgd_alloc(struct mm_struct *mm)
 }
 #endif
 
-static inline void pgd_free(struct mm_struct *mm, pgd_t *pgd)
-{
-   free_page((unsigned long)pgd);
-}
-
 extern pte_t *pte_alloc_one_kernel(struct mm_struct *mm);
 
-static inline struct page *pte_alloc_one(struct mm_struct *mm)
-{
-   struct page *pte;
-   pte = alloc_pages(GFP_KERNEL, 0);
-   if (!pte)
-   return NULL;
-   clear_page(page_address(pte));
-   if (!pgtable_pte_page_ctor(pte)) {
-   __free_page(pte);
-   return NULL;
-   }
-   return pte;
-}
-
-static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
-{
-   free_page((unsigned long)pte);
-}
-
-static inline void pte_free(struct mm_struct *mm, struct page *pte)
-{
-   pgtable_pte_page_dtor(pte);
-   __free_page(pte);
-}
-
 #define __pte_free_tlb(tlb, pte, addr) \
 do {   \
pgtable_pte_page_dtor(pte); \
-- 
2.26.2

[PATCH 1/8] mm: remove unneeded includes of <asm/pgalloc.h>

2020-06-27 Thread Mike Rapoport

From: Mike Rapoport 

In most cases the <asm/pgalloc.h> header is required only for allocations
of page table memory. Most of the .c files that include that header do not
use symbols declared in <asm/pgalloc.h> and do not require that header.

As for the other header files that used to include <asm/pgalloc.h>, it is
possible to move that include into the .c file that actually uses symbols
from <asm/pgalloc.h> and drop the include from the header file.

The process was somewhat automated using

sed -i -E '/[<"]asm\/pgalloc\.h/d' \
$(grep -L -w -f /tmp/xx \
$(git grep -E -l '[<"]asm/pgalloc\.h'))

where /tmp/xx contains all the symbols defined in
arch/*/include/asm/pgalloc.h.

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/tlbflush.h| 1 -
 arch/alpha/kernel/core_irongate.c| 1 -
 arch/alpha/kernel/core_marvel.c  | 1 -
 arch/alpha/kernel/core_titan.c   | 1 -
 arch/alpha/kernel/machvec_impl.h | 2 --
 arch/alpha/kernel/smp.c  | 1 -
 arch/alpha/mm/numa.c | 1 -
 arch/arc/mm/fault.c  | 1 -
 arch/arc/mm/init.c   | 1 -
 arch/arm/include/asm/tlb.h   | 1 -
 arch/arm/kernel/machine_kexec.c  | 1 -
 arch/arm/kernel/smp.c| 1 -
 arch/arm/kernel/suspend.c| 1 -
 arch/arm/mach-omap2/omap-mpuss-lowpower.c| 1 -
 arch/arm/mm/hugetlbpage.c| 1 -
 arch/arm/mm/mmu.c| 1 +
 arch/arm64/kernel/smp.c  | 1 -
 arch/arm64/mm/hugetlbpage.c  | 1 -
 arch/arm64/mm/ioremap.c  | 1 -
 arch/arm64/mm/mmu.c  | 1 +
 arch/csky/kernel/smp.c   | 1 -
 arch/ia64/include/asm/tlb.h  | 1 -
 arch/ia64/kernel/process.c   | 1 -
 arch/ia64/kernel/smp.c   | 1 -
 arch/ia64/kernel/smpboot.c   | 1 -
 arch/ia64/mm/contig.c| 1 -
 arch/ia64/mm/discontig.c | 1 -
 arch/ia64/mm/hugetlbpage.c   | 1 -
 arch/ia64/mm/tlb.c   | 1 -
 arch/m68k/include/asm/mmu_context.h  | 2 +-
 arch/m68k/kernel/dma.c   | 2 +-
 arch/m68k/kernel/traps.c | 3 +--
 arch/m68k/mm/cache.c | 2 +-
 arch/m68k/mm/fault.c | 1 -
 arch/m68k/mm/kmap.c  | 2 +-
 arch/m68k/mm/mcfmmu.c| 1 +
 arch/m68k/mm/memory.c| 1 -
 arch/m68k/sun3x/dvma.c   | 2 +-
 arch/microblaze/include/asm/tlbflush.h   | 1 -
 arch/microblaze/kernel/process.c | 1 -
 arch/microblaze/kernel/signal.c  | 1 -
 arch/mips/sgi-ip32/ip32-memory.c | 1 -
 arch/openrisc/include/asm/tlbflush.h | 1 -
 arch/openrisc/kernel/or32_ksyms.c| 1 -
 arch/parisc/include/asm/mmu_context.h| 1 -
 arch/parisc/kernel/cache.c   | 1 -
 arch/parisc/kernel/pci-dma.c | 1 -
 arch/parisc/kernel/process.c | 1 -
 arch/parisc/kernel/signal.c  | 1 -
 arch/parisc/kernel/smp.c | 1 -
 arch/parisc/mm/hugetlbpage.c | 1 -
 arch/parisc/mm/ioremap.c | 2 +-
 arch/powerpc/include/asm/tlb.h   | 1 -
 arch/powerpc/mm/book3s64/hash_hugetlbpage.c  | 1 -
 arch/powerpc/mm/book3s64/hash_pgtable.c  | 1 -
 arch/powerpc/mm/book3s64/hash_tlb.c  | 1 -
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 1 -
 arch/powerpc/mm/init_32.c| 1 -
 arch/powerpc/mm/kasan/8xx.c  | 1 -
 arch/powerpc/mm/kasan/book3s_32.c| 1 -
 arch/powerpc/mm/mem.c| 1 -
 arch/powerpc/mm/nohash/40x.c | 1 -
 arch/powerpc/mm/nohash/8xx.c | 1 -
 arch/powerpc/mm/nohash/fsl_booke.c   | 1 -
 arch/powerpc/mm/nohash/kaslr_booke.c | 1 -
 arch/powerpc/mm/pgtable.c| 1 -
 arch/powerpc/mm/pgtable_64.c | 1 -
 arch/powerpc/mm/ptdump/hashpagetable.c   | 2 +-
 arch/powerpc/mm/ptdump/ptdump.c  | 1 -
 arch/powerpc/platforms/pseries/cmm.c | 1 -
 arch/riscv/mm/fault.c| 1 -
 arch/s390/include/asm/tlb.h  | 1 -
 arch/s390/include/asm/tlbflush.h | 1 -
 arch/s390/kernel/machine_kexec.c | 1 -
 arch/s390/kernel/ptrace.c| 1 -
 arch/s390/kvm/diag.c | 1 -
 arch/s390/kvm/priv.c | 1 -
 arch/s390/kvm/pv.c   | 1 -
 arch/s390/mm/cmm.c   | 1 -
 arch/s390/mm/mmap.c  | 1 -
 arch/s390/mm/pgtable.c   | 1 -
 arch/sh/kernel/idle.c| 1 -

[PATCH 0/8] mm: cleanup usage of <asm/pgalloc.h>

2020-06-27 Thread Mike Rapoport

From: Mike Rapoport 

Hi,

Most architectures have very similar versions of pXd_alloc_one() and
pXd_free_one() for intermediate levels of page table. 
These patches add generic versions of these functions in
<asm-generic/pgalloc.h> and enable use of the generic functions where
appropriate.

In addition, functions declared and defined in <asm/pgalloc.h> headers
are used mostly by core mm and early mm initialization in arch and there is
no actual reason to have the <asm/pgalloc.h> included all over the place.
The first patch in this series removes unneeded includes of <asm/pgalloc.h>.

In the end it didn't work out as neatly as I hoped and moving
pXd_alloc_track() definitions to <asm-generic/pgalloc.h> would require
unnecessary changes to arches that have custom page table allocations, so
I've decided to move lib/ioremap.c to mm/ and make pgalloc-track.h local to
mm/.

Joerg Roedel (1):
  mm: move p?d_alloc_track to separate header file

Mike Rapoport (7):
  mm: remove unneeded includes of <asm/pgalloc.h>
  opeinrisc: switch to generic version of pte allocation
  xtensa: switch to generic version of pte allocation
  asm-generic: pgalloc: provide generic pmd_alloc_one() and pmd_free_one()
  asm-generic: pgalloc: provide generic pud_alloc_one() and pud_free_one()
  asm-generic: pgalloc: provide generic pgd_free()
  mm: move lib/ioremap.c to mm/

 arch/alpha/include/asm/pgalloc.h | 21 +
 arch/alpha/include/asm/tlbflush.h|  1 -
 arch/alpha/kernel/core_irongate.c|  1 -
 arch/alpha/kernel/core_marvel.c  |  1 -
 arch/alpha/kernel/core_titan.c   |  1 -
 arch/alpha/kernel/machvec_impl.h |  2 -
 arch/alpha/kernel/smp.c  |  1 -
 arch/alpha/mm/numa.c |  1 -
 arch/arc/mm/fault.c  |  1 -
 arch/arc/mm/init.c   |  1 -
 arch/arm/include/asm/pgalloc.h   | 12 +--
 arch/arm/include/asm/tlb.h   |  1 -
 arch/arm/kernel/machine_kexec.c  |  1 -
 arch/arm/kernel/smp.c|  1 -
 arch/arm/kernel/suspend.c|  1 -
 arch/arm/mach-omap2/omap-mpuss-lowpower.c|  1 -
 arch/arm/mm/hugetlbpage.c|  1 -
 arch/arm/mm/mmu.c|  1 +
 arch/arm64/include/asm/pgalloc.h | 39 +-
 arch/arm64/kernel/smp.c  |  1 -
 arch/arm64/mm/hugetlbpage.c  |  1 -
 arch/arm64/mm/ioremap.c  |  1 -
 arch/arm64/mm/mmu.c  |  1 +
 arch/csky/include/asm/pgalloc.h  |  7 +-
 arch/csky/kernel/smp.c   |  1 -
 arch/hexagon/include/asm/pgalloc.h   |  7 +-
 arch/ia64/include/asm/pgalloc.h  | 24 --
 arch/ia64/include/asm/tlb.h  |  1 -
 arch/ia64/kernel/process.c   |  1 -
 arch/ia64/kernel/smp.c   |  1 -
 arch/ia64/kernel/smpboot.c   |  1 -
 arch/ia64/mm/contig.c|  1 -
 arch/ia64/mm/discontig.c |  1 -
 arch/ia64/mm/hugetlbpage.c   |  1 -
 arch/ia64/mm/tlb.c   |  1 -
 arch/m68k/include/asm/mmu_context.h  |  2 +-
 arch/m68k/include/asm/sun3_pgalloc.h |  7 +-
 arch/m68k/kernel/dma.c   |  2 +-
 arch/m68k/kernel/traps.c |  3 +-
 arch/m68k/mm/cache.c |  2 +-
 arch/m68k/mm/fault.c |  1 -
 arch/m68k/mm/kmap.c  |  2 +-
 arch/m68k/mm/mcfmmu.c|  1 +
 arch/m68k/mm/memory.c|  1 -
 arch/m68k/sun3x/dvma.c   |  2 +-
 arch/microblaze/include/asm/pgalloc.h|  6 --
 arch/microblaze/include/asm/tlbflush.h   |  1 -
 arch/microblaze/kernel/process.c |  1 -
 arch/microblaze/kernel/signal.c  |  1 -
 arch/mips/include/asm/pgalloc.h  | 19 +
 arch/mips/sgi-ip32/ip32-memory.c |  1 -
 arch/nds32/mm/mm-nds32.c |  2 +
 arch/nios2/include/asm/pgalloc.h |  7 +-
 arch/openrisc/include/asm/pgalloc.h  | 33 +---
 arch/openrisc/include/asm/tlbflush.h |  1 -
 arch/openrisc/kernel/or32_ksyms.c|  1 -
 arch/parisc/include/asm/mmu_context.h|  1 -
 arch/parisc/include/asm/pgalloc.h| 12 +--
 arch/parisc/kernel/cache.c   |  1 -
 arch/parisc/kernel/pci-dma.c |  1 -
 arch/parisc/kernel/process.c |  1 -
 arch/parisc/kernel/signal.c  |  1 -
 arch/parisc/kernel/smp.c |  1 -
 arch/parisc/mm/hugetlbpage.c |  1 -
 arch/parisc/mm/ioremap.c |  2 +-
 arch/powerpc/include/asm/tlb.h   |  1 -
 arch/powerpc/mm/book3s64/hash_hugetlbpage.c  |  1 -
 arch/powerpc/mm/book3s64/hash_pgtable.c  |  1 -
 arch/powerpc/mm/book3s64/hash_tlb.c  |  1 -
 arch/powerpc/mm/book3s64

Re: [PATCH] mm: Move p?d_alloc_track to separate header file

2020-06-18 Thread Mike Rapoport

On Wed, Jun 17, 2020 at 06:12:26PM -0700, Andrew Morton wrote:
> On Tue,  9 Jun 2020 14:05:33 +0200 Joerg Roedel  wrote:
> 
> > From: Joerg Roedel 
> > 
> > The functions are only used in two source files, so there is no need
> > for them to be in the global <linux/mm.h> header. Move them to the new
> > <linux/pgalloc-track.h> header and include it only where needed.
> > 
> > ...
> >
> > new file mode 100644
> > index ..1dcc865029a2
> > --- /dev/null
> > +++ b/include/linux/pgalloc-track.h
> > @@ -0,0 +1,51 @@
> > +/* SPDX-License-Identifier: GPL-2.0 */
> > +#ifndef _LINUX_PGALLLC_TRACK_H
> > +#define _LINUX_PGALLLC_TRACK_H
> 
> hm, no #includes.  I guess this is OK, given the limited use.
> 
> But it does make one wonder whether ioremap.c should be moved from lib/
> to mm/ and this file should be moved from include/linux/ to mm/.

It makes sense, but I am anyway planning consolidation of pgalloc.h, so
most probably pgalloc-track will not survive until 5.9-rc1 :)

If you think that it is worth moving ioremap.c to mm/ regardless of churn,
I can send a patch for that.

> Oh well.

-- 
Sincerely yours,
Mike.

Re: [PATCH V3 (RESEND) 0/3] arm64: Enable vmemmap mapping from device memory

2020-06-18 Thread Mike Rapoport

On Thu, Jun 18, 2020 at 06:45:27AM +0530, Anshuman Khandual wrote:
> This series enables vmemmap backing memory allocation from device memory
> ranges on arm64. But before that, it enables vmemmap_populate_basepages()
> and vmemmap_alloc_block_buf() to accommodate struct vmem_altmap based
> allocation requests.
> 
> This series applies on 5.8-rc1.
> 
> Pending Question:
> 
> altmap_alloc_block_buf() does not have any other remaining users in
> the tree after this change. Should it be converted into a static
> function and its declaration be dropped from the header
> (include/linux/mm.h). Avoided doing so because I was not sure if there
> are any off-tree users or not.

Well, off-tree users probably have an active fork anyway so they could
switch to vmemmap_alloc_block_buf()...

Regardless, can you please update Documentation/vm/memory-model.rst to
keep it in sync with the code?

> Changes in V3:
> 
> - Dropped comment from free_hotplug_page_range() per Robin
> - Modified comment in unmap_hotplug_range() per Robin
> - Enabled altmap support in vmemmap_alloc_block_buf() per Robin
> 
> Changes in V2: (https://lkml.org/lkml/2020/3/4/475)
> 
> - Rebased on latest hot-remove series (v14) adding P4D page table support
> 
> Changes in V1: (https://lkml.org/lkml/2020/1/23/12)
> 
> - Added an WARN_ON() in unmap_hotplug_range() when altmap is
>   provided without the page table backing memory being freed
> 
> Changes in RFC V2: (https://lkml.org/lkml/2019/10/21/11)
> 
> - Changed the commit message on 1/2 patch per Will
> - Changed the commit message on 2/2 patch as well
> - Rebased on arm64 memory hot remove series (v10)
> 
> RFC V1: (https://lkml.org/lkml/2019/6/28/32)
> 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Mark Rutland 
> Cc: Paul Walmsley 
> Cc: Palmer Dabbelt 
> Cc: Tony Luck 
> Cc: Fenghua Yu 
> Cc: Dave Hansen 
> Cc: Andy Lutomirski 
> Cc: Peter Zijlstra 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: David Hildenbrand 
> Cc: Mike Rapoport 
> Cc: Michal Hocko 
> Cc: "Matthew Wilcox (Oracle)" 
> Cc: "Kirill A. Shutemov" 
> Cc: Andrew Morton 
> Cc: Dan Williams 
> Cc: Pavel Tatashin 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linux-i...@vger.kernel.org
> Cc: linux-ri...@lists.infradead.org
> Cc: x...@kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux...@kvack.org
> Cc: linux-ker...@vger.kernel.org
> 
> Anshuman Khandual (3):
>   mm/sparsemem: Enable vmem_altmap support in vmemmap_populate_basepages()
>   mm/sparsemem: Enable vmem_altmap support in vmemmap_alloc_block_buf()
>   arm64/mm: Enable vmem_altmap support for vmemmap mappings
> 
>  arch/arm64/mm/mmu.c   | 59 ++-
>  arch/ia64/mm/discontig.c  |  2 +-
>  arch/powerpc/mm/init_64.c | 10 +++
>  arch/riscv/mm/init.c  |  2 +-
>  arch/x86/mm/init_64.c | 12 
>  include/linux/mm.h|  8 --
>  mm/sparse-vmemmap.c   | 38 -
>  7 files changed, 87 insertions(+), 44 deletions(-)
> 
> -- 
> 2.20.1
> 

-- 
Sincerely yours,
Mike.

Re: [PATCH V3 4/4] Documentation/mm: Add descriptions for arch page table helpers

2020-06-17 Thread Mike Rapoport

On Mon, Jun 15, 2020 at 09:07:57AM +0530, Anshuman Khandual wrote:
> This adds a specific description file for all arch page table helpers which
> is in sync with the semantics being tested via CONFIG_DEBUG_VM_PGTABLE. All
> future changes either to these descriptions here or the debug test should
> always remain in sync.
> 
> Cc: Jonathan Corbet 
> Cc: Andrew Morton 
> Cc: Mike Rapoport 
> Cc: Vineet Gupta 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: Heiko Carstens 
> Cc: Vasily Gorbik 
> Cc: Christian Borntraeger 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: Borislav Petkov 
> Cc: "H. Peter Anvin" 
> Cc: Kirill A. Shutemov 
> Cc: Paul Walmsley 
> Cc: Palmer Dabbelt 
> Cc: linux-snps-...@lists.infradead.org
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-s...@vger.kernel.org
> Cc: linux-ri...@lists.infradead.org
> Cc: x...@kernel.org
> Cc: linux-a...@vger.kernel.org
> Cc: linux...@kvack.org
> Cc: linux-...@vger.kernel.org
> Cc: linux-ker...@vger.kernel.org
> Suggested-by: Mike Rapoport 
> Signed-off-by: Anshuman Khandual 
> ---
>  Documentation/vm/arch_pgtable_helpers.rst | 258 ++
>  mm/debug_vm_pgtable.c     |   6 +
>  2 files changed, 264 insertions(+)
>  create mode 100644 Documentation/vm/arch_pgtable_helpers.rst

Acked-by: Mike Rapoport

Re: [PATCH] powerpc/8xx: use pmd_off() to access a PMD entry in pte_update()

2020-06-16 Thread Mike Rapoport

On Wed, Jun 17, 2020 at 09:21:42AM +1000, Michael Ellerman wrote:
> Andrew Morton  writes:
> > On Mon, 15 Jun 2020 12:22:29 +0300 Mike Rapoport  wrote:
> >
> >> From: Mike Rapoport 
> >> 
> >> The pte_update() implementation for PPC_8xx unfolds page table from the PGD
> >> level to access a PMD entry. Since 8xx has only 2-level page table this can
> >> be simplified with pmd_off() shortcut.
> >> 
> >> Replace explicit unfolding with pmd_off() and drop defines of pgd_index()
> >> and pgd_offset() that are no longer needed.
> >> 
> >> Signed-off-by: Mike Rapoport 
> >> ---
> >> 
> >> I think it's powerpc material, but I won't mind if Andrew picks it up :)
> >
> > Via the powerpc tree would be better, please.
> 
> I'll take it into next for v5.9, unless there's a reason it needs to go
> into v5.8.

I consider it a fixup for 5.8 merge window conflicts. Besides, merging it
now may avoid new conflicts in 5.9 ;-)

> cheers

-- 
Sincerely yours,
Mike.

[PATCH] powerpc/8xx: use pmd_off() to access a PMD entry in pte_update()

2020-06-15 Thread Mike Rapoport

From: Mike Rapoport 

The pte_update() implementation for PPC_8xx unfolds page table from the PGD
level to access a PMD entry. Since 8xx has only 2-level page table this can
be simplified with pmd_off() shortcut.

Replace explicit unfolding with pmd_off() and drop defines of pgd_index()
and pgd_offset() that are no longer needed.

Signed-off-by: Mike Rapoport 
---

I think it's powerpc material, but I won't mind if Andrew picks it up :)


 arch/powerpc/include/asm/nohash/32/pgtable.h | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index b56f14160ae5..5a590ceaec14 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -205,10 +205,6 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
-/* to find an entry in a page-table-directory */
-#define pgd_index(address)  ((address) >> PGDIR_SHIFT)
-#define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address))
-
 /*
  * PTE updates. This function is called whenever an existing
  * valid PTE is updated. This does -not- include set_pte_at()
@@ -230,6 +226,8 @@ static inline void pmd_clear(pmd_t *pmdp)
  * For other page sizes, we have a single entry in the table.
  */
 #ifdef CONFIG_PPC_8xx
+static pmd_t *pmd_off(struct mm_struct *mm, unsigned long addr);
+
 static inline pte_basic_t pte_update(struct mm_struct *mm, unsigned long addr, 
pte_t *p,
 unsigned long clr, unsigned long set, int 
huge)
 {
@@ -237,7 +235,7 @@ static inline pte_basic_t pte_update(struct mm_struct *mm, 
unsigned long addr, p
pte_basic_t old = pte_val(*p);
pte_basic_t new = (old & ~(pte_basic_t)clr) | set;
int num, i;
-   pmd_t *pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, addr), 
addr), addr), addr);
+   pmd_t *pmd = pmd_off(mm, addr);
 
if (!huge)
num = PAGE_SIZE / SZ_4K;
-- 
2.26.2

Re: [PATCH 04/21] mm: free_area_init: use maximal zone PFNs rather than zone sizes

2020-06-15 Thread Mike Rapoport

Hi Greg,

On Mon, Jun 15, 2020 at 01:53:42PM +1000, Greg Ungerer wrote:
> Hi Mike,
> 
> From: Mike Rapoport 
> > Currently, architectures that use free_area_init() to initialize memory map
> > and node and zone structures need to calculate zone and hole sizes. We can
> > use free_area_init_nodes() instead and let it detect the zone boundaries
> > while the architectures will only have to supply the possible limits for
> > the zones.
> > 
> > Signed-off-by: Mike Rapoport 
> 
> This is causing some new warnings for me on boot on at least one non-MMU m68k 
> target:

There were a couple of changes that cause this. The free_area_init()
now relies on memblock data and architectural limits for zone sizes
rather than on explicit pfns calculated by the arch code. I've updated the
motorola variant and missed coldfire. Angelo sent a fix for mcfmmu.c
[1] and I've updated it to include nommu as well.

[1] 
https://lore.kernel.org/linux-m68k/20200614225119.02-1-angelo.dureghe...@timesys.com

>From 55b8523df2a5c4565b132c0691990f0821040fec Mon Sep 17 00:00:00 2001
From: Angelo Dureghello 
Date: Mon, 15 Jun 2020 00:51:19 +0200
Subject: [PATCH] m68k: fix registration of memory regions with memblock

Commit 3f08a302f533 ("mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option")
introduced assumption that UMA systems have their memory at node 0 and
updated most of them, but it forgot nommu and coldfire variants of m68k.

The later change in free area initialization in commit fa3354e4ea39 ("mm:
free_area_init: use maximal zone PFNs rather than zone sizes") exposed that
and caused a lot of "BUG: Bad page state in process swapper" reports.

Using memblock_add_node() with nid = 0 to register memory banks solves the
problem.

Fixes: 3f08a302f533 ("mm: remove CONFIG_HAVE_MEMBLOCK_NODE_MAP option")
Fixes: fa3354e4ea39 ("mm: free_area_init: use maximal zone PFNs rather than 
zone sizes")
Signed-off-by: Angelo Dureghello 
Co-developed-by: Mike Rapoport 
Signed-off-by: Mike Rapoport 
---
 arch/m68k/kernel/setup_no.c | 2 +-
 arch/m68k/mm/mcfmmu.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/m68k/kernel/setup_no.c b/arch/m68k/kernel/setup_no.c
index e779b19e0193..0c4589a39ba9 100644
--- a/arch/m68k/kernel/setup_no.c
+++ b/arch/m68k/kernel/setup_no.c
@@ -138,7 +138,7 @@ void __init setup_arch(char **cmdline_p)
pr_debug("MEMORY -> ROMFS=0x%p-0x%06lx MEM=0x%06lx-0x%06lx\n ",
 __bss_stop, memory_start, memory_start, memory_end);
 
-   memblock_add(memory_start, memory_end - memory_start);
+   memblock_add_node(memory_start, memory_end - memory_start, 0);
 
/* Keep a copy of command line */
*cmdline_p = &command_line[0];
diff --git a/arch/m68k/mm/mcfmmu.c b/arch/m68k/mm/mcfmmu.c
index 29f47923aa46..7d04210d34f0 100644
--- a/arch/m68k/mm/mcfmmu.c
+++ b/arch/m68k/mm/mcfmmu.c
@@ -174,7 +174,7 @@ void __init cf_bootmem_alloc(void)
m68k_memory[0].addr = _rambase;
m68k_memory[0].size = _ramend - _rambase;
 
-   memblock_add(m68k_memory[0].addr, m68k_memory[0].size);
+   memblock_add_node(m68k_memory[0].addr, m68k_memory[0].size, 0);
 
/* compute total pages in system */
num_pages = PFN_DOWN(_ramend - _rambase);
-- 
2.26.2


> ...
> NET: Registered protocol family 17
> BUG: Bad page state in process swapper  pfn:20165
> page:41fe0ca0 refcount:0 mapcount:1 mapping: index:0x0
> flags: 0x0()
> raw:  0100 0122     
> page dumped because: nonzero mapcount
> CPU: 0 PID: 1 Comm: swapper Not tainted 5.8.0-rc1-1-g3a38f8a60c65-dirty #1
> Stack from 404c9ebc:
> 404c9ebc 4029ab28 4029ab28 40088470 41fe0ca0 40299e21 40299df1 
> 404ba2a4
> 00020165  41fd2c10 402c7ba0 41fd2c04 40088504 41fe0ca0 
> 40299e21
>  40088a12 41fe0ca0 41fe0ca4 020a  0001 
> 402ca000
>  41fe0ca0 41fd2c10 41fd2c10   402b2388 
> 0001

...

> 
> System boots pretty much as normal through user space after this.
> Seems to be fully operational despite all those BUGONs.
> 
> Specifically this is a M5208EVB target (arch/m68k/configs/m5208evb).
> 
> 
> [snip]
> > diff --git a/arch/m68k/mm/init.c b/arch/m68k/mm/init.c
> > index b88d510d4fe3..6d3147662ff2 100644
> > --- a/arch/m68k/mm/init.c
> > +++ b/arch/m68k/mm/init.c
> > @@ -84,7 +84,7 @@ void __init paging_init(void)
> >  * page_alloc get different views of the world.
> >  */
> > unsigned long end_mem = memory_end & PAGE_MASK;
> > -   unsigned long zones_size[MAX_NR_ZONES] = { 0, };
> > +   unsigned long max_zone_pfn[MAX_NR_ZONES] = { 0, };
> > high_memory = (void *) end_mem;
> > @@ -98

Re: [PATCH] mm: Move p?d_alloc_track to separate header file

2020-06-09 Thread Mike Rapoport

On Tue, Jun 09, 2020 at 02:05:33PM +0200, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> The functions are only used in two source files, so there is no need
> for them to be in the global <linux/mm.h> header. Move them to the new
> <linux/pgalloc-track.h> header and include it only where needed.
> 
> Signed-off-by: Joerg Roedel 

Acked-by: Mike Rapoport 

> ---
>  include/linux/mm.h| 45 ---
>  include/linux/pgalloc-track.h | 51 +++
>  lib/ioremap.c |  1 +
>  mm/vmalloc.c  |  1 +
>  4 files changed, 53 insertions(+), 45 deletions(-)
>  create mode 100644 include/linux/pgalloc-track.h
> 
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 9d6042178ca7..22d8b2a2c9bc 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2092,51 +2092,11 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
> p4d_t *p4d,
>   NULL : pud_offset(p4d, address);
>  }
>  
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> -  unsigned long address,
> -  pgtbl_mod_mask *mod_mask)
> -
> -{
> - if (unlikely(pgd_none(*pgd))) {
> - if (__p4d_alloc(mm, pgd, address))
> - return NULL;
> - *mod_mask |= PGTBL_PGD_MODIFIED;
> - }
> -
> - return p4d_offset(pgd, address);
> -}
> -
> -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> -  unsigned long address,
> -  pgtbl_mod_mask *mod_mask)
> -{
> - if (unlikely(p4d_none(*p4d))) {
> - if (__pud_alloc(mm, p4d, address))
> - return NULL;
> - *mod_mask |= PGTBL_P4D_MODIFIED;
> - }
> -
> - return pud_offset(p4d, address);
> -}
> -
>  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned 
> long address)
>  {
>   return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
>   NULL: pmd_offset(pud, address);
>  }
> -
> -static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
> -  unsigned long address,
> -  pgtbl_mod_mask *mod_mask)
> -{
> - if (unlikely(pud_none(*pud))) {
> - if (__pmd_alloc(mm, pud, address))
> - return NULL;
> - *mod_mask |= PGTBL_PUD_MODIFIED;
> - }
> -
> - return pmd_offset(pud, address);
> -}
>  #endif /* CONFIG_MMU */
>  
>  #if USE_SPLIT_PTE_PTLOCKS
> @@ -2252,11 +2212,6 @@ static inline void pgtable_pte_page_dtor(struct page 
> *page)
>   ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \
>   NULL: pte_offset_kernel(pmd, address))
>  
> -#define pte_alloc_kernel_track(pmd, address, mask)   \
> - ((unlikely(pmd_none(*(pmd))) && \
> -   (__pte_alloc_kernel(pmd) || ({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\
> - NULL: pte_offset_kernel(pmd, address))
> -
>  #if USE_SPLIT_PMD_PTLOCKS
>  
>  static struct page *pmd_to_page(pmd_t *pmd)
> diff --git a/include/linux/pgalloc-track.h b/include/linux/pgalloc-track.h
> new file mode 100644
> index ..1dcc865029a2
> --- /dev/null
> +++ b/include/linux/pgalloc-track.h
> @@ -0,0 +1,51 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef _LINUX_PGALLLC_TRACK_H
> +#define _LINUX_PGALLLC_TRACK_H
> +
> +#if defined(CONFIG_MMU)
> +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +  unsigned long address,
> +  pgtbl_mod_mask *mod_mask)
> +{
> + if (unlikely(pgd_none(*pgd))) {
> + if (__p4d_alloc(mm, pgd, address))
> + return NULL;
> + *mod_mask |= PGTBL_PGD_MODIFIED;
> + }
> +
> + return p4d_offset(pgd, address);
> +}
> +
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> +  unsigned long address,
> +  pgtbl_mod_mask *mod_mask)
> +{
> + if (unlikely(p4d_none(*p4d))) {
> + if (__pud_alloc(mm, p4d, address))
> + return NULL;
> + *mod_mask |= PGTBL_P4D_MODIFIED;
> + }
> +
> + return pud_offset(p4d, address);
> +}
> +
> +static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud,
> +  unsigned long address,
> +  pgtb

Re: [PATCH] mm: Fix pud_alloc_track()

2020-06-04 Thread Mike Rapoport

On Thu, Jun 04, 2020 at 09:44:46AM +0200, Joerg Roedel wrote:
> From: Joerg Roedel 
> 
> The pud_alloc_track() needs to do different checks based on whether
> __ARCH_HAS_5LEVEL_HACK is defined, like it already does in
> pud_alloc(). Otherwise it causes boot failures on PowerPC.
> 
> Provide the correct implementations for both possible settings of
> __ARCH_HAS_5LEVEL_HACK to fix the boot problems.

There is a patch in mmotm [1] that completely removes
__ARCH_HAS_5LEVEL_HACK which is a part of the series [2] that updates
p4d folding across architectures. This should fix boot on PowerPC and
the addition of pXd_alloc_track() for __ARCH_HAS_5LEVEL_HACK wouldn't be
necessary.


[1] 
https://github.com/hnaz/linux-mm/commit/cfae68792af3731ac902ea6ba5ed8df5a0f6bd2f
[2] https://lore.kernel.org/kvmarm/20200414153455.21744-1-r...@kernel.org/

> Reported-by: Abdul Haleem 
> Tested-by: Abdul Haleem 
> Tested-by: Satheesh Rajendran 
> Fixes: d8626138009b ("mm: add functions to track page directory 
> modifications")
> Signed-off-by: Joerg Roedel 
> ---
>  include/asm-generic/5level-fixup.h |  5 +
>  include/linux/mm.h | 26 +-
>  2 files changed, 18 insertions(+), 13 deletions(-)
> 
> diff --git a/include/asm-generic/5level-fixup.h 
> b/include/asm-generic/5level-fixup.h
> index 58046ddc08d0..afbab31fbd7e 100644
> --- a/include/asm-generic/5level-fixup.h
> +++ b/include/asm-generic/5level-fixup.h
> @@ -17,6 +17,11 @@
>   ((unlikely(pgd_none(*(p4d))) && __pud_alloc(mm, p4d, address)) ? \
>   NULL : pud_offset(p4d, address))
>  
> +#define pud_alloc_track(mm, p4d, address, mask)  
> \
> + ((unlikely(pgd_none(*(p4d))) && 
> \
> +   (__pud_alloc(mm, p4d, address) || 
> ({*(mask)|=PGTBL_P4D_MODIFIED;0;})))?   \
> +   NULL : pud_offset(p4d, address))
> +
>  #define p4d_alloc(mm, pgd, address)  (pgd)
>  #define p4d_alloc_track(mm, pgd, address, mask)  (pgd)
>  #define p4d_offset(pgd, start)   (pgd)
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 66e0977f970a..ad3b31c5bcc3 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -2088,35 +2088,35 @@ static inline pud_t *pud_alloc(struct mm_struct *mm, 
> p4d_t *p4d,
>   NULL : pud_offset(p4d, address);
>  }
>  
> -static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
> +static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> -
>  {
> - if (unlikely(pgd_none(*pgd))) {
> - if (__p4d_alloc(mm, pgd, address))
> + if (unlikely(p4d_none(*p4d))) {
> + if (__pud_alloc(mm, p4d, address))
>   return NULL;
> - *mod_mask |= PGTBL_PGD_MODIFIED;
> + *mod_mask |= PGTBL_P4D_MODIFIED;
>   }
>  
> - return p4d_offset(pgd, address);
> + return pud_offset(p4d, address);
>  }
>  
> -#endif /* !__ARCH_HAS_5LEVEL_HACK */
> -
> -static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d,
> +static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd,
>unsigned long address,
>pgtbl_mod_mask *mod_mask)
> +
>  {
> - if (unlikely(p4d_none(*p4d))) {
> - if (__pud_alloc(mm, p4d, address))
> + if (unlikely(pgd_none(*pgd))) {
> + if (__p4d_alloc(mm, pgd, address))
>   return NULL;
> - *mod_mask |= PGTBL_P4D_MODIFIED;
> + *mod_mask |= PGTBL_PGD_MODIFIED;
>   }
>  
> - return pud_offset(p4d, address);
> + return p4d_offset(pgd, address);
>  }
>  
> +#endif /* !__ARCH_HAS_5LEVEL_HACK */
> +
>  static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned 
> long address)
>  {
>   return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))?
> -- 
> 2.26.2
> 

-- 
Sincerely yours,
Mike.

Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice

2020-06-04 Thread Mike Rapoport

On Wed, Jun 03, 2020 at 04:44:17PM -0700, Guenter Roeck wrote:
> 
> sparc32 smp images in next-20200603 still crash for me with a spinlock
> recursion. s390 images hang early in boot. Several others (alpha, arm64,
> various ppc) don't even compile. I can run some more bisects over time,
> but this is becoming a full-time job :-(.

I've been able to bisect s390 hang to commit b614345f52bc ("x86/entry:
Clarify irq_{enter,exit}_rcu()").

After this commit, lockdep_hardirq_exit() is called twice on s390 (and
others) - one time in irq_exit_rcu() and another one in irq_exit():

/**
 * irq_exit_rcu() - Exit an interrupt context without updating RCU
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit_rcu(void)
{
__irq_exit_rcu();
 /* must be last! */
lockdep_hardirq_exit();
}

/**
 * irq_exit - Exit an interrupt context, update RCU and lockdep
 *
 * Also processes softirqs if needed and possible.
 */
void irq_exit(void)
{
irq_exit_rcu();
rcu_irq_exit();
 /* must be last! */
lockdep_hardirq_exit();
}

Removing the call in irq_exit() makes s390 boot again, and judging by the
x86 entry code, the comment /* must be last! */ is stale...

@Peter, @Thomas, can you comment please?

>From e51d50ee6f4d1f446decf91c2c67230da14ff82c Mon Sep 17 00:00:00 2001
From: Mike Rapoport 
Date: Thu, 4 Jun 2020 12:37:03 +0300
Subject: [PATCH] softirq: don't call lockdep_hardirq_exit() twice

After commit b614345f52bc ("x86/entry: Clarify irq_{enter,exit}_rcu()")
lockdep_hardirq_exit() is called twice on every architecture that uses
irq_exit(): one time in irq_exit_rcu() and another one in irq_exit().

Remove the extra call in irq_exit().

Signed-off-by: Mike Rapoport 
---
 kernel/softirq.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/kernel/softirq.c b/kernel/softirq.c
index a3eb6eba8c41..7523f4ce4c1d 100644
--- a/kernel/softirq.c
+++ b/kernel/softirq.c
@@ -427,7 +427,6 @@ static inline void __irq_exit_rcu(void)
 void irq_exit_rcu(void)
 {
__irq_exit_rcu();
-/* must be last! */
lockdep_hardirq_exit();
 }

@@ -440,8 +439,6 @@ void irq_exit(void)
 {
irq_exit_rcu();
rcu_irq_exit();
-/* must be last! */
-   lockdep_hardirq_exit();
 }

 /*
-- 
2.26.2

> Guenter

-- 
Sincerely yours,
Mike.

Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice

2020-06-04 Thread Mike Rapoport

On Wed, Jun 03, 2020 at 11:22:26PM -0700, Ira Weiny wrote:
> On Wed, Jun 03, 2020 at 04:44:17PM -0700, Guenter Roeck wrote:
> 
> With linux-next on sparc I too see the spinlock issue; something like:
> 
> ...
> Starting syslogd: BUG: spinlock recursion on CPU#0, S01syslogd/139
>  lock: 0xf53ef350, .magic: dead4ead, .owner: S01syslogd/139, .owner_cpu: 0
> CPU: 0 PID: 139 Comm: S01syslogd Not tainted 5.7.0-next-20200603 #1
> [f0067d00 : 
> do_raw_spin_lock+0xa8/0xd8 ] 
> [f00d598c : 
> copy_page_range+0x328/0x804 ] 
> [f0025c34 : 
> dup_mm+0x334/0x434 ] 
> [f0027198 : 
> copy_process+0x1248/0x12d4 ] 
> [f00273b8 : 
> _do_fork+0x54/0x30c ] 
> [f00276e4 : 
> do_fork+0x5c/0x6c ] 
> [f000de44 : 
> sparc_do_fork+0x18/0x38 ] 
> [f000b7f4 : 
> do_syscall+0x34/0x40 ] 
> [5010cd4c : 
> 0x5010cd4c ] 
> 
> 
> I'm going to bisect between there and HEAD.

The sparc issue should be fixed by 

https://lore.kernel.org/lkml/20200526173302.377-1-w...@kernel.org
 
> Ira

-- 
Sincerely yours,
Mike.

Re: [PATCH] arch/{mips,sparc,microblaze,powerpc}: Don't enable pagefault/preempt twice

2020-06-04 Thread Mike Rapoport

On Wed, Jun 03, 2020 at 04:44:17PM -0700, Guenter Roeck wrote:
> On 6/3/20 2:14 PM, Ira Weiny wrote:
> > On Wed, Jun 03, 2020 at 01:57:36PM -0700, Andrew Morton wrote:
> >> On Thu, 21 May 2020 10:42:50 -0700 Ira Weiny  wrote:
> >>
> >>>>>
> >>>>> Actually it occurs to me that the patch consolidating kmap_prot is odd 
> >>>>> for
> >>>>> sparc 32 bit...
> >>>>>
> >>>>> Its a long shot but could you try reverting this patch?
> >>>>>
> >>>>> 4ea7d2419e3f kmap: consolidate kmap_prot definitions
> >>>>>
> >>>>
> >>>> That is not easy to revert, unfortunately, due to several follow-up 
> >>>> patches.
> >>>
> >>> I have gotten your sparc tests to run and they all pass...
> >>>
> >>> 08:10:34 > ../linux-build-test/rootfs/sparc/run-qemu-sparc.sh 
> >>> Build reference: v5.7-rc4-17-g852b6f2edc0f
> >>>
> >>> Building sparc32:SPARCClassic:nosmp:scsi:hd ... running . passed
> >>> Building sparc32:SPARCbook:nosmp:scsi:cd ... running . passed
> >>> Building sparc32:LX:nosmp:noapc:scsi:hd ... running . passed
> >>> Building sparc32:SS-4:nosmp:initrd ... running . passed
> >>> Building sparc32:SS-5:nosmp:scsi:hd ... running . passed
> >>> Building sparc32:SS-10:nosmp:scsi:cd ... running . passed
> >>> Building sparc32:SS-20:nosmp:scsi:hd ... running . passed
> >>> Building sparc32:SS-600MP:nosmp:scsi:hd ... running . passed
> >>> Building sparc32:Voyager:nosmp:noapc:scsi:hd ... running . passed
> >>> Building sparc32:SS-4:smp:scsi:hd ... running . passed
> >>> Building sparc32:SS-5:smp:scsi:cd ... running . passed
> >>> Building sparc32:SS-10:smp:scsi:hd ... running . passed
> >>> Building sparc32:SS-20:smp:scsi:hd ... running . passed
> >>> Building sparc32:SS-600MP:smp:scsi:hd ... running . passed
> >>> Building sparc32:Voyager:smp:noapc:scsi:hd ... running . passed
> >>>
> >>> Is there another test I need to run?
> >>
> >> This all petered out, but as I understand it, this patchset still might
> >> have issues on various architectures.
> >>
> >> Can folks please provide an update on the testing status?
> > 
> > I believe the tests were failing for Guenter due to another patch set...[1]
> > 
> > My tests with just this series are working.
> > 
> >>From my understanding the other failures were unrelated.[2]
> > 
> > 
> > I've checked the patch above on top of the mmots which already has
> > Ira's patches and it booted fine. I've used sparc32_defconfig to build
> > the kernel and qemu-system-sparc with default machine and CPU.
> > 
> > 
> > Mike, am I wrong?  Do you think the kmap() patches are still causing issues?

sparc32 UP and microblaze work for me with next-20200603, but I didn't
test other architectures. 
 
> For my part, all I can say is that -next is in pretty bad shape right now.
> The summary of my tests says:
> 
> Build results:
>   total: 151 pass: 130 fail: 21
> Qemu test results:
>   total: 430 pass: 375 fail: 55
> 
> sparc32 smp images in next-20200603 still crash for me with a spinlock
> recursion.

I think this is because Will's fixes [1] are not yet in -next.

> s390 images hang early in boot. Several others (alpha, arm64,
> various ppc) don't even compile. I can run some more bisects over time,
> but this is becoming a full-time job :-(.
> 
> Guenter

[1] https://lore.kernel.org/lkml/20200526173302.377-1-w...@kernel.org
-- 
Sincerely yours,
Mike.

Re: [PATCH v4 03/14] arm64: add support for folded p4d page tables

2020-05-16 Thread Mike Rapoport

On Fri, May 15, 2020 at 11:40:12AM -0700, Andrew Morton wrote:
> On Tue, 14 Apr 2020 18:34:44 +0300 Mike Rapoport  wrote:
> 
> > Implement primitives necessary for the 4th level folding, add walks of p4d
> > level where appropriate, replace 5level-fixup.h with pgtable-nop4d.h and
> > remove __ARCH_USE_5LEVEL_HACK.
> 
> This needed some rework due to arm changes in linux-next.  Please check
> my handiwork and test it once I've merged this into linux-next?

Looks ok to me. It passed defconfig and a couple of randconfig builds
and qemu-system-aarch64 boots fine with this.

> Rejects were
> 
> --- 
> arch/arm64/include/asm/pgtable.h~arm64-add-support-for-folded-p4d-page-tables
> +++ arch/arm64/include/asm/pgtable.h
> @@ -596,49 +604,50 @@ static inline phys_addr_t pud_page_paddr
>  
>  #define pud_ERROR(pud)   __pud_error(__FILE__, __LINE__, 
> pud_val(pud))
>  
> -#define pgd_none(pgd)(!pgd_val(pgd))
> -#define pgd_bad(pgd) (!(pgd_val(pgd) & 2))
> -#define pgd_present(pgd) (pgd_val(pgd))
> +#define p4d_none(p4d)(!p4d_val(p4d))
> +#define p4d_bad(p4d) (!(p4d_val(p4d) & 2))
> +#define p4d_present(p4d) (p4d_val(p4d))
>  
> -static inline void set_pgd(pgd_t *pgdp, pgd_t pgd)
> +static inline void set_p4d(p4d_t *p4dp, p4d_t p4d)
>  {
> - if (in_swapper_pgdir(pgdp)) {
> - set_swapper_pgd(pgdp, pgd);
> + if (in_swapper_pgdir(p4dp)) {
> + set_swapper_pgd((pgd_t *)p4dp, __pgd(p4d_val(p4d)));
>   return;
>   }
>  
> - WRITE_ONCE(*pgdp, pgd);
> + WRITE_ONCE(*p4dp, p4d);
>   dsb(ishst);
>   isb();
>  }
>  
> -static inline void pgd_clear(pgd_t *pgdp)
> +static inline void p4d_clear(p4d_t *p4dp)
>  {
> - set_pgd(pgdp, __pgd(0));
> + set_p4d(p4dp, __p4d(0));
>  }
>  
> -static inline phys_addr_t pgd_page_paddr(pgd_t pgd)
> +static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
>  {
> - return __pgd_to_phys(pgd);
> + return __p4d_to_phys(p4d);
>  }
>  
>  /* Find an entry in the frst-level page table. */
>  #define pud_index(addr)  (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD 
> - 1))
>  
> -#define pud_offset_phys(dir, addr)   (pgd_page_paddr(READ_ONCE(*(dir))) + 
> pud_index(addr) * sizeof(pud_t))
> +#define pud_offset_phys(dir, addr)   (p4d_page_paddr(READ_ONCE(*(dir))) + 
> pud_index(addr) * sizeof(pud_t))
>  #define pud_offset(dir, addr)((pud_t 
> *)__va(pud_offset_phys((dir), (addr
>  
>  #define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, 
> addr))
> -#define pud_set_fixmap_offset(pgd, addr) 
> pud_set_fixmap(pud_offset_phys(pgd, addr))
> +#define pud_set_fixmap_offset(p4d, addr) 
> pud_set_fixmap(pud_offset_phys(p4d, addr))
>  #define pud_clear_fixmap()   clear_fixmap(FIX_PUD)
>  
> -#define pgd_page(pgd)
> pfn_to_page(__phys_to_pfn(__pgd_to_phys(pgd)))
> +#define p4d_page(p4d)
> pfn_to_page(__phys_to_pfn(__p4d_to_phys(p4d)))
>  
>  /* use ONLY for statically allocated translation tables */
>  #define pud_offset_kimg(dir,addr)((pud_t 
> *)__phys_to_kimg(pud_offset_phys((dir), (addr
>  
>  #else
>  
> +#define p4d_page_paddr(p4d)  ({ BUILD_BUG(); 0;})
>  #define pgd_page_paddr(pgd)  ({ BUILD_BUG(); 0;})
>  
>  /* Match pud_offset folding in  */
> 
> 
> 
> and
> 
> --- arch/arm64/kvm/mmu.c~arm64-add-support-for-folded-p4d-page-tables
> +++ arch/arm64/kvm/mmu.c
> @@ -469,7 +517,7 @@ static void stage2_flush_memslot(struct
>   do {
>   next = stage2_pgd_addr_end(kvm, addr, end);
>   if (!stage2_pgd_none(kvm, *pgd))
> - stage2_flush_puds(kvm, pgd, addr, next);
> + stage2_flush_p4ds(kvm, pgd, addr, next);
>   } while (pgd++, addr = next, addr != end);
>  }
>  
> 
> 
> Result:
> 
> From: Mike Rapoport 
> Subject: arm64: add support for folded p4d page tables
> 
> Implement primitives necessary for the 4th level folding, add walks of p4d
> level where appropriate, replace 5level-fixup.h with pgtable-nop4d.h and
> remove __ARCH_USE_5LEVEL_HACK.
> 
> Link: http://lkml.kernel.org/r/20200414153455.21744-4-r...@kernel.org
> Signed-off-by: Mike Rapoport 
> Cc: Arnd Bergmann 
> Cc: Benjamin Herrenschmidt 
> Cc: Brian Cain 
> Cc: Catalin Marinas 
> Cc: Christophe Leroy 
> Cc: Fenghua Yu 
> Cc: Geert Uytterhoeven 
> Cc: Guan Xuetao 
> Cc: James Morse 
> Cc: Jonas Bonn 
> Cc: Julien Thierry 
> Cc: Ley Foon Tan 
> Cc: Marc Zyngier 
> Cc: Michael Ellerman 
> Cc: Paul Mackerras 
> Cc: Rich Felke

[PATCH v2 12/12] mm: consolidate pgd_index() and pgd_offset{_k}() definitions

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

All architectures define pgd_offset() as an entry in the array of
PGDs indexed by the pgd_index(), where pgd_index() is

(address >> PGD_SHIFT) & (PTRS_PER_PGD - 1)

In most cases, pgd_offset() uses mm->pgd as the pointer to the
top-level page directory and pgd_offset_k() is a helper that presumes
that mm == &init_mm.

Use x86 implementation as the generic one and remove redundant definitions
of PGD accessors in most of arch/*/include/asm/pgtable.h

The generic implementation can be overridden by an architecture and this
ability is currently in use by these architectures:
* ia64 has custom implementation of pgd_index()
* s390 has custom definitions of all page table accessors

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgtable.h |  9 
 arch/arc/include/asm/pgtable.h   |  7 ---
 arch/arm/include/asm/pgtable.h   |  8 
 arch/arm64/include/asm/pgtable.h | 10 
 arch/arm64/kernel/hibernate.c|  4 +-
 arch/arm64/mm/kasan_init.c   |  2 +-
 arch/arm64/mm/mmu.c  |  8 ++--
 arch/csky/include/asm/pgtable.h  | 11 -
 arch/hexagon/include/asm/pgtable.h   | 18 ---
 arch/ia64/include/asm/pgtable.h  | 14 +-
 arch/m68k/include/asm/mcf_pgtable.h  | 11 -
 arch/m68k/include/asm/motorola_pgtable.h | 16 ---
 arch/m68k/include/asm/sun3_pgtable.h |  9 
 arch/microblaze/include/asm/pgtable.h|  7 ---
 arch/mips/include/asm/pgtable-32.h   |  8 
 arch/mips/include/asm/pgtable-64.h   |  8 
 arch/nds32/include/asm/pgtable.h |  6 ---
 arch/nios2/include/asm/pgtable.h |  8 
 arch/openrisc/include/asm/pgtable.h  | 10 
 arch/parisc/include/asm/pgtable.h|  9 
 arch/powerpc/include/asm/book3s/32/pgtable.h |  7 ---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 13 -
 arch/powerpc/include/asm/nohash/32/pgtable.h |  7 ---
 arch/powerpc/include/asm/nohash/64/pgtable.h | 12 -
 arch/riscv/include/asm/pgtable.h | 10 
 arch/riscv/mm/init.c | 12 ++---
 arch/s390/include/asm/pgtable.h  |  1 -
 arch/sh/include/asm/pgtable_32.h |  7 ---
 arch/sh/include/asm/pgtable_64.h | 11 -
 arch/sparc/include/asm/pgtable_32.h  |  8 
 arch/sparc/include/asm/pgtable_64.h  |  7 ---
 arch/um/include/asm/pgtable.h| 50 ++--
 arch/unicore32/include/asm/pgtable.h |  8 
 arch/x86/include/asm/pgtable.h   | 24 --
 arch/xtensa/include/asm/pgtable.h|  8 
 include/linux/pgtable.h  | 26 ++
 36 files changed, 55 insertions(+), 339 deletions(-)

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 314973d2810d..162c17b2631f 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -276,15 +276,6 @@ extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) 
&= ~_PAGE_FOW; return
 extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; 
return pte; }
 extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= 
__ACCESS_BITS; return pte; }
 
-#define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address))
-
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(address) pgd_offset(_mm, (address))
-
-/* to find an entry in a page-table-directory. */
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-#define pgd_offset(mm, address)((mm)->pgd+pgd_index(address))
-
 /*
  * The smp_read_barrier_depends() in the following functions are required to
  * order the load of *dir (the pointer in the top level page table) with any
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 1146905f594b..f1ed17edb085 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -316,13 +316,6 @@ static inline void set_pte_at(struct mm_struct *mm, 
unsigned long addr,
set_pte(ptep, pteval);
 }
 
-/*
- * All kernel related VM pages are in init's mm.
- */
-#define pgd_offset_k(address)  pgd_offset(_mm, address)
-#define pgd_index(addr)((addr) >> PGDIR_SHIFT)
-#define pgd_offset(mm, addr)   (((mm)->pgd)+pgd_index(addr))
-
 /*
  * Macro to quickly access the PGD entry, utlising the fact that some
  * arch may cache the pointer to Page Directory of "current" task
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 41543bc47660..c02f24400369 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -166,14 +166,6 @@ extern struct page *empty_zero_page;
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
-/* to find an entry in

[PATCH v2 11/12] mm: consolidate pud_index() and pud_offset() definitions

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

All architectures that have at least four-level page tables define
pud_offset() as an entry in the array of PUDs indexed by the pud_index(),
where pud_index() is

(address >> PUD_SHIFT) & (PTRS_PER_PUD - 1)

For most architectures the pud_offset() implementation relies on
the availability of p4d_page_vaddr() that converts a P4D entry value to the
virtual address of the page containing the PUD array.

Let's use such implementation as a generic and drop most of the definitions
of pud_index() and pud_offset() in <asm/pgtable.h> files.

The architectures that didn't provide p4d_page_vaddr() are updated to
have that defined.

The generic implementation can be overridden by an architecture and this
ability is currently in use by the following architecture:
* s390 has custom definitions of all page table accessors

Signed-off-by: Mike Rapoport 
---
 arch/arm64/include/asm/pgtable.h|  8 +---
 arch/csky/include/asm/pgtable.h |  2 --
 arch/ia64/include/asm/pgtable.h |  6 --
 arch/mips/include/asm/pgtable-32.h  |  1 -
 arch/mips/include/asm/pgtable-64.h  |  7 ---
 arch/powerpc/include/asm/book3s/64/pgtable.h|  4 
 arch/powerpc/include/asm/nohash/64/pgtable-4k.h |  4 
 arch/s390/include/asm/pgtable.h |  1 +
 arch/sh/include/asm/pgtable_32.h|  2 --
 arch/sh/include/asm/pgtable_64.h|  2 --
 arch/sparc/include/asm/pgtable_64.h |  5 -
 arch/x86/include/asm/pgtable.h  | 11 ---
 include/asm-generic/pgtable-nopud.h |  1 +
 include/linux/pgtable.h | 16 
 14 files changed, 23 insertions(+), 47 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d0175335a2f2..c0155814b374 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -626,11 +626,13 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
return __p4d_to_phys(p4d);
 }
 
-/* Find an entry in the frst-level page table. */
-#define pud_index(addr)(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD 
- 1))
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+{
+   return (unsigned long)__va(p4d_page_paddr(p4d));
+}
 
+/* Find an entry in the frst-level page table. */
 #define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + 
pud_index(addr) * sizeof(pud_t))
-#define pud_offset(dir, addr)  ((pud_t *)__va(pud_offset_phys((dir), 
(addr
 
 #define pud_set_fixmap(addr)   ((pud_t *)set_fixmap_offset(FIX_PUD, 
addr))
 #define pud_set_fixmap_offset(p4d, addr)   
pud_set_fixmap(pud_offset_phys(p4d, addr))
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index dc613f20e2e1..c5ab20970857 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -220,8 +220,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address)  pgd_offset(_mm, address)
 
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index d75d981e6ee1..4c24e5e18bff 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -383,12 +383,6 @@ pgd_offset (const struct mm_struct *mm, unsigned long 
address)
here.  */
 #define pgd_offset_gate(mm, addr)  pgd_offset_k(addr)
 
-#if CONFIG_PGTABLE_LEVELS == 4
-/* Find an entry in the second-level page table.. */
-#define pud_offset(dir,addr) \
-   ((pud_t *) p4d_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & 
(PTRS_PER_PUD - 1)))
-#endif
-
 /* atomic versions of the some PTE manipulations: */
 
 static inline int
diff --git a/arch/mips/include/asm/pgtable-32.h 
b/arch/mips/include/asm/pgtable-32.h
index 05b51c344939..9c0e7a5ffc75 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -199,7 +199,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t 
prot)
 #define pgd_offset_k(address) pgd_offset(_mm, address)
 
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 
 /* to find an entry in a page-table-directory */
 #define pgd_offset(mm, addr)   ((mm)->pgd + pgd_index(addr))
diff --git a/arch/mips/include/asm/pgtable-64.h 
b/arch/mips/include/asm/pgtable-64.h
index 21fb55510d35..38170fdac5bf 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -172,8 +172,6 @@
 
 extern pte_t invalid_pte_table[PTRS_PER_PTE];
 
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * For 4-level

[PATCH v2 10/12] mm: consolidate pmd_index() and pmd_offset() definitions

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

All architectures define pmd_index() as

(address >> PMD_SHIFT) & (PTRS_PER_PMD - 1)

and all architectures that have at least three-level page tables define
pmd_offset() as an entry in the array of PMDs indexed by the pmd_index().

For most architectures the pmd_offset() implementation relies on
the availability of pud_page_vaddr() that converts a PUD entry value to the
virtual address of the page containing the PMD array.

Let's use such implementation as a generic and drop most of the definitions
of pmd_index() and pmd_offset() in <asm/pgtable.h> files.

The architectures that didn't provide pud_page_vaddr() are updated to
have that defined.

The generic implementation can be overridden by an architecture and this
ability is currently in use by these architectures:
* alpha has special requirements for memory access ordering
* arm has custom definition of folded 2-level page tables
* s390 has custom definitions of all page table accessors

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgtable.h |  1 +
 arch/arm/include/asm/pgtable-2level.h|  1 +
 arch/arm/include/asm/pgtable-3level.h|  7 -
 arch/arm/include/asm/pgtable-nommu.h |  1 -
 arch/arm64/include/asm/pgtable.h |  8 --
 arch/c6x/include/asm/pgtable.h   |  1 -
 arch/csky/include/asm/pgtable.h  |  1 -
 arch/hexagon/include/asm/pgtable.h   |  9 --
 arch/ia64/include/asm/pgtable.h  |  4 ---
 arch/m68k/include/asm/motorola_pgtable.h |  7 -
 arch/microblaze/include/asm/pgtable.h|  1 -
 arch/mips/include/asm/pgtable-32.h   |  1 -
 arch/mips/include/asm/pgtable-64.h   |  6 
 arch/parisc/include/asm/pgtable.h|  8 --
 arch/parisc/kernel/pci-dma.c |  2 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h |  3 --
 arch/powerpc/include/asm/nohash/64/pgtable.h |  3 --
 arch/riscv/include/asm/pgtable-64.h  |  7 -
 arch/riscv/mm/init.c | 12 
 arch/s390/include/asm/pgtable.h  |  1 +
 arch/sh/include/asm/pgtable-3level.h |  7 -
 arch/sh/include/asm/pgtable_32.h |  1 -
 arch/sh/include/asm/pgtable_64.h |  1 -
 arch/sparc/include/asm/pgtable_32.h  |  9 +-
 arch/sparc/include/asm/pgtable_64.h  |  7 +
 arch/um/include/asm/pgtable-3level.h |  4 ---
 arch/um/include/asm/pgtable.h|  4 ---
 arch/x86/include/asm/pgtable.h   | 17 
 include/asm-generic/pgtable-nopmd.h  |  1 +
 include/linux/pgtable.h  | 29 ++--
 30 files changed, 44 insertions(+), 120 deletions(-)

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index dac20d03b727..314973d2810d 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -305,6 +305,7 @@ extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long 
address)
smp_read_barrier_depends(); /* see above */
return ret;
 }
+#define pmd_offset pmd_offset
 
 /* Find an entry in the third-level page table.. */
 extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address)
diff --git a/arch/arm/include/asm/pgtable-2level.h 
b/arch/arm/include/asm/pgtable-2level.h
index 9e084a464a97..3502c2f746ca 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -187,6 +187,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long 
addr)
 {
return (pmd_t *)pud;
 }
+#define pmd_offset pmd_offset
 
 #define pmd_large(pmd) (pmd_val(pmd) & 2)
 #define pmd_leaf(pmd)  (pmd_val(pmd) & 2)
diff --git a/arch/arm/include/asm/pgtable-3level.h 
b/arch/arm/include/asm/pgtable-3level.h
index 1933aed9f68d..fbb6693c3352 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -133,13 +133,6 @@ static inline pmd_t *pud_page_vaddr(pud_t pud)
return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
 }
 
-/* Find an entry in the second-level page table.. */
-#define pmd_index(addr)(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD 
- 1))
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
-{
-   return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
-}
-
 #define pmd_bad(pmd)   (!(pmd_val(pmd) & 2))
 
 #define copy_pmd(pmdpd,pmdps)  \
diff --git a/arch/arm/include/asm/pgtable-nommu.h 
b/arch/arm/include/asm/pgtable-nommu.h
index 1a758f14e0c3..d16aba48fa0a 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -22,7 +22,6 @@
 #define pgd_bad(pgd)   (0)
 #define pgd_clear(pgdp)
 #define kern_addr_valid(addr)  (1)
-#definepmd_offset(a, b)((void *)0)
 /* FIXME */
 /*
  * PMD_SHIFT determines the size of the area a second-level page table can map
diff

[PATCH v2 09/12] mm: consolidate pte_index() and pte_offset_*() definitions

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

All architectures define pte_index() as

(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)

and all architectures define pte_offset_kernel() as an entry
in the array of PTEs indexed by the pte_index().

For the most architectures the pte_offset_kernel() implementation relies on
the availability of pmd_page_vaddr() that converts a PMD entry value to the
virtual address of the page containing PTEs array.

Let's move x86 definitions of the PTE accessors to the generic place in
<linux/pgtable.h> and then simply drop the respective definitions from the
other architectures.

The architectures that didn't provide pmd_page_vaddr() are updated to have
that defined.

The generic implementation of pte_offset_kernel() can be overridden by an
architecture and alpha makes use of this because it has special ordering
requirements for its version of pte_offset_kernel().

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgtable.h |  4 +--
 arch/arc/include/asm/pgtable.h   | 15 --
 arch/arm/include/asm/pgtable.h   | 15 --
 arch/arm64/include/asm/pgtable.h | 10 +++
 arch/csky/include/asm/pgtable.h  | 16 ---
 arch/hexagon/include/asm/pgtable.h   | 25 +++--
 arch/ia64/include/asm/pgtable.h  |  9 --
 arch/m68k/include/asm/mcf_pgtable.h  | 12 +---
 arch/m68k/include/asm/motorola_pgalloc.h |  2 +-
 arch/m68k/include/asm/motorola_pgtable.h | 11 +---
 arch/m68k/include/asm/sun3_pgtable.h | 15 --
 arch/m68k/mm/init.c  |  2 +-
 arch/microblaze/include/asm/pgtable.h| 13 +++--
 arch/mips/include/asm/pgtable-32.h   | 12 
 arch/mips/include/asm/pgtable-64.h   | 11 
 arch/mips/kvm/mmu.c  | 20 +++---
 arch/mips/kvm/trap_emul.c|  2 +-
 arch/nds32/include/asm/pgtable.h | 12 +++-
 arch/nios2/include/asm/pgtable.h | 14 +++---
 arch/openrisc/include/asm/pgtable.h  | 23 
 arch/parisc/include/asm/pgtable.h| 15 +++---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 13 ++---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  8 --
 arch/powerpc/include/asm/nohash/32/pgtable.h | 13 ++---
 arch/powerpc/include/asm/nohash/64/pgtable.h |  7 -
 arch/powerpc/include/asm/pgtable.h   |  7 +
 arch/riscv/include/asm/pgtable.h | 10 ---
 arch/riscv/mm/init.c |  6 ++--
 arch/s390/include/asm/pgtable.h  | 10 ++-
 arch/s390/mm/pageattr.c  |  2 +-
 arch/sh/include/asm/pgtable_32.h | 15 --
 arch/sh/include/asm/pgtable_64.h | 12 
 arch/sparc/include/asm/pgalloc_64.h  |  2 +-
 arch/sparc/include/asm/pgtable_32.h  | 15 --
 arch/sparc/include/asm/pgtable_64.h  | 12 ++--
 arch/sparc/mm/srmmu.c| 10 ---
 arch/um/include/asm/pgtable.h| 13 -
 arch/unicore32/include/asm/pgtable.h |  9 --
 arch/x86/include/asm/pgtable.h   | 19 -
 arch/x86/include/asm/pgtable_32.h| 11 
 arch/x86/include/asm/pgtable_64.h|  4 ---
 arch/xtensa/include/asm/pgtable.h| 10 +--
 include/linux/pgtable.h  | 29 
 43 files changed, 113 insertions(+), 382 deletions(-)

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 1c263922beb3..dac20d03b727 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -314,9 +314,7 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, 
unsigned long address)
smp_read_barrier_depends(); /* see above */
return ret;
 }
-
-#define pte_offset_map(dir,addr)   pte_offset_kernel((dir),(addr))
-#define pte_unmap(pte) do { } while (0)
+#define pte_offset_kernel pte_offset_kernel
 
 extern pgd_t swapper_pg_dir[1024];
 
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 29137bcedd93..1146905f594b 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -248,9 +248,6 @@
 extern char empty_zero_page[PAGE_SIZE];
 #define ZERO_PAGE(vaddr)   (virt_to_page(empty_zero_page))
 
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte)  do { } while (0)
-
 #define set_pte(pteptr, pteval)((*(pteptr)) = (pteval))
 #define set_pmd(pmdptr, pmdval)(*(pmdptr) = pmdval)
 
@@ -282,18 +279,6 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 
 /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
 #define pte_pfn(pte)   (pte_val(pte) >> PAGE_SHIFT)
-#define __pte_index(addr)  (((addr) >> PAGE_SHIF

[PATCH v2 08/12] mm: pgtable: add shortcuts for accessing kernel PMD and PTE

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

The powerpc 32-bit implementation of pgtable has nice shortcuts for
accessing kernel PMD and PTE for a given virtual address.
Make these helpers available for all architectures.
Signed-off-by: Mike Rapoport 
---
 arch/arc/mm/highmem.c | 10 +---
 arch/arm/mach-sa1100/assabet.c|  2 +-
 arch/arm/mm/highmem.c |  4 ++--
 arch/arm/mm/ioremap.c | 31 
 arch/arm/mm/mm.h  |  5 
 arch/arm/mm/mmu.c |  7 +-
 arch/hexagon/include/asm/fixmap.h |  4 
 arch/m68k/mm/motorola.c   | 26 
 arch/microblaze/kernel/signal.c   |  8 +--
 arch/microblaze/mm/init.c |  9 ---
 arch/mips/include/asm/fixmap.h|  3 ---
 arch/mips/mm/c-r3k.c  | 10 ++--
 arch/mips/mm/c-r4k.c  | 10 ++--
 arch/mips/mm/c-tx39.c | 10 ++--
 arch/mips/mm/highmem.c|  2 +-
 arch/nds32/include/asm/pgtable.h  |  2 --
 arch/nds32/mm/init.c  | 13 ++
 arch/nds32/mm/proc.c  |  6 +
 arch/parisc/mm/fixmap.c   |  6 +
 arch/powerpc/include/asm/pgtable.h| 19 ---
 arch/powerpc/mm/book3s32/mmu.c|  2 +-
 arch/powerpc/mm/book3s32/tlb.c|  4 ++--
 arch/powerpc/mm/kasan/kasan_init_32.c |  8 +++
 arch/powerpc/mm/nohash/40x.c  |  4 ++--
 arch/powerpc/mm/pgtable_32.c  |  2 +-
 arch/s390/mm/pageattr.c   | 10 +---
 arch/sh/mm/cache-sh4.c|  8 +--
 arch/sh/mm/kmap.c |  5 +---
 arch/sparc/mm/highmem.c   | 12 ++
 arch/sparc/mm/init_64.c   |  6 +
 arch/sparc/mm/io-unit.c   | 10 ++--
 arch/sparc/mm/iommu.c |  8 +--
 arch/sparc/mm/srmmu.c | 34 +++
 arch/um/kernel/mem.c  | 10 +---
 arch/um/kernel/trap.c |  8 +--
 arch/unicore32/mm/mm.h| 10 
 arch/x86/mm/init_32.c | 26 +++-
 arch/xtensa/include/asm/fixmap.h  |  8 ---
 arch/xtensa/mm/highmem.c  |  2 +-
 arch/xtensa/mm/kasan_init.c   | 10 ++--
 arch/xtensa/mm/mmu.c  |  5 +---
 include/linux/pgtable.h   | 24 +++
 42 files changed, 80 insertions(+), 323 deletions(-)

diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index d6b74883fd1f..1b9f473c6369 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -92,17 +92,9 @@ EXPORT_SYMBOL(kunmap_atomic_high);
 
 static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
 {
-   pgd_t *pgd_k;
-   p4d_t *p4d_k;
-   pud_t *pud_k;
-   pmd_t *pmd_k;
+   pmd_t *pmd_k = pmd_off_k(kvaddr);
pte_t *pte_k;
 
-   pgd_k = pgd_offset_k(kvaddr);
-   p4d_k = p4d_offset(pgd_k, kvaddr);
-   pud_k = pud_offset(p4d_k, kvaddr);
-   pmd_k = pmd_offset(pud_k, kvaddr);
-
pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
if (!pte_k)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 8e3f5fdb4883..aa265ede5730 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -632,7 +632,7 @@ static void __init map_sa1100_gpio_regs( void )
int prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_DOMAIN(DOMAIN_IO);
pmd_t *pmd;
 
-   pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(virt), virt), 
virt), virt);
+   pmd = pmd_off_k(virt);
*pmd = __pmd(phys | prot);
flush_pmd_entry(pmd);
 }
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index e013f6b81328..187fab227b50 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -18,7 +18,7 @@
 static inline void set_fixmap_pte(int idx, pte_t pte)
 {
unsigned long vaddr = __fix_to_virt(idx);
-   pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+   pte_t *ptep = virt_to_kpte(vaddr);
 
set_pte_ext(ptep, pte, 0);
local_flush_tlb_kernel_page(vaddr);
@@ -26,7 +26,7 @@ static inline void set_fixmap_pte(int idx, pte_t pte)
 
 static inline pte_t get_fixmap_pte(unsigned long vaddr)
 {
-   pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+   pte_t *ptep = virt_to_kpte(vaddr);
 
return *ptep;
 }
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 75529d76d28c..000e821b 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -141,16 +141,8 @@ void __check_vmalloc_seq(struct mm_struct *mm)
 static void unmap_area_sections(unsigned long virt, unsigned long size)
 {
unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1));
-   pgd_t *pgd;
-   p4d_t *p4d;
-

[PATCH v2 07/12] x86/mm: simplify init_trampoline() and surrounding logic

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

There are three cases for the trampoline initialization:
* 32-bit does nothing
* 64-bit with kaslr disabled simply copies a PGD entry from the direct map
  to the trampoline PGD
* 64-bit with kaslr enabled maps the real mode trampoline at PUD level

These cases are currently differentiated by a bunch of ifdefs inside
arch/x86/include/asm/pgtable.h and the case of 64-bit with kaslr on uses
the pgd_index() helper.

Replacing the ifdefs with a static function in arch/x86/mm/init.c gives
clearer code and allows moving pgd_index() to the generic implementation in
include/linux/pgtable.h

Signed-off-by: Mike Rapoport 
---
 arch/x86/include/asm/kaslr.h   |  2 ++
 arch/x86/include/asm/pgtable.h | 15 +--
 arch/x86/include/asm/setup.h   |  9 +
 arch/x86/mm/init.c | 22 ++
 arch/x86/mm/kaslr.c| 33 +
 5 files changed, 35 insertions(+), 46 deletions(-)

diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index db7ba2feb947..0648190467ba 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose);
 
 #ifdef CONFIG_RANDOMIZE_MEMORY
 void kernel_randomize_memory(void);
+void init_trampoline_kaslr(void);
 #else
 static inline void kernel_randomize_memory(void) { }
+static inline void init_trampoline_kaslr(void) {}
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 
 #endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index d24f8e1f7250..6366136b0e46 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1070,27 +1070,14 @@ void init_mem_mapping(void);
 void early_alloc_pgt_buf(void);
 extern void memblock_find_dma_reserve(void);
 
+
 #ifdef CONFIG_X86_64
-/* Realmode trampoline initialization. */
 extern pgd_t trampoline_pgd_entry;
-static inline void __meminit init_trampoline_default(void)
-{
-   /* Default trampoline pgd value */
-   trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
-}
 
 void __init poking_init(void);
 
 unsigned long init_memory_mapping(unsigned long start,
  unsigned long end, pgprot_t prot);
-
-# ifdef CONFIG_RANDOMIZE_MEMORY
-void __meminit init_trampoline(void);
-# else
-#  define init_trampoline init_trampoline_default
-# endif
-#else
-static inline void init_trampoline(void) { }
 #endif
 
 /* local pte updates need not use xchg for locking */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ed8ec011a9fd..d95cacf210bb 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -78,6 +78,15 @@ static inline bool kaslr_enabled(void)
return !!(boot_params.hdr.loadflags & KASLR_FLAG);
 }
 
+/*
+ * Apply no randomization if KASLR was disabled at boot or if KASAN
+ * is enabled. KASAN shadow mappings rely on regions being PGD aligned.
+ */
+static inline bool kaslr_memory_enabled(void)
+{
+   return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
+}
+
 static inline unsigned long kaslr_offset(void)
 {
return (unsigned long)&_text - __START_KERNEL;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 235dd0e35741..e225ebb25197 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -682,6 +682,28 @@ static void __init memory_map_bottom_up(unsigned long 
map_start,
}
 }
 
+/*
+ * The real mode trampoline, which is required for bootstrapping CPUs
+ * occupies only a small area under the low 1MB.  See reserve_real_mode()
+ * for details.
+ *
+ * If KASLR is disabled the first PGD entry of the direct mapping is copied
+ * to map the real mode trampoline.
+ *
+ * If KASLR is enabled, copy only the PUD which covers the low 1MB
+ * area. This limits the randomization granularity to 1GB for both 4-level
+ * and 5-level paging.
+ */
+static void __init init_trampoline(void)
+{
+#ifdef CONFIG_X86_64
+   if (!kaslr_memory_enabled())
+   trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
+   else
+   init_trampoline_kaslr();
+#endif
+}
+
 void __init init_mem_mapping(void)
 {
unsigned long end;
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index faf02e1e1517..fb620fd9dae9 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -61,15 +61,6 @@ static inline unsigned long get_padding(struct 
kaslr_memory_region *region)
return (region->size_tb << TB_SHIFT);
 }
 
-/*
- * Apply no randomization if KASLR was disabled at boot or if KASAN
- * is enabled. KASAN shadow mappings rely on regions being PGD aligned.
- */
-static inline bool kaslr_memory_enabled(void)
-{
-   return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
-}
-
 /* Initialize base and padding for each memory region randomized with KASLR */
 void __init kernel_randomize_memory(void)
 {
@@ -148,7 +139,7 @@ void __init kernel_randomize_memory(void)

[PATCH v2 06/12] m68k/mm: move {cache, nocache}_page() definitions close to their user

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

The cache_page() and nocache_page() functions are only used by the motorola
MMU variant for setting caching attributes for the page table pages.

Move the definitions of these functions from
arch/m68k/include/asm/motorola_pgtable.h closer to their usage in
arch/m68k/mm/motorola.c and drop unused definition in
arch/m68k/include/asm/mcf_pgtable.h.

Signed-off-by: Mike Rapoport 
Acked-by: Greg Ungerer 
---
 arch/m68k/include/asm/mcf_pgtable.h  | 40 -
 arch/m68k/include/asm/motorola_pgtable.h | 44 
 arch/m68k/mm/motorola.c  | 43 +++
 3 files changed, 43 insertions(+), 84 deletions(-)

diff --git a/arch/m68k/include/asm/mcf_pgtable.h 
b/arch/m68k/include/asm/mcf_pgtable.h
index 0031cd387b75..737e826294f3 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -328,46 +328,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
 #define pte_offset_kernel(dir, address) \
((pte_t *) __pmd_page(*(dir)) + __pte_offset(address))
 
-/*
- * Disable caching for page at given kernel virtual address.
- */
-static inline void nocache_page(void *vaddr)
-{
-   pgd_t *dir;
-   p4d_t *p4dp;
-   pud_t *pudp;
-   pmd_t *pmdp;
-   pte_t *ptep;
-   unsigned long addr = (unsigned long) vaddr;
-
-   dir = pgd_offset_k(addr);
-   p4dp = p4d_offset(dir, addr);
-   pudp = pud_offset(p4dp, addr);
-   pmdp = pmd_offset(pudp, addr);
-   ptep = pte_offset_kernel(pmdp, addr);
-   *ptep = pte_mknocache(*ptep);
-}
-
-/*
- * Enable caching for page at given kernel virtual address.
- */
-static inline void cache_page(void *vaddr)
-{
-   pgd_t *dir;
-   p4d_t *p4dp;
-   pud_t *pudp;
-   pmd_t *pmdp;
-   pte_t *ptep;
-   unsigned long addr = (unsigned long) vaddr;
-
-   dir = pgd_offset_k(addr);
-   p4dp = p4d_offset(dir, addr);
-   pudp = pud_offset(p4dp, addr);
-   pmdp = pmd_offset(pudp, addr);
-   ptep = pte_offset_kernel(pmdp, addr);
-   *ptep = pte_mkcache(*ptep);
-}
-
 /*
  * Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e))
  */
diff --git a/arch/m68k/include/asm/motorola_pgtable.h 
b/arch/m68k/include/asm/motorola_pgtable.h
index 9e5a3de21e15..e1594acf7c7e 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -227,50 +227,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmdp, 
unsigned long address)
 #define pte_offset_map(pmdp,address) ((pte_t *)__pmd_page(*pmdp) + (((address) 
>> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
 #define pte_unmap(pte) ((void)0)
 
-/* Prior to calling these routines, the page should have been flushed
- * from both the cache and ATC, or the CPU might not notice that the
- * cache setting for the page has been changed. -jskov
- */
-static inline void nocache_page(void *vaddr)
-{
-   unsigned long addr = (unsigned long)vaddr;
-
-   if (CPU_IS_040_OR_060) {
-   pgd_t *dir;
-   p4d_t *p4dp;
-   pud_t *pudp;
-   pmd_t *pmdp;
-   pte_t *ptep;
-
-   dir = pgd_offset_k(addr);
-   p4dp = p4d_offset(dir, addr);
-   pudp = pud_offset(p4dp, addr);
-   pmdp = pmd_offset(pudp, addr);
-   ptep = pte_offset_kernel(pmdp, addr);
-   *ptep = pte_mknocache(*ptep);
-   }
-}
-
-static inline void cache_page(void *vaddr)
-{
-   unsigned long addr = (unsigned long)vaddr;
-
-   if (CPU_IS_040_OR_060) {
-   pgd_t *dir;
-   p4d_t *p4dp;
-   pud_t *pudp;
-   pmd_t *pmdp;
-   pte_t *ptep;
-
-   dir = pgd_offset_k(addr);
-   p4dp = p4d_offset(dir, addr);
-   pudp = pud_offset(p4dp, addr);
-   pmdp = pmd_offset(pudp, addr);
-   ptep = pte_offset_kernel(pmdp, addr);
-   *ptep = pte_mkcache(*ptep);
-   }
-}
-
 /* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */
 #define __swp_type(x)  (((x).val >> 4) & 0xff)
 #define __swp_offset(x)((x).val >> 12)
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 904c2a663977..8e5e74121a78 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -45,6 +45,49 @@ unsigned long mm_cachebits;
 EXPORT_SYMBOL(mm_cachebits);
 #endif
 
+/* Prior to calling these routines, the page should have been flushed
+ * from both the cache and ATC, or the CPU might not notice that the
+ * cache setting for the page has been changed. -jskov
+ */
+static inline void nocache_page(void *vaddr)
+{
+   unsigned long addr = (unsigned long)vaddr;
+
+   if (CPU_IS_040_OR_060) {
+   pgd_t *dir;
+   p4d_t *p4dp;
+   pud_t *pudp;
+   pmd_t *pmdp;
+

[PATCH v2 05/12] m68k/mm/motorola: move comment about page table allocation functions

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

The comment about page table allocation functions resides in
include/asm/motorola_pgtable.h while the functions live in
include/asm/motorola_pgalloc.h.

Move the comment close to the code.

Signed-off-by: Mike Rapoport 
---
 arch/m68k/include/asm/motorola_pgalloc.h | 6 ++
 arch/m68k/include/asm/motorola_pgtable.h | 6 --
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/m68k/include/asm/motorola_pgalloc.h 
b/arch/m68k/include/asm/motorola_pgalloc.h
index c66e42917912..f3cb453a07b7 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -18,6 +18,12 @@ extern void init_pointer_table(void *table, int type);
 extern void *get_pointer_table(int type);
 extern int free_pointer_table(void *table, int type);
 
+/*
+ * Allocate and free page tables. The xxx_kernel() versions are
+ * used to allocate a kernel page table - this turns on ASN bits
+ * if any.
+ */
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
return get_pointer_table(TABLE_PTE);
diff --git a/arch/m68k/include/asm/motorola_pgtable.h 
b/arch/m68k/include/asm/motorola_pgtable.h
index 48f19f0ab1e7..9e5a3de21e15 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -227,12 +227,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmdp, 
unsigned long address)
 #define pte_offset_map(pmdp,address) ((pte_t *)__pmd_page(*pmdp) + (((address) 
>> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
 #define pte_unmap(pte) ((void)0)
 
-/*
- * Allocate and free page tables. The xxx_kernel() versions are
- * used to allocate a kernel page table - this turns on ASN bits
- * if any.
- */
-
 /* Prior to calling these routines, the page should have been flushed
  * from both the cache and ATC, or the CPU might not notice that the
  * cache setting for the page has been changed. -jskov
-- 
2.26.2

[PATCH v2 04/12] csky: replace definitions of __pXd_offset() with pXd_index()

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

All architectures use pXd_index() to get an entry in the page table page
corresponding to a virtual address.

Align csky with other architectures.

Signed-off-by: Mike Rapoport 
---
 arch/csky/include/asm/pgtable.h | 5 ++---
 arch/csky/mm/fault.c| 2 +-
 arch/csky/mm/highmem.c  | 2 +-
 arch/csky/mm/init.c | 6 +++---
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 9d2d16db237d..2eff4aea51b3 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -229,9 +229,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
-#define __pgd_offset(address)  pgd_index(address)
-#define __pud_offset(address)  (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#define __pmd_offset(address)  (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address)  pgd_offset(_mm, address)
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index 4e6dc68f3258..4055d430c0c8 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned 
long write,
 * Do _not_ use "tsk" here. We might be inside
 * an interrupt in the middle of a task switch..
 */
-   int offset = __pgd_offset(address);
+   int offset = pgd_index(address);
pgd_t *pgd, *pgd_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c
index 3b3f622f5ae9..89ec32e602a1 100644
--- a/arch/csky/mm/highmem.c
+++ b/arch/csky/mm/highmem.c
@@ -92,7 +92,7 @@ static void __init kmap_pages_init(void)
vaddr = PKMAP_BASE;
fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir);
 
-   pgd = swapper_pg_dir + __pgd_offset(vaddr);
+   pgd = swapper_pg_dir + pgd_index(vaddr);
pud = (pud_t *)pgd;
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index eda2b4291485..af627128314f 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -157,9 +157,9 @@ void __init fixrange_init(unsigned long start, unsigned 
long end,
unsigned long vaddr;
 
vaddr = start;
-   i = __pgd_offset(vaddr);
-   j = __pud_offset(vaddr);
-   k = __pmd_offset(vaddr);
+   i = pgd_index(vaddr);
+   j = pud_index(vaddr);
+   k = pmd_index(vaddr);
pgd = pgd_base + i;
 
for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
-- 
2.26.2

[PATCH v2 03/12] mm: reorder includes after introduction of linux/pgtable.h

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

The replacement of <asm/pgtable.h> with <linux/pgtable.h> left the include
of the latter in the middle of the asm includes. Fix this up with the aid of
the script below and manual adjustments here and there.

import sys
import re

if len(sys.argv) is not 3:
print "USAGE: %s <file> <header>" % (sys.argv[0])
sys.exit(1)

hdr_to_move="#include <linux/%s>" % sys.argv[2]
moved = False
in_hdrs = False

with open(sys.argv[1], "r") as f:
lines = f.readlines()
for _line in lines:
line = _line.rstrip('\n')
if line == hdr_to_move:
continue
if line.startswith("#include 
---
 arch/alpha/kernel/proto.h | 2 --
 arch/arc/mm/highmem.c | 2 +-
 arch/arc/mm/tlbex.S   | 2 +-
 arch/arm/include/asm/efi.h| 1 -
 arch/arm/include/asm/fixmap.h | 2 +-
 arch/arm/kernel/head.S| 2 +-
 arch/arm/kernel/suspend.c | 2 +-
 arch/arm/kernel/vmlinux.lds.S | 2 +-
 arch/arm/mach-integrator/core.c   | 2 +-
 arch/arm/mach-keystone/platsmp.c  | 2 +-
 arch/arm/mach-sa1100/hackkit.c| 2 +-
 arch/arm/mach-zynq/common.c   | 2 +-
 arch/arm/mm/idmap.c   | 2 +-
 arch/arm/mm/mm.h  | 1 -
 arch/arm/mm/proc-arm1020.S| 2 +-
 arch/arm/mm/proc-arm1020e.S   | 2 +-
 arch/arm/mm/proc-arm1022.S| 2 +-
 arch/arm/mm/proc-arm1026.S| 2 +-
 arch/arm/mm/proc-arm720.S | 2 +-
 arch/arm/mm/proc-arm740.S | 2 +-
 arch/arm/mm/proc-arm7tdmi.S   | 2 +-
 arch/arm/mm/proc-arm920.S | 2 +-
 arch/arm/mm/proc-arm922.S | 2 +-
 arch/arm/mm/proc-arm925.S | 2 +-
 arch/arm/mm/proc-arm926.S | 2 +-
 arch/arm/mm/proc-arm940.S | 2 +-
 arch/arm/mm/proc-arm946.S | 2 +-
 arch/arm/mm/proc-arm9tdmi.S   | 2 +-
 arch/arm/mm/proc-fa526.S  | 2 +-
 arch/arm/mm/proc-feroceon.S   | 2 +-
 arch/arm/mm/proc-mohawk.S | 2 +-
 arch/arm/mm/proc-sa110.S  | 2 +-
 arch/arm/mm/proc-sa1100.S | 2 +-
 arch/arm/mm/proc-v6.S | 2 +-
 arch/arm/mm/proc-v7.S | 2 +-
 arch/arm/mm/proc-xsc3.S   | 2 +-
 arch/arm/mm/proc-xscale.S | 2 +-
 arch/arm/mm/pv-fixup-asm.S| 2 +-
 arch/arm64/include/asm/io.h   | 2 +-
 arch/arm64/include/asm/kvm_mmu.h  | 2 +-
 arch/arm64/include/asm/mmu_context.h  | 2 +-
 arch/arm64/include/asm/vmap_stack.h   | 2 +-
 arch/arm64/kernel/acpi.c  | 2 +-
 arch/arm64/kernel/head.S  | 2 +-
 arch/arm64/kernel/kaslr.c | 2 +-
 arch/arm64/kernel/suspend.c   | 2 +-
 arch/arm64/kernel/vmlinux.lds.S   | 1 -
 arch/arm64/mm/proc.S  | 2 +-
 arch/ia64/kernel/entry.S  | 2 +-
 arch/ia64/kernel/head.S   | 3 ++-
 arch/ia64/kernel/irq_ia64.c   | 2 +-
 arch/ia64/kernel/ivt.S| 2 +-
 arch/ia64/kernel/kprobes.c| 2 +-
 arch/ia64/kernel/mca_asm.S| 2 +-
 arch/ia64/kernel/relocate_kernel.S| 4 +---
 arch/ia64/kernel/setup.c  | 2 +-
 arch/ia64/kernel/uncached.c   | 2 +-
 arch/ia64/kernel/vmlinux.lds.S| 2 +-
 arch/m68k/68000/m68VZ328.c| 2 +-
 arch/m68k/include/asm/sun3xflop.h | 2 +-
 arch/m68k/kernel/head.S   | 2 +-
 arch/microblaze/include/asm/pgalloc.h | 2 +-
 arch/microblaze/kernel/hw_exception_handler.S | 2 +-
 arch/microblaze/kernel/module.c   | 2 +-
 arch/microblaze/kernel/setup.c| 2 +-
 arch/microblaze/mm/pgtable.c  | 2 +-
 arch/mips/jazz/irq.c  | 2 +-
 arch/mips/jazz/setup.c| 2 +-
 arch/mips/kvm/mips.c

[PATCH v2 01/12] mm: don't include asm/pgtable.h if linux/mm.h is already included

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

The linux/mm.h header includes <asm/pgtable.h> to allow inlining of the
functions involving page table manipulations, e.g. pte_alloc() and
pmd_alloc(). So, there is no point in explicitly including <asm/pgtable.h> in
the files that include <linux/mm.h>.

The include statements in such cases are removed with a simple loop:

for f in $(git grep -l "include <linux/mm.h>") ; do
sed -i -e '/include <asm\/pgtable.h>/ d' $f
done

Signed-off-by: Mike Rapoport 
---
 arch/alpha/boot/bootp.c   | 1 -
 arch/alpha/boot/bootpz.c  | 1 -
 arch/alpha/boot/main.c| 1 -
 arch/alpha/include/asm/io.h   | 1 -
 arch/alpha/kernel/process.c   | 1 -
 arch/alpha/kernel/ptrace.c| 1 -
 arch/alpha/kernel/setup.c | 1 -
 arch/alpha/kernel/smp.c   | 1 -
 arch/alpha/kernel/sys_alcor.c | 1 -
 arch/alpha/kernel/sys_cabriolet.c | 1 -
 arch/alpha/kernel/sys_dp264.c | 1 -
 arch/alpha/kernel/sys_eb64p.c | 1 -
 arch/alpha/kernel/sys_eiger.c | 1 -
 arch/alpha/kernel/sys_jensen.c| 1 -
 arch/alpha/kernel/sys_marvel.c| 1 -
 arch/alpha/kernel/sys_miata.c | 1 -
 arch/alpha/kernel/sys_mikasa.c| 1 -
 arch/alpha/kernel/sys_nautilus.c  | 1 -
 arch/alpha/kernel/sys_noritake.c  | 1 -
 arch/alpha/kernel/sys_rawhide.c   | 1 -
 arch/alpha/kernel/sys_ruffian.c   | 1 -
 arch/alpha/kernel/sys_rx164.c | 1 -
 arch/alpha/kernel/sys_sable.c | 1 -
 arch/alpha/kernel/sys_sio.c   | 1 -
 arch/alpha/kernel/sys_sx164.c | 1 -
 arch/alpha/kernel/sys_takara.c| 1 -
 arch/alpha/kernel/sys_titan.c | 1 -
 arch/alpha/kernel/sys_wildfire.c  | 1 -
 arch/alpha/mm/init.c  | 1 -
 arch/arm/kernel/machine_kexec.c   | 1 -
 arch/arm/kernel/module.c  | 1 -
 arch/arm/kernel/ptrace.c  | 1 -
 arch/arm/kernel/smp.c | 1 -
 arch/arm/mach-ebsa110/core.c  | 1 -
 arch/arm/mach-footbridge/common.c | 1 -
 arch/arm/mach-imx/mm-imx21.c  | 1 -
 arch/arm/mach-imx/mm-imx27.c  | 1 -
 arch/arm/mach-imx/mm-imx3.c   | 1 -
 arch/arm/mach-iop32x/i2c.c| 1 -
 arch/arm/mach-iop32x/iq31244.c| 1 -
 arch/arm/mach-iop32x/iq80321.c| 1 -
 arch/arm/mach-iop32x/n2100.c  | 1 -
 arch/arm/mach-ixp4xx/common.c | 1 -
 arch/arm/mach-sa1100/assabet.c| 1 -
 arch/arm/mm/copypage-v4mc.c   | 1 -
 arch/arm/mm/copypage-v6.c | 1 -
 arch/arm/mm/copypage-xscale.c | 1 -
 arch/arm/mm/dump.c| 1 -
 arch/arm/mm/fault-armv.c  | 1 -
 arch/arm/mm/fault.c   | 1 -
 arch/arm/mm/pageattr.c| 1 -
 arch/arm64/kernel/hibernate.c | 1 -
 arch/arm64/kernel/ptrace.c| 1 -
 arch/arm64/kernel/smp.c   | 1 -
 arch/arm64/mm/dump.c  | 1 -
 arch/arm64/mm/fault.c | 1 -
 arch/arm64/mm/kasan_init.c| 1 -
 arch/arm64/mm/pageattr.c  | 1 -
 arch/csky/kernel/module.c | 1 -
 arch/csky/kernel/ptrace.c | 1 -
 arch/csky/mm/init.c   | 1 -
 arch/csky/mm/tlb.c| 1 -
 arch/h8300/kernel/process.c   | 1 -
 arch/h8300/kernel/setup.c | 1 -
 arch/h8300/kernel/signal.c| 1 -
 arch/h8300/mm/fault.c | 1 -
 arch/h8300/mm/init.c  | 1 -
 arch/h8300/mm/memory.c| 1 -
 arch/hexagon/mm/vm_fault.c| 1 -
 arch/ia64/kernel/efi.c| 1 -
 arch/ia64/kernel/ptrace.c | 1 -
 arch/ia64/kernel/smp.c| 1 -
 arch/ia64/kernel/smpboot.c| 1 -
 arch/ia64/mm/contig.c | 1 -
 arch/ia64/mm/fault.c  | 1 -
 arch/m68k/68000/timers.c  | 1 -
 arch/m68k/amiga/config.c  | 1 -
 arch/m68k/apollo/config.c | 1 -
 arch/m68k/atari/atasound.c| 1 -
 arch/m68k/atari/stram.c   | 1 -
 arch/m68k/bvme6000/config.c   | 1 -
 arch/m68k/kernel/process.c| 1 -
 arch/m68k/kernel/ptrace.c | 1 -
 arch/m68k/kernel/setup_no.c   | 1 -
 arch/m68k/kernel/signal.c | 1 -
 arch/m68k/kernel/uboot.c

[PATCH v2 00/12] mm: consolidate definitions of page table accessors

2020-05-14 Thread Mike Rapoport

From: Mike Rapoport 

Hi,

The low level page table accessors (pXY_index(), pXY_offset()) are
duplicated across all architectures and sometimes more than once. For
instance, we have 31 definitions of pgd_offset() for 25 supported
architectures.

Most of these definitions are actually identical and typically it boils
down to, e.g. 

static inline unsigned long pmd_index(unsigned long address)
{
return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}

static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}

These definitions can be shared among 90% of the arches provided XYZ_SHIFT,
PTRS_PER_XYZ and xyz_page_vaddr() are defined.

For architectures that really need a custom version there is always
possibility to override the generic version with the usual ifdefs magic.

These patches introduce include/linux/pgtable.h that replaces
include/asm-generic/pgtable.h and add the definitions of the page table
accessors to the new header.

The patches are vs the v5.7-rc5-mmotm-2020-05-13-20-30

v2 changes:
* s/morotola/motorola in the changelog of patch 6
* replace FIXME with a comment about pmd_off and virt_to_kpte in patch 8
* rebase on v5.7-rc5-mmotm-2020-05-13-20-30

Mike Rapoport (12):
  mm: don't include asm/pgtable.h if linux/mm.h is already included
  mm: introduce include/linux/pgtable.h
  mm: reorder includes after introduction of linux/pgtable.h
  csky: replace definitions of __pXd_offset() with pXd_index()
  m68k/mm/motorola: move comment about page table allocation functions
  m68k/mm: move {cache,nocache}_page() definitions close to their user
  x86/mm: simplify init_trampoline() and surrounding logic
  mm: pgtable: add shortcuts for accessing kernel PMD and PTE
  mm: consolidate pte_index() and pte_offset_*() definitions
  mm: consolidate pmd_index() and pmd_offset() definitions
  mm: consolidate pud_index() and pud_offset() definitions
  mm: consolidate pgd_index() and pgd_offset{_k}() definitions

 arch/alpha/boot/bootp.c   |   1 -
 arch/alpha/boot/bootpz.c  |   1 -
 arch/alpha/boot/main.c|   1 -
 arch/alpha/include/asm/io.h   |   1 -
 arch/alpha/include/asm/pgtable.h  |  16 +--
 arch/alpha/kernel/process.c   |   1 -
 arch/alpha/kernel/proto.h |   2 -
 arch/alpha/kernel/ptrace.c|   1 -
 arch/alpha/kernel/setup.c |   1 -
 arch/alpha/kernel/smp.c   |   1 -
 arch/alpha/kernel/sys_alcor.c |   1 -
 arch/alpha/kernel/sys_cabriolet.c |   1 -
 arch/alpha/kernel/sys_dp264.c |   1 -
 arch/alpha/kernel/sys_eb64p.c |   1 -
 arch/alpha/kernel/sys_eiger.c |   1 -
 arch/alpha/kernel/sys_jensen.c|   1 -
 arch/alpha/kernel/sys_marvel.c|   1 -
 arch/alpha/kernel/sys_miata.c |   1 -
 arch/alpha/kernel/sys_mikasa.c|   1 -
 arch/alpha/kernel/sys_nautilus.c  |   1 -
 arch/alpha/kernel/sys_noritake.c  |   1 -
 arch/alpha/kernel/sys_rawhide.c   |   1 -
 arch/alpha/kernel/sys_ruffian.c   |   1 -
 arch/alpha/kernel/sys_rx164.c |   1 -
 arch/alpha/kernel/sys_sable.c |   1 -
 arch/alpha/kernel/sys_sio.c   |   1 -
 arch/alpha/kernel/sys_sx164.c |   1 -
 arch/alpha/kernel/sys_takara.c|   1 -
 arch/alpha/kernel/sys_titan.c |   1 -
 arch/alpha/kernel/sys_wildfire.c  |   1 -
 arch/alpha/mm/init.c  |   1 -
 arch/arc/include/asm/pgtable.h|  24 
 arch/arc/mm/highmem.c |  12 +-
 arch/arc/mm/tlbex.S   |   2 +-
 arch/arm/include/asm/efi.h|   1 -
 arch/arm/include/asm/fixmap.h |   2 +-
 arch/arm/include/asm/idmap.h  |   2 +-
 arch/arm/include/asm/pgtable-2level.h |   1 +
 arch/arm/include/asm/pgtable-3level.h |   7 -
 arch/arm/include/asm/pgtable-nommu.h  |   3 -
 arch/arm/include/asm/pgtable.h|  25 
 arch/arm/kernel/head.S|   2 +-
 arch/arm/kernel/machine_kexec.c   |   1 -
 arch/arm/kernel/module.c  |   1 -
 arch/arm/kernel/ptrace.c  |   1 -
 arch/arm/kernel/smp.c |   1 -
 arch/arm/kernel/suspend.c |   2 +-
 arch/arm/kernel/vmlinux.lds.S |   2 +-
 arch/arm/mach-ebsa110/core.c  |   1 -
 arch/arm/mach-footbridge/common.c |   1 -
 arch/arm/mach-imx/mm-imx21.c  |   1 -
 arch/arm/mach-imx/mm-imx27.c  |   1 -
 arch/arm/mach-imx/mm-imx3.c   |   1 -
 arch/arm/mach-integra

Re: [PATCH 08/12] mm: pgtable: add shortcuts for accessing kernel PMD and PTE

2020-05-12 Thread Mike Rapoport

On Tue, May 12, 2020 at 12:24:41PM -0700, Matthew Wilcox wrote:
> On Tue, May 12, 2020 at 09:44:18PM +0300, Mike Rapoport wrote:
> > +++ b/include/linux/pgtable.h
> > @@ -28,6 +28,24 @@
> >  #define USER_PGTABLES_CEILING  0UL
> >  #endif
> >  
> > +/* FIXME: */
> 
> Fix you what?  Add documentation?

Ouch, indeed :)

> > +static inline pmd_t *pmd_off(struct mm_struct *mm, unsigned long va)
> > +{
> > +   return pmd_offset(pud_offset(p4d_offset(pgd_offset(mm, va), va), va), 
> > va);
> > +}

-- 
Sincerely yours,
Mike.

Re: [PATCH 03/12] mm: reorder includes after introduction of linux/pgtable.h

2020-05-12 Thread Mike Rapoport

On Tue, May 12, 2020 at 12:20:13PM -0700, Matthew Wilcox wrote:
> On Tue, May 12, 2020 at 09:44:13PM +0300, Mike Rapoport wrote:
> > diff --git a/arch/alpha/kernel/proto.h b/arch/alpha/kernel/proto.h
> > index a093cd45ec79..701a05090141 100644
> > --- a/arch/alpha/kernel/proto.h
> > +++ b/arch/alpha/kernel/proto.h
> > @@ -2,8 +2,6 @@
> >  #include 
> >  #include 
> >  
> > -#include 
> > -
> >  /* Prototypes of functions used across modules here in this directory.  */
> >  
> >  #define vucp   volatile unsigned char  *
> 
> Looks like your script has a bug if linux/pgtable.h is the last include
> in the file?

The script indeed cannot handle all the corner cases, but this is not one of
them.
I initially started looking into removing asm/pgtable.h where it was not
needed, but I ran out of patience very soon. This file is what
sneaked in from that attempt.

-- 
Sincerely yours,
Mike.

[PATCH 12/12] mm: consolidate pgd_index() and pgd_offset{_k}() definitions

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

All architectures define pgd_offset() as an entry in the array of
PGDs indexed by the pgd_index(), where pgd_index() is

(address >> PGD_SHIFT) & (PTRS_PER_PGD - 1)

In most cases, pgd_offset() uses mm->pgd as the pointer to the
top-level page directory and pgd_offset_k() is a helper that presumes
that mm == &init_mm.

Use x86 implementation as the generic one and remove redundant definitions
of PGD accessors in most of arch/*/include/asm/pgtable.h

The generic implementation can be overridden by an architecture and this
ability is currently in use by these architectures:
* ia64 has custom implementation of pgd_index()
* s390 has custom definitions of all page table accessors

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgtable.h |  9 
 arch/arc/include/asm/pgtable.h   |  7 ---
 arch/arm/include/asm/pgtable.h   |  8 
 arch/arm64/include/asm/pgtable.h | 10 
 arch/arm64/kernel/hibernate.c|  4 +-
 arch/arm64/mm/kasan_init.c   |  2 +-
 arch/arm64/mm/mmu.c  |  8 ++--
 arch/csky/include/asm/pgtable.h  | 11 -
 arch/hexagon/include/asm/pgtable.h   | 18 ---
 arch/ia64/include/asm/pgtable.h  | 14 +-
 arch/m68k/include/asm/mcf_pgtable.h  | 11 -
 arch/m68k/include/asm/motorola_pgtable.h | 16 ---
 arch/m68k/include/asm/sun3_pgtable.h |  9 
 arch/microblaze/include/asm/pgtable.h|  7 ---
 arch/mips/include/asm/pgtable-32.h   |  8 
 arch/mips/include/asm/pgtable-64.h   |  8 
 arch/nds32/include/asm/pgtable.h |  6 ---
 arch/nios2/include/asm/pgtable.h |  8 
 arch/openrisc/include/asm/pgtable.h  | 10 
 arch/parisc/include/asm/pgtable.h|  9 
 arch/powerpc/include/asm/book3s/32/pgtable.h |  7 ---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 13 -
 arch/powerpc/include/asm/nohash/32/pgtable.h |  7 ---
 arch/powerpc/include/asm/nohash/64/pgtable.h | 12 -
 arch/riscv/include/asm/pgtable.h | 10 
 arch/riscv/mm/init.c | 12 ++---
 arch/s390/include/asm/pgtable.h  |  1 -
 arch/sh/include/asm/pgtable_32.h |  7 ---
 arch/sh/include/asm/pgtable_64.h | 11 -
 arch/sparc/include/asm/pgtable_32.h  |  8 
 arch/sparc/include/asm/pgtable_64.h  |  7 ---
 arch/um/include/asm/pgtable.h| 50 ++--
 arch/unicore32/include/asm/pgtable.h |  8 
 arch/x86/include/asm/pgtable.h   | 24 --
 arch/xtensa/include/asm/pgtable.h|  8 
 include/linux/pgtable.h  | 26 ++
 36 files changed, 55 insertions(+), 339 deletions(-)

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 314973d2810d..162c17b2631f 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -276,15 +276,6 @@ extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) 
&= ~_PAGE_FOW; return
 extern inline pte_t pte_mkdirty(pte_t pte) { pte_val(pte) |= __DIRTY_BITS; 
return pte; }
 extern inline pte_t pte_mkyoung(pte_t pte) { pte_val(pte) |= 
__ACCESS_BITS; return pte; }
 
-#define PAGE_DIR_OFFSET(tsk,address) pgd_offset((tsk),(address))
-
-/* to find an entry in a kernel page-table-directory */
-#define pgd_offset_k(address) pgd_offset(_mm, (address))
-
-/* to find an entry in a page-table-directory. */
-#define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-#define pgd_offset(mm, address)((mm)->pgd+pgd_index(address))
-
 /*
  * The smp_read_barrier_depends() in the following functions are required to
  * order the load of *dir (the pointer in the top level page table) with any
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 1146905f594b..f1ed17edb085 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -316,13 +316,6 @@ static inline void set_pte_at(struct mm_struct *mm, 
unsigned long addr,
set_pte(ptep, pteval);
 }
 
-/*
- * All kernel related VM pages are in init's mm.
- */
-#define pgd_offset_k(address)  pgd_offset(_mm, address)
-#define pgd_index(addr)((addr) >> PGDIR_SHIFT)
-#define pgd_offset(mm, addr)   (((mm)->pgd)+pgd_index(addr))
-
 /*
  * Macro to quickly access the PGD entry, utlising the fact that some
  * arch may cache the pointer to Page Directory of "current" task
diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
index 41543bc47660..c02f24400369 100644
--- a/arch/arm/include/asm/pgtable.h
+++ b/arch/arm/include/asm/pgtable.h
@@ -166,14 +166,6 @@ extern struct page *empty_zero_page;
 
 extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
 
-/* to find an entry in

[PATCH 11/12] mm: consolidate pud_index() and pud_offset() definitions

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

All architectures that have at least four-level page tables define
pud_offset() as an entry in the array of PUDs indexed by the pud_index(),
where pud_index() is

(address >> PUD_SHIFT) & (PTRS_PER_PUD - 1)

For most architectures the pud_offset() implementation relies on
the availability of pud_page_vaddr() that converts a PUD entry value to the
virtual address of the page containing the PUD array.

Let's use such an implementation as the generic one and drop most of the
definitions of pud_index() and pud_offset() in <asm/pgtable.h> files.

The architectures that didn't provide pud_page_vaddr() are updated to
have that defined.

The generic implementation can be overridden by an architecture and this
ability is currently in use by the following architecture:
* s390 has custom definitions of all page table accessors

Signed-off-by: Mike Rapoport 
---
 arch/arm64/include/asm/pgtable.h|  8 +---
 arch/csky/include/asm/pgtable.h |  2 --
 arch/ia64/include/asm/pgtable.h |  6 --
 arch/mips/include/asm/pgtable-32.h  |  1 -
 arch/mips/include/asm/pgtable-64.h  |  7 ---
 arch/powerpc/include/asm/book3s/64/pgtable.h|  4 
 arch/powerpc/include/asm/nohash/64/pgtable-4k.h |  4 
 arch/s390/include/asm/pgtable.h |  1 +
 arch/sh/include/asm/pgtable_32.h|  2 --
 arch/sh/include/asm/pgtable_64.h|  2 --
 arch/sparc/include/asm/pgtable_64.h |  5 -
 arch/x86/include/asm/pgtable.h  | 11 ---
 include/asm-generic/pgtable-nopud.h |  1 +
 include/linux/pgtable.h | 16 
 14 files changed, 23 insertions(+), 47 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index d0175335a2f2..c0155814b374 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -626,11 +626,13 @@ static inline phys_addr_t p4d_page_paddr(p4d_t p4d)
return __p4d_to_phys(p4d);
 }
 
-/* Find an entry in the frst-level page table. */
-#define pud_index(addr)(((addr) >> PUD_SHIFT) & (PTRS_PER_PUD 
- 1))
+static inline unsigned long p4d_page_vaddr(p4d_t p4d)
+{
+   return (unsigned long)__va(p4d_page_paddr(p4d));
+}
 
+/* Find an entry in the frst-level page table. */
 #define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + 
pud_index(addr) * sizeof(pud_t))
-#define pud_offset(dir, addr)  ((pud_t *)__va(pud_offset_phys((dir), 
(addr))))
 
 #define pud_set_fixmap(addr)   ((pud_t *)set_fixmap_offset(FIX_PUD, 
addr))
 #define pud_set_fixmap_offset(p4d, addr)   
pud_set_fixmap(pud_offset_phys(p4d, addr))
diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index dc613f20e2e1..c5ab20970857 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -220,8 +220,6 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address)  pgd_offset(&init_mm, address)
 
diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h
index d75d981e6ee1..4c24e5e18bff 100644
--- a/arch/ia64/include/asm/pgtable.h
+++ b/arch/ia64/include/asm/pgtable.h
@@ -383,12 +383,6 @@ pgd_offset (const struct mm_struct *mm, unsigned long 
address)
here.  */
 #define pgd_offset_gate(mm, addr)  pgd_offset_k(addr)
 
-#if CONFIG_PGTABLE_LEVELS == 4
-/* Find an entry in the second-level page table.. */
-#define pud_offset(dir,addr) \
-   ((pud_t *) p4d_page_vaddr(*(dir)) + (((addr) >> PUD_SHIFT) & 
(PTRS_PER_PUD - 1)))
-#endif
-
 /* atomic versions of the some PTE manipulations: */
 
 static inline int
diff --git a/arch/mips/include/asm/pgtable-32.h 
b/arch/mips/include/asm/pgtable-32.h
index 05b51c344939..9c0e7a5ffc75 100644
--- a/arch/mips/include/asm/pgtable-32.h
+++ b/arch/mips/include/asm/pgtable-32.h
@@ -199,7 +199,6 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t 
prot)
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
 #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 
 /* to find an entry in a page-table-directory */
 #define pgd_offset(mm, addr)   ((mm)->pgd + pgd_index(addr))
diff --git a/arch/mips/include/asm/pgtable-64.h 
b/arch/mips/include/asm/pgtable-64.h
index 21fb55510d35..38170fdac5bf 100644
--- a/arch/mips/include/asm/pgtable-64.h
+++ b/arch/mips/include/asm/pgtable-64.h
@@ -172,8 +172,6 @@
 
 extern pte_t invalid_pte_table[PTRS_PER_PTE];
 
-#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))
-
 #ifndef __PAGETABLE_PUD_FOLDED
 /*
  * For 4-level

[PATCH 10/12] mm: consolidate pmd_index() and pmd_offset() definitions

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

All architectures define pmd_index() as

(address >> PMD_SHIFT) & (PTRS_PER_PMD - 1)

and all architectures that have at least three-level page tables define
pmd_offset() as an entry in the array of PMDs indexed by the pmd_index().

For most architectures the pmd_offset() implementation relies on
the availability of pud_page_vaddr() that converts a PUD entry value to the
virtual address of the page containing the PMD array.

Let's use this implementation as the generic one and drop most of the
definitions of pmd_index() and pmd_offset() in <asm/pgtable.h> files.

The architectures that didn't provide pud_page_vaddr() are updated to
have that defined.

The generic implementation can be overridden by an architecture and this
ability is currently in use by three architectures:
* alpha has special requirements for memory access ordering
* arm has custom definition of folded 2-level page tables
* s390 has custom definitions of all page table accessors

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgtable.h |  1 +
 arch/arm/include/asm/pgtable-2level.h|  1 +
 arch/arm/include/asm/pgtable-3level.h|  7 -
 arch/arm/include/asm/pgtable-nommu.h |  1 -
 arch/arm64/include/asm/pgtable.h |  8 --
 arch/c6x/include/asm/pgtable.h   |  1 -
 arch/csky/include/asm/pgtable.h  |  1 -
 arch/hexagon/include/asm/pgtable.h   |  9 --
 arch/ia64/include/asm/pgtable.h  |  4 ---
 arch/m68k/include/asm/motorola_pgtable.h |  7 -
 arch/microblaze/include/asm/pgtable.h|  1 -
 arch/mips/include/asm/pgtable-32.h   |  1 -
 arch/mips/include/asm/pgtable-64.h   |  6 
 arch/parisc/include/asm/pgtable.h|  8 --
 arch/parisc/kernel/pci-dma.c |  2 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h |  3 --
 arch/powerpc/include/asm/nohash/64/pgtable.h |  3 --
 arch/riscv/include/asm/pgtable-64.h  |  7 -
 arch/riscv/mm/init.c | 12 
 arch/s390/include/asm/pgtable.h  |  1 +
 arch/sh/include/asm/pgtable-3level.h |  7 -
 arch/sh/include/asm/pgtable_32.h |  1 -
 arch/sh/include/asm/pgtable_64.h |  1 -
 arch/sparc/include/asm/pgtable_32.h  |  9 +-
 arch/sparc/include/asm/pgtable_64.h  |  7 +
 arch/um/include/asm/pgtable-3level.h |  4 ---
 arch/um/include/asm/pgtable.h|  4 ---
 arch/x86/include/asm/pgtable.h   | 17 
 include/asm-generic/pgtable-nopmd.h  |  1 +
 include/linux/pgtable.h  | 29 ++--
 30 files changed, 44 insertions(+), 120 deletions(-)

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index dac20d03b727..314973d2810d 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -305,6 +305,7 @@ extern inline pmd_t * pmd_offset(pud_t * dir, unsigned long 
address)
smp_read_barrier_depends(); /* see above */
return ret;
 }
+#define pmd_offset pmd_offset
 
 /* Find an entry in the third-level page table.. */
 extern inline pte_t * pte_offset_kernel(pmd_t * dir, unsigned long address)
diff --git a/arch/arm/include/asm/pgtable-2level.h 
b/arch/arm/include/asm/pgtable-2level.h
index 9e084a464a97..3502c2f746ca 100644
--- a/arch/arm/include/asm/pgtable-2level.h
+++ b/arch/arm/include/asm/pgtable-2level.h
@@ -187,6 +187,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long 
addr)
 {
return (pmd_t *)pud;
 }
+#define pmd_offset pmd_offset
 
 #define pmd_large(pmd) (pmd_val(pmd) & 2)
 #define pmd_leaf(pmd)  (pmd_val(pmd) & 2)
diff --git a/arch/arm/include/asm/pgtable-3level.h 
b/arch/arm/include/asm/pgtable-3level.h
index 1933aed9f68d..fbb6693c3352 100644
--- a/arch/arm/include/asm/pgtable-3level.h
+++ b/arch/arm/include/asm/pgtable-3level.h
@@ -133,13 +133,6 @@ static inline pmd_t *pud_page_vaddr(pud_t pud)
return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
 }
 
-/* Find an entry in the second-level page table.. */
-#define pmd_index(addr)(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD 
- 1))
-static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
-{
-   return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
-}
-
 #define pmd_bad(pmd)   (!(pmd_val(pmd) & 2))
 
 #define copy_pmd(pmdpd,pmdps)  \
diff --git a/arch/arm/include/asm/pgtable-nommu.h 
b/arch/arm/include/asm/pgtable-nommu.h
index 1a758f14e0c3..d16aba48fa0a 100644
--- a/arch/arm/include/asm/pgtable-nommu.h
+++ b/arch/arm/include/asm/pgtable-nommu.h
@@ -22,7 +22,6 @@
 #define pgd_bad(pgd)   (0)
 #define pgd_clear(pgdp)
 #define kern_addr_valid(addr)  (1)
-#definepmd_offset(a, b)((void *)0)
 /* FIXME */
 /*
  * PMD_SHIFT determines the size of the area a second-level page table can map
diff

[PATCH 09/12] mm: consolidate pte_index() and pte_offset_*() definitions

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

All architectures define pte_index() as

(address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)

and all architectures define pte_offset_kernel() as an entry
in the array of PTEs indexed by the pte_index().

For most architectures the pte_offset_kernel() implementation relies on
the availability of pmd_page_vaddr() that converts a PMD entry value to the
virtual address of the page containing the PTE array.

Let's move x86 definitions of the PTE accessors to the generic place in
<linux/pgtable.h> and then simply drop the respective definitions from the
other architectures.

The architectures that didn't provide pmd_page_vaddr() are updated to have
that defined.

The generic implementation of pte_offset_kernel() can be overridden by an
architecture and alpha makes use of this because it has special ordering
requirements for its version of pte_offset_kernel().

Signed-off-by: Mike Rapoport 
---
 arch/alpha/include/asm/pgtable.h |  4 +--
 arch/arc/include/asm/pgtable.h   | 15 --
 arch/arm/include/asm/pgtable.h   | 15 --
 arch/arm64/include/asm/pgtable.h | 10 +++
 arch/csky/include/asm/pgtable.h  | 16 ---
 arch/hexagon/include/asm/pgtable.h   | 25 +++--
 arch/ia64/include/asm/pgtable.h  |  9 --
 arch/m68k/include/asm/mcf_pgtable.h  | 12 +---
 arch/m68k/include/asm/motorola_pgalloc.h |  2 +-
 arch/m68k/include/asm/motorola_pgtable.h | 11 +---
 arch/m68k/include/asm/sun3_pgtable.h | 15 --
 arch/m68k/mm/init.c  |  2 +-
 arch/microblaze/include/asm/pgtable.h| 13 +++--
 arch/mips/include/asm/pgtable-32.h   | 12 
 arch/mips/include/asm/pgtable-64.h   | 11 
 arch/mips/kvm/mmu.c  | 20 +++---
 arch/mips/kvm/trap_emul.c|  2 +-
 arch/nds32/include/asm/pgtable.h | 12 +++-
 arch/nios2/include/asm/pgtable.h | 14 +++---
 arch/openrisc/include/asm/pgtable.h  | 23 
 arch/parisc/include/asm/pgtable.h| 15 +++---
 arch/powerpc/include/asm/book3s/32/pgtable.h | 13 ++---
 arch/powerpc/include/asm/book3s/64/pgtable.h |  8 --
 arch/powerpc/include/asm/nohash/32/pgtable.h | 13 ++---
 arch/powerpc/include/asm/nohash/64/pgtable.h |  7 -
 arch/powerpc/include/asm/pgtable.h   |  7 +
 arch/riscv/include/asm/pgtable.h | 10 ---
 arch/riscv/mm/init.c |  6 ++--
 arch/s390/include/asm/pgtable.h  | 10 ++-
 arch/s390/mm/pageattr.c  |  2 +-
 arch/sh/include/asm/pgtable_32.h | 15 --
 arch/sh/include/asm/pgtable_64.h | 12 
 arch/sparc/include/asm/pgalloc_64.h  |  2 +-
 arch/sparc/include/asm/pgtable_32.h  | 15 --
 arch/sparc/include/asm/pgtable_64.h  | 12 ++--
 arch/sparc/mm/srmmu.c| 10 ---
 arch/um/include/asm/pgtable.h| 13 -
 arch/unicore32/include/asm/pgtable.h |  9 --
 arch/x86/include/asm/pgtable.h   | 19 -
 arch/x86/include/asm/pgtable_32.h| 11 
 arch/x86/include/asm/pgtable_64.h|  4 ---
 arch/xtensa/include/asm/pgtable.h| 10 +--
 include/linux/pgtable.h  | 29 
 43 files changed, 113 insertions(+), 382 deletions(-)

diff --git a/arch/alpha/include/asm/pgtable.h b/arch/alpha/include/asm/pgtable.h
index 1c263922beb3..dac20d03b727 100644
--- a/arch/alpha/include/asm/pgtable.h
+++ b/arch/alpha/include/asm/pgtable.h
@@ -314,9 +314,7 @@ extern inline pte_t * pte_offset_kernel(pmd_t * dir, 
unsigned long address)
smp_read_barrier_depends(); /* see above */
return ret;
 }
-
-#define pte_offset_map(dir,addr)   pte_offset_kernel((dir),(addr))
-#define pte_unmap(pte) do { } while (0)
+#define pte_offset_kernel pte_offset_kernel
 
 extern pgd_t swapper_pg_dir[1024];
 
diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h
index 29137bcedd93..1146905f594b 100644
--- a/arch/arc/include/asm/pgtable.h
+++ b/arch/arc/include/asm/pgtable.h
@@ -248,9 +248,6 @@
 extern char empty_zero_page[PAGE_SIZE];
 #define ZERO_PAGE(vaddr)   (virt_to_page(empty_zero_page))
 
-#define pte_unmap(pte) do { } while (0)
-#define pte_unmap_nested(pte)  do { } while (0)
-
 #define set_pte(pteptr, pteval)((*(pteptr)) = (pteval))
 #define set_pmd(pmdptr, pmdval)(*(pmdptr) = pmdval)
 
@@ -282,18 +279,6 @@ static inline void pmd_set(pmd_t *pmdp, pte_t *ptep)
 
 /* Don't use virt_to_pfn for macros below: could cause truncations for PAE40*/
 #define pte_pfn(pte)   (pte_val(pte) >> PAGE_SHIFT)
-#define __pte_index(addr)  (((addr) >> PAGE_SHIF

[PATCH 08/12] mm: pgtable: add shortcuts for accessing kernel PMD and PTE

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

The powerpc 32-bit implementation of pgtable has nice shortcuts for
accessing kernel PMD and PTE for a given virtual address.
Make these helpers available for all architectures.

Signed-off-by: Mike Rapoport 
---
 arch/arc/mm/highmem.c | 10 +---
 arch/arm/mach-sa1100/assabet.c|  2 +-
 arch/arm/mm/highmem.c |  4 ++--
 arch/arm/mm/ioremap.c | 31 
 arch/arm/mm/mm.h  |  5 
 arch/arm/mm/mmu.c |  7 +-
 arch/hexagon/include/asm/fixmap.h |  4 
 arch/m68k/mm/motorola.c   | 26 
 arch/microblaze/kernel/signal.c   |  8 +--
 arch/microblaze/mm/init.c |  9 ---
 arch/mips/include/asm/fixmap.h|  3 ---
 arch/mips/mm/c-r3k.c  | 10 ++--
 arch/mips/mm/c-r4k.c  | 10 ++--
 arch/mips/mm/c-tx39.c | 10 ++--
 arch/mips/mm/highmem.c|  2 +-
 arch/nds32/include/asm/pgtable.h  |  2 --
 arch/nds32/mm/init.c  | 13 ++
 arch/nds32/mm/proc.c  |  6 +
 arch/parisc/mm/fixmap.c   |  6 +
 arch/powerpc/include/asm/pgtable.h| 19 ---
 arch/powerpc/mm/book3s32/mmu.c|  2 +-
 arch/powerpc/mm/book3s32/tlb.c|  4 ++--
 arch/powerpc/mm/kasan/kasan_init_32.c |  8 +++
 arch/powerpc/mm/nohash/40x.c  |  4 ++--
 arch/powerpc/mm/pgtable_32.c  |  2 +-
 arch/s390/mm/pageattr.c   | 10 +---
 arch/sh/mm/cache-sh4.c|  8 +--
 arch/sh/mm/kmap.c |  5 +---
 arch/sparc/mm/highmem.c   | 12 ++
 arch/sparc/mm/init_64.c   |  6 +
 arch/sparc/mm/io-unit.c   | 10 ++--
 arch/sparc/mm/iommu.c |  8 +--
 arch/sparc/mm/srmmu.c | 34 +++
 arch/um/kernel/mem.c  | 10 +---
 arch/um/kernel/trap.c |  8 +--
 arch/unicore32/mm/mm.h| 10 
 arch/x86/mm/init_32.c | 26 +++-
 arch/xtensa/include/asm/fixmap.h  |  8 ---
 arch/xtensa/mm/highmem.c  |  2 +-
 arch/xtensa/mm/kasan_init.c   | 10 ++--
 arch/xtensa/mm/mmu.c  |  5 +---
 include/linux/pgtable.h   | 18 ++
 42 files changed, 74 insertions(+), 323 deletions(-)

diff --git a/arch/arc/mm/highmem.c b/arch/arc/mm/highmem.c
index d6b74883fd1f..1b9f473c6369 100644
--- a/arch/arc/mm/highmem.c
+++ b/arch/arc/mm/highmem.c
@@ -92,17 +92,9 @@ EXPORT_SYMBOL(kunmap_atomic_high);
 
 static noinline pte_t * __init alloc_kmap_pgtable(unsigned long kvaddr)
 {
-   pgd_t *pgd_k;
-   p4d_t *p4d_k;
-   pud_t *pud_k;
-   pmd_t *pmd_k;
+   pmd_t *pmd_k = pmd_off_k(kvaddr);
pte_t *pte_k;
 
-   pgd_k = pgd_offset_k(kvaddr);
-   p4d_k = p4d_offset(pgd_k, kvaddr);
-   pud_k = pud_offset(p4d_k, kvaddr);
-   pmd_k = pmd_offset(pud_k, kvaddr);
-
pte_k = (pte_t *)memblock_alloc_low(PAGE_SIZE, PAGE_SIZE);
if (!pte_k)
panic("%s: Failed to allocate %lu bytes align=0x%lx\n",
diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c
index 8e3f5fdb4883..aa265ede5730 100644
--- a/arch/arm/mach-sa1100/assabet.c
+++ b/arch/arm/mach-sa1100/assabet.c
@@ -632,7 +632,7 @@ static void __init map_sa1100_gpio_regs( void )
int prot = PMD_TYPE_SECT | PMD_SECT_AP_WRITE | PMD_DOMAIN(DOMAIN_IO);
pmd_t *pmd;
 
-   pmd = pmd_offset(pud_offset(p4d_offset(pgd_offset_k(virt), virt), 
virt), virt);
+   pmd = pmd_off_k(virt);
*pmd = __pmd(phys | prot);
flush_pmd_entry(pmd);
 }
diff --git a/arch/arm/mm/highmem.c b/arch/arm/mm/highmem.c
index e013f6b81328..187fab227b50 100644
--- a/arch/arm/mm/highmem.c
+++ b/arch/arm/mm/highmem.c
@@ -18,7 +18,7 @@
 static inline void set_fixmap_pte(int idx, pte_t pte)
 {
unsigned long vaddr = __fix_to_virt(idx);
-   pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+   pte_t *ptep = virt_to_kpte(vaddr);
 
set_pte_ext(ptep, pte, 0);
local_flush_tlb_kernel_page(vaddr);
@@ -26,7 +26,7 @@ static inline void set_fixmap_pte(int idx, pte_t pte)
 
 static inline pte_t get_fixmap_pte(unsigned long vaddr)
 {
-   pte_t *ptep = pte_offset_kernel(pmd_off_k(vaddr), vaddr);
+   pte_t *ptep = virt_to_kpte(vaddr);
 
return *ptep;
 }
diff --git a/arch/arm/mm/ioremap.c b/arch/arm/mm/ioremap.c
index 75529d76d28c..000e821b 100644
--- a/arch/arm/mm/ioremap.c
+++ b/arch/arm/mm/ioremap.c
@@ -141,16 +141,8 @@ void __check_vmalloc_seq(struct mm_struct *mm)
 static void unmap_area_sections(unsigned long virt, unsigned long size)
 {
unsigned long addr = virt, end = virt + (size & ~(SZ_1M - 1));
-   pgd_t *pgd;
-   p4d_t *p4d;
-

[PATCH 07/12] x86/mm: simplify init_trampoline() and surrounding logic

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

There are three cases for the trampoline initialization:
* 32-bit does nothing
* 64-bit with kaslr disabled simply copies a PGD entry from the direct map
  to the trampoline PGD
* 64-bit with kaslr enabled maps the real mode trampoline at PUD level

These cases are currently differentiated by a bunch of ifdefs inside
arch/x86/include/asm/pgtable.h and the case of 64-bit with kaslr on uses
the pgd_index() helper.

Replacing the ifdefs with a static function in arch/x86/mm/init.c gives
clearer code and allows moving pgd_index() to the generic implementation in
include/linux/pgtable.h

Signed-off-by: Mike Rapoport 
---
 arch/x86/include/asm/kaslr.h   |  2 ++
 arch/x86/include/asm/pgtable.h | 15 +--
 arch/x86/include/asm/setup.h   |  9 +
 arch/x86/mm/init.c | 22 ++
 arch/x86/mm/kaslr.c| 33 +
 5 files changed, 35 insertions(+), 46 deletions(-)

diff --git a/arch/x86/include/asm/kaslr.h b/arch/x86/include/asm/kaslr.h
index db7ba2feb947..0648190467ba 100644
--- a/arch/x86/include/asm/kaslr.h
+++ b/arch/x86/include/asm/kaslr.h
@@ -6,8 +6,10 @@ unsigned long kaslr_get_random_long(const char *purpose);
 
 #ifdef CONFIG_RANDOMIZE_MEMORY
 void kernel_randomize_memory(void);
+void init_trampoline_kaslr(void);
 #else
 static inline void kernel_randomize_memory(void) { }
+static inline void init_trampoline_kaslr(void) {}
 #endif /* CONFIG_RANDOMIZE_MEMORY */
 
 #endif
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index d24f8e1f7250..6366136b0e46 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -1070,27 +1070,14 @@ void init_mem_mapping(void);
 void early_alloc_pgt_buf(void);
 extern void memblock_find_dma_reserve(void);
 
+
 #ifdef CONFIG_X86_64
-/* Realmode trampoline initialization. */
 extern pgd_t trampoline_pgd_entry;
-static inline void __meminit init_trampoline_default(void)
-{
-   /* Default trampoline pgd value */
-   trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
-}
 
 void __init poking_init(void);
 
 unsigned long init_memory_mapping(unsigned long start,
  unsigned long end, pgprot_t prot);
-
-# ifdef CONFIG_RANDOMIZE_MEMORY
-void __meminit init_trampoline(void);
-# else
-#  define init_trampoline init_trampoline_default
-# endif
-#else
-static inline void init_trampoline(void) { }
 #endif
 
 /* local pte updates need not use xchg for locking */
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ed8ec011a9fd..d95cacf210bb 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -78,6 +78,15 @@ static inline bool kaslr_enabled(void)
return !!(boot_params.hdr.loadflags & KASLR_FLAG);
 }
 
+/*
+ * Apply no randomization if KASLR was disabled at boot or if KASAN
+ * is enabled. KASAN shadow mappings rely on regions being PGD aligned.
+ */
+static inline bool kaslr_memory_enabled(void)
+{
+   return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
+}
+
 static inline unsigned long kaslr_offset(void)
 {
return (unsigned long)&_text - __START_KERNEL;
diff --git a/arch/x86/mm/init.c b/arch/x86/mm/init.c
index 235dd0e35741..e225ebb25197 100644
--- a/arch/x86/mm/init.c
+++ b/arch/x86/mm/init.c
@@ -682,6 +682,28 @@ static void __init memory_map_bottom_up(unsigned long 
map_start,
}
 }
 
+/*
+ * The real mode trampoline, which is required for bootstrapping CPUs
+ * occupies only a small area under the low 1MB.  See reserve_real_mode()
+ * for details.
+ *
+ * If KASLR is disabled the first PGD entry of the direct mapping is copied
+ * to map the real mode trampoline.
+ *
+ * If KASLR is enabled, copy only the PUD which covers the low 1MB
+ * area. This limits the randomization granularity to 1GB for both 4-level
+ * and 5-level paging.
+ */
+static void __init init_trampoline(void)
+{
+#ifdef CONFIG_X86_64
+   if (!kaslr_memory_enabled())
+   trampoline_pgd_entry = init_top_pgt[pgd_index(__PAGE_OFFSET)];
+   else
+   init_trampoline_kaslr();
+#endif
+}
+
 void __init init_mem_mapping(void)
 {
unsigned long end;
diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c
index faf02e1e1517..fb620fd9dae9 100644
--- a/arch/x86/mm/kaslr.c
+++ b/arch/x86/mm/kaslr.c
@@ -61,15 +61,6 @@ static inline unsigned long get_padding(struct 
kaslr_memory_region *region)
return (region->size_tb << TB_SHIFT);
 }
 
-/*
- * Apply no randomization if KASLR was disabled at boot or if KASAN
- * is enabled. KASAN shadow mappings rely on regions being PGD aligned.
- */
-static inline bool kaslr_memory_enabled(void)
-{
-   return kaslr_enabled() && !IS_ENABLED(CONFIG_KASAN);
-}
-
 /* Initialize base and padding for each memory region randomized with KASLR */
 void __init kernel_randomize_memory(void)
 {
@@ -148,7 +139,7 @@ void __init kernel_randomize_memory(void)

[PATCH 06/12] m68k/mm: move {cache, nocache}_page() definitions close to their user

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

The cache_page() and nocache_page() functions are only used by the motorola
MMU variant for setting caching attributes for the page table pages.

Move the definitions of these functions from
arch/m68k/include/asm/motorola_pgtable.h closer to their usage in
arch/m68k/mm/motorola.c and drop unused definition in
arch/m68k/include/asm/mcf_pgtable.h.

Signed-off-by: Mike Rapoport 
---
 arch/m68k/include/asm/mcf_pgtable.h  | 40 -
 arch/m68k/include/asm/motorola_pgtable.h | 44 
 arch/m68k/mm/motorola.c  | 43 +++
 3 files changed, 43 insertions(+), 84 deletions(-)

diff --git a/arch/m68k/include/asm/mcf_pgtable.h 
b/arch/m68k/include/asm/mcf_pgtable.h
index 0031cd387b75..737e826294f3 100644
--- a/arch/m68k/include/asm/mcf_pgtable.h
+++ b/arch/m68k/include/asm/mcf_pgtable.h
@@ -328,46 +328,6 @@ extern pgd_t kernel_pg_dir[PTRS_PER_PGD];
 #define pte_offset_kernel(dir, address) \
((pte_t *) __pmd_page(*(dir)) + __pte_offset(address))
 
-/*
- * Disable caching for page at given kernel virtual address.
- */
-static inline void nocache_page(void *vaddr)
-{
-   pgd_t *dir;
-   p4d_t *p4dp;
-   pud_t *pudp;
-   pmd_t *pmdp;
-   pte_t *ptep;
-   unsigned long addr = (unsigned long) vaddr;
-
-   dir = pgd_offset_k(addr);
-   p4dp = p4d_offset(dir, addr);
-   pudp = pud_offset(p4dp, addr);
-   pmdp = pmd_offset(pudp, addr);
-   ptep = pte_offset_kernel(pmdp, addr);
-   *ptep = pte_mknocache(*ptep);
-}
-
-/*
- * Enable caching for page at given kernel virtual address.
- */
-static inline void cache_page(void *vaddr)
-{
-   pgd_t *dir;
-   p4d_t *p4dp;
-   pud_t *pudp;
-   pmd_t *pmdp;
-   pte_t *ptep;
-   unsigned long addr = (unsigned long) vaddr;
-
-   dir = pgd_offset_k(addr);
-   p4dp = p4d_offset(dir, addr);
-   pudp = pud_offset(p4dp, addr);
-   pmdp = pmd_offset(pudp, addr);
-   ptep = pte_offset_kernel(pmdp, addr);
-   *ptep = pte_mkcache(*ptep);
-}
-
 /*
  * Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e))
  */
diff --git a/arch/m68k/include/asm/motorola_pgtable.h 
b/arch/m68k/include/asm/motorola_pgtable.h
index 9e5a3de21e15..e1594acf7c7e 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -227,50 +227,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmdp, 
unsigned long address)
 #define pte_offset_map(pmdp,address) ((pte_t *)__pmd_page(*pmdp) + (((address) 
>> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
 #define pte_unmap(pte) ((void)0)
 
-/* Prior to calling these routines, the page should have been flushed
- * from both the cache and ATC, or the CPU might not notice that the
- * cache setting for the page has been changed. -jskov
- */
-static inline void nocache_page(void *vaddr)
-{
-   unsigned long addr = (unsigned long)vaddr;
-
-   if (CPU_IS_040_OR_060) {
-   pgd_t *dir;
-   p4d_t *p4dp;
-   pud_t *pudp;
-   pmd_t *pmdp;
-   pte_t *ptep;
-
-   dir = pgd_offset_k(addr);
-   p4dp = p4d_offset(dir, addr);
-   pudp = pud_offset(p4dp, addr);
-   pmdp = pmd_offset(pudp, addr);
-   ptep = pte_offset_kernel(pmdp, addr);
-   *ptep = pte_mknocache(*ptep);
-   }
-}
-
-static inline void cache_page(void *vaddr)
-{
-   unsigned long addr = (unsigned long)vaddr;
-
-   if (CPU_IS_040_OR_060) {
-   pgd_t *dir;
-   p4d_t *p4dp;
-   pud_t *pudp;
-   pmd_t *pmdp;
-   pte_t *ptep;
-
-   dir = pgd_offset_k(addr);
-   p4dp = p4d_offset(dir, addr);
-   pudp = pud_offset(p4dp, addr);
-   pmdp = pmd_offset(pudp, addr);
-   ptep = pte_offset_kernel(pmdp, addr);
-   *ptep = pte_mkcache(*ptep);
-   }
-}
-
 /* Encode and de-code a swap entry (must be !pte_none(e) && !pte_present(e)) */
 #define __swp_type(x)  (((x).val >> 4) & 0xff)
 #define __swp_offset(x)((x).val >> 12)
diff --git a/arch/m68k/mm/motorola.c b/arch/m68k/mm/motorola.c
index 904c2a663977..8e5e74121a78 100644
--- a/arch/m68k/mm/motorola.c
+++ b/arch/m68k/mm/motorola.c
@@ -45,6 +45,49 @@ unsigned long mm_cachebits;
 EXPORT_SYMBOL(mm_cachebits);
 #endif
 
+/* Prior to calling these routines, the page should have been flushed
+ * from both the cache and ATC, or the CPU might not notice that the
+ * cache setting for the page has been changed. -jskov
+ */
+static inline void nocache_page(void *vaddr)
+{
+   unsigned long addr = (unsigned long)vaddr;
+
+   if (CPU_IS_040_OR_060) {
+   pgd_t *dir;
+   p4d_t *p4dp;
+   pud_t *pudp;
+   pmd_t *pmdp;
+   pte_t *ptep;

[PATCH 05/12] m68k/mm/motorola: move comment about page table allocation functions

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

The comment about page table allocation functions resides in
include/asm/motorola_pgtable.h while the functions live in
include/asm/motorola_pgalloc.h.

Move the comment close to the code.

Signed-off-by: Mike Rapoport 
---
 arch/m68k/include/asm/motorola_pgalloc.h | 6 ++
 arch/m68k/include/asm/motorola_pgtable.h | 6 --
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/m68k/include/asm/motorola_pgalloc.h 
b/arch/m68k/include/asm/motorola_pgalloc.h
index c66e42917912..f3cb453a07b7 100644
--- a/arch/m68k/include/asm/motorola_pgalloc.h
+++ b/arch/m68k/include/asm/motorola_pgalloc.h
@@ -18,6 +18,12 @@ extern void init_pointer_table(void *table, int type);
 extern void *get_pointer_table(int type);
 extern int free_pointer_table(void *table, int type);
 
+/*
+ * Allocate and free page tables. The xxx_kernel() versions are
+ * used to allocate a kernel page table - this turns on ASN bits
+ * if any.
+ */
+
 static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm)
 {
return get_pointer_table(TABLE_PTE);
diff --git a/arch/m68k/include/asm/motorola_pgtable.h 
b/arch/m68k/include/asm/motorola_pgtable.h
index 48f19f0ab1e7..9e5a3de21e15 100644
--- a/arch/m68k/include/asm/motorola_pgtable.h
+++ b/arch/m68k/include/asm/motorola_pgtable.h
@@ -227,12 +227,6 @@ static inline pte_t *pte_offset_kernel(pmd_t *pmdp, 
unsigned long address)
 #define pte_offset_map(pmdp,address) ((pte_t *)__pmd_page(*pmdp) + (((address) 
>> PAGE_SHIFT) & (PTRS_PER_PTE - 1)))
 #define pte_unmap(pte) ((void)0)
 
-/*
- * Allocate and free page tables. The xxx_kernel() versions are
- * used to allocate a kernel page table - this turns on ASN bits
- * if any.
- */
-
 /* Prior to calling these routines, the page should have been flushed
  * from both the cache and ATC, or the CPU might not notice that the
  * cache setting for the page has been changed. -jskov
-- 
2.26.1

[PATCH 04/12] csky: replace definitions of __pXd_offset() with pXd_index()

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

All architectures use pXd_index() to get an entry in the page table page
corresponding to a virtual address.

Align csky with other architectures.

Signed-off-by: Mike Rapoport 
---
 arch/csky/include/asm/pgtable.h | 5 ++---
 arch/csky/mm/fault.c| 2 +-
 arch/csky/mm/highmem.c  | 2 +-
 arch/csky/mm/init.c | 6 +++---
 4 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/arch/csky/include/asm/pgtable.h b/arch/csky/include/asm/pgtable.h
index 9d2d16db237d..2eff4aea51b3 100644
--- a/arch/csky/include/asm/pgtable.h
+++ b/arch/csky/include/asm/pgtable.h
@@ -229,9 +229,8 @@ static inline pte_t pte_mkyoung(pte_t pte)
return pte;
 }
 
-#define __pgd_offset(address)  pgd_index(address)
-#define __pud_offset(address)  (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
-#define __pmd_offset(address)  (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
+#define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
+#define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address)  pgd_offset(&init_mm, address)
diff --git a/arch/csky/mm/fault.c b/arch/csky/mm/fault.c
index 4e6dc68f3258..4055d430c0c8 100644
--- a/arch/csky/mm/fault.c
+++ b/arch/csky/mm/fault.c
@@ -78,7 +78,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned 
long write,
 * Do _not_ use "tsk" here. We might be inside
 * an interrupt in the middle of a task switch..
 */
-   int offset = __pgd_offset(address);
+   int offset = pgd_index(address);
pgd_t *pgd, *pgd_k;
pud_t *pud, *pud_k;
pmd_t *pmd, *pmd_k;
diff --git a/arch/csky/mm/highmem.c b/arch/csky/mm/highmem.c
index 3b3f622f5ae9..89ec32e602a1 100644
--- a/arch/csky/mm/highmem.c
+++ b/arch/csky/mm/highmem.c
@@ -92,7 +92,7 @@ static void __init kmap_pages_init(void)
vaddr = PKMAP_BASE;
fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, swapper_pg_dir);
 
-   pgd = swapper_pg_dir + __pgd_offset(vaddr);
+   pgd = swapper_pg_dir + pgd_index(vaddr);
pud = (pud_t *)pgd;
pmd = pmd_offset(pud, vaddr);
pte = pte_offset_kernel(pmd, vaddr);
diff --git a/arch/csky/mm/init.c b/arch/csky/mm/init.c
index eda2b4291485..af627128314f 100644
--- a/arch/csky/mm/init.c
+++ b/arch/csky/mm/init.c
@@ -157,9 +157,9 @@ void __init fixrange_init(unsigned long start, unsigned 
long end,
unsigned long vaddr;
 
vaddr = start;
-   i = __pgd_offset(vaddr);
-   j = __pud_offset(vaddr);
-   k = __pmd_offset(vaddr);
+   i = pgd_index(vaddr);
+   j = pud_index(vaddr);
+   k = pmd_index(vaddr);
pgd = pgd_base + i;
 
for ( ; (i < PTRS_PER_PGD) && (vaddr != end); pgd++, i++) {
-- 
2.26.1

[PATCH 03/12] mm: reorder includes after introduction of linux/pgtable.h

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

The replacement of <asm/pgtable.h> with <linux/pgtable.h> left the include
of the latter in the middle of the asm includes. Fix this up with the aid of
the script below and manual adjustments here and there.

import sys
import re

if len(sys.argv) is not 3:
print "USAGE: %s  " % (sys.argv[0])
sys.exit(1)

hdr_to_move="#include " % sys.argv[2]
moved = False
in_hdrs = False

with open(sys.argv[1], "r") as f:
lines = f.readlines()
for _line in lines:
line = _line.rstrip('\n')
if line == hdr_to_move:
continue
if line.startswith("#include 
---
 arch/alpha/kernel/proto.h | 2 --
 arch/arc/mm/highmem.c | 2 +-
 arch/arc/mm/tlbex.S   | 2 +-
 arch/arm/include/asm/efi.h| 1 -
 arch/arm/include/asm/fixmap.h | 2 +-
 arch/arm/kernel/head.S| 2 +-
 arch/arm/kernel/suspend.c | 2 +-
 arch/arm/kernel/vmlinux.lds.S | 2 +-
 arch/arm/mach-integrator/core.c   | 2 +-
 arch/arm/mach-keystone/platsmp.c  | 2 +-
 arch/arm/mach-sa1100/hackkit.c| 2 +-
 arch/arm/mach-zynq/common.c   | 2 +-
 arch/arm/mm/idmap.c   | 2 +-
 arch/arm/mm/mm.h  | 1 -
 arch/arm/mm/proc-arm1020.S| 2 +-
 arch/arm/mm/proc-arm1020e.S   | 2 +-
 arch/arm/mm/proc-arm1022.S| 2 +-
 arch/arm/mm/proc-arm1026.S| 2 +-
 arch/arm/mm/proc-arm720.S | 2 +-
 arch/arm/mm/proc-arm740.S | 2 +-
 arch/arm/mm/proc-arm7tdmi.S   | 2 +-
 arch/arm/mm/proc-arm920.S | 2 +-
 arch/arm/mm/proc-arm922.S | 2 +-
 arch/arm/mm/proc-arm925.S | 2 +-
 arch/arm/mm/proc-arm926.S | 2 +-
 arch/arm/mm/proc-arm940.S | 2 +-
 arch/arm/mm/proc-arm946.S | 2 +-
 arch/arm/mm/proc-arm9tdmi.S   | 2 +-
 arch/arm/mm/proc-fa526.S  | 2 +-
 arch/arm/mm/proc-feroceon.S   | 2 +-
 arch/arm/mm/proc-mohawk.S | 2 +-
 arch/arm/mm/proc-sa110.S  | 2 +-
 arch/arm/mm/proc-sa1100.S | 2 +-
 arch/arm/mm/proc-v6.S | 2 +-
 arch/arm/mm/proc-v7.S | 2 +-
 arch/arm/mm/proc-xsc3.S   | 2 +-
 arch/arm/mm/proc-xscale.S | 2 +-
 arch/arm/mm/pv-fixup-asm.S| 2 +-
 arch/arm64/include/asm/io.h   | 2 +-
 arch/arm64/include/asm/kvm_mmu.h  | 2 +-
 arch/arm64/include/asm/mmu_context.h  | 2 +-
 arch/arm64/include/asm/vmap_stack.h   | 2 +-
 arch/arm64/kernel/acpi.c  | 2 +-
 arch/arm64/kernel/head.S  | 2 +-
 arch/arm64/kernel/kaslr.c | 2 +-
 arch/arm64/kernel/suspend.c   | 2 +-
 arch/arm64/kernel/vmlinux.lds.S   | 1 -
 arch/arm64/mm/proc.S  | 2 +-
 arch/ia64/kernel/entry.S  | 2 +-
 arch/ia64/kernel/head.S   | 3 ++-
 arch/ia64/kernel/irq_ia64.c   | 2 +-
 arch/ia64/kernel/ivt.S| 2 +-
 arch/ia64/kernel/kprobes.c| 2 +-
 arch/ia64/kernel/mca_asm.S| 2 +-
 arch/ia64/kernel/relocate_kernel.S| 4 +---
 arch/ia64/kernel/setup.c  | 2 +-
 arch/ia64/kernel/uncached.c   | 2 +-
 arch/ia64/kernel/vmlinux.lds.S| 2 +-
 arch/m68k/68000/m68VZ328.c| 2 +-
 arch/m68k/include/asm/sun3xflop.h | 2 +-
 arch/m68k/kernel/head.S   | 2 +-
 arch/microblaze/include/asm/pgalloc.h | 2 +-
 arch/microblaze/kernel/hw_exception_handler.S | 2 +-
 arch/microblaze/kernel/module.c   | 2 +-
 arch/microblaze/kernel/setup.c| 2 +-
 arch/microblaze/mm/pgtable.c  | 2 +-
 arch/mips/jazz/irq.c  | 2 +-
 arch/mips/jazz/setup.c| 2 +-
 arch/mips/kvm/mips.c

[PATCH 01/12] mm: don't include asm/pgtable.h if linux/mm.h is already included

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

The linux/mm.h header includes <asm/pgtable.h> to allow inlining of the
functions involving page table manipulations, e.g. pte_alloc() and
pmd_alloc(). So, there is no point in explicitly including <asm/pgtable.h> in
the files that include <linux/mm.h>.

The include statements in such cases are removed with a simple loop:

for f in $(git grep -l "include <linux/mm.h>") ; do
sed -i -e '/include <asm\/pgtable.h>/ d' $f
done

Signed-off-by: Mike Rapoport 
---
 arch/alpha/boot/bootp.c   | 1 -
 arch/alpha/boot/bootpz.c  | 1 -
 arch/alpha/boot/main.c| 1 -
 arch/alpha/include/asm/io.h   | 1 -
 arch/alpha/kernel/process.c   | 1 -
 arch/alpha/kernel/ptrace.c| 1 -
 arch/alpha/kernel/setup.c | 1 -
 arch/alpha/kernel/smp.c   | 1 -
 arch/alpha/kernel/sys_alcor.c | 1 -
 arch/alpha/kernel/sys_cabriolet.c | 1 -
 arch/alpha/kernel/sys_dp264.c | 1 -
 arch/alpha/kernel/sys_eb64p.c | 1 -
 arch/alpha/kernel/sys_eiger.c | 1 -
 arch/alpha/kernel/sys_jensen.c| 1 -
 arch/alpha/kernel/sys_marvel.c| 1 -
 arch/alpha/kernel/sys_miata.c | 1 -
 arch/alpha/kernel/sys_mikasa.c| 1 -
 arch/alpha/kernel/sys_nautilus.c  | 1 -
 arch/alpha/kernel/sys_noritake.c  | 1 -
 arch/alpha/kernel/sys_rawhide.c   | 1 -
 arch/alpha/kernel/sys_ruffian.c   | 1 -
 arch/alpha/kernel/sys_rx164.c | 1 -
 arch/alpha/kernel/sys_sable.c | 1 -
 arch/alpha/kernel/sys_sio.c   | 1 -
 arch/alpha/kernel/sys_sx164.c | 1 -
 arch/alpha/kernel/sys_takara.c| 1 -
 arch/alpha/kernel/sys_titan.c | 1 -
 arch/alpha/kernel/sys_wildfire.c  | 1 -
 arch/alpha/mm/init.c  | 1 -
 arch/arm/kernel/machine_kexec.c   | 1 -
 arch/arm/kernel/module.c  | 1 -
 arch/arm/kernel/ptrace.c  | 1 -
 arch/arm/kernel/smp.c | 1 -
 arch/arm/mach-ebsa110/core.c  | 1 -
 arch/arm/mach-footbridge/common.c | 1 -
 arch/arm/mach-imx/mm-imx21.c  | 1 -
 arch/arm/mach-imx/mm-imx27.c  | 1 -
 arch/arm/mach-imx/mm-imx3.c   | 1 -
 arch/arm/mach-iop32x/i2c.c| 1 -
 arch/arm/mach-iop32x/iq31244.c| 1 -
 arch/arm/mach-iop32x/iq80321.c| 1 -
 arch/arm/mach-iop32x/n2100.c  | 1 -
 arch/arm/mach-ixp4xx/common.c | 1 -
 arch/arm/mach-sa1100/assabet.c| 1 -
 arch/arm/mm/copypage-v4mc.c   | 1 -
 arch/arm/mm/copypage-v6.c | 1 -
 arch/arm/mm/copypage-xscale.c | 1 -
 arch/arm/mm/dump.c| 1 -
 arch/arm/mm/fault-armv.c  | 1 -
 arch/arm/mm/fault.c   | 1 -
 arch/arm/mm/pageattr.c| 1 -
 arch/arm64/kernel/hibernate.c | 1 -
 arch/arm64/kernel/ptrace.c| 1 -
 arch/arm64/kernel/smp.c   | 1 -
 arch/arm64/mm/dump.c  | 1 -
 arch/arm64/mm/fault.c | 1 -
 arch/arm64/mm/kasan_init.c| 1 -
 arch/arm64/mm/pageattr.c  | 1 -
 arch/csky/kernel/module.c | 1 -
 arch/csky/kernel/ptrace.c | 1 -
 arch/csky/mm/init.c   | 1 -
 arch/csky/mm/tlb.c| 1 -
 arch/h8300/kernel/process.c   | 1 -
 arch/h8300/kernel/setup.c | 1 -
 arch/h8300/kernel/signal.c| 1 -
 arch/h8300/mm/fault.c | 1 -
 arch/h8300/mm/init.c  | 1 -
 arch/h8300/mm/memory.c| 1 -
 arch/hexagon/mm/vm_fault.c| 1 -
 arch/ia64/kernel/efi.c| 1 -
 arch/ia64/kernel/ptrace.c | 1 -
 arch/ia64/kernel/smp.c| 1 -
 arch/ia64/kernel/smpboot.c| 1 -
 arch/ia64/mm/contig.c | 1 -
 arch/ia64/mm/fault.c  | 1 -
 arch/m68k/68000/timers.c  | 1 -
 arch/m68k/amiga/config.c  | 1 -
 arch/m68k/apollo/config.c | 1 -
 arch/m68k/atari/atasound.c| 1 -
 arch/m68k/atari/stram.c   | 1 -
 arch/m68k/bvme6000/config.c   | 1 -
 arch/m68k/kernel/process.c| 1 -
 arch/m68k/kernel/ptrace.c | 1 -
 arch/m68k/kernel/setup_no.c   | 1 -
 arch/m68k/kernel/signal.c | 1 -
 arch/m68k/kernel/uboot.c

[PATCH 00/12] mm: consolidate definitions of page table accessors

2020-05-12 Thread Mike Rapoport

From: Mike Rapoport 

Hi,

The low level page table accessors (pXY_index(), pXY_offset()) are
duplicated across all architectures and sometimes more than once. For
instance, we have 31 definitions of pgd_offset() for 25 supported
architectures.

Most of these definitions are actually identical and typically it boils
down to, e.g. 

static inline unsigned long pmd_index(unsigned long address)
{
return (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
}

static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address)
{
return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(address);
}

These definitions can be shared among 90% of the arches provided XYZ_SHIFT,
PTRS_PER_XYZ and xyz_page_vaddr() are defined.

For architectures that really need a custom version there is always the
possibility to override the generic version with the usual ifdef magic.

These patches introduce include/linux/pgtable.h that replaces
include/asm-generic/pgtable.h and add the definitions of the page table
accessors to the new header.

The patches are against v5.7-rc5-mmots-2020-05-11-15-47.

Mike Rapoport (12):
  mm: don't include asm/pgtable.h if linux/mm.h is already included
  mm: introduce include/linux/pgtable.h
  mm: reorder includes after introduction of linux/pgtable.h
  csky: replace definitions of __pXd_offset() with pXd_index()
  m68k/mm/motorola: move comment about page table allocation funcitons
  m68k/mm: move {cache,nocahe}_page() definitions close to their user
  x86/mm: simplify init_trampoline() and surrounding logic
  mm: pgtable: add shortcuts for accessing kernel PMD and PTE
  mm: consolidate pte_index() and pte_offset_*() definitions
  mm: consolidate pmd_index() and pmd_offset() definitions
  mm: consolidate pud_index() and pud_offset() definitions
  mm: consolidate pgd_index() and pgd_offset{_k}() definitions

 arch/alpha/boot/bootp.c   |   1 -
 arch/alpha/boot/bootpz.c  |   1 -
 arch/alpha/boot/main.c|   1 -
 arch/alpha/include/asm/io.h   |   1 -
 arch/alpha/include/asm/pgtable.h  |  16 +--
 arch/alpha/kernel/process.c   |   1 -
 arch/alpha/kernel/proto.h |   2 -
 arch/alpha/kernel/ptrace.c|   1 -
 arch/alpha/kernel/setup.c |   1 -
 arch/alpha/kernel/smp.c   |   1 -
 arch/alpha/kernel/sys_alcor.c |   1 -
 arch/alpha/kernel/sys_cabriolet.c |   1 -
 arch/alpha/kernel/sys_dp264.c |   1 -
 arch/alpha/kernel/sys_eb64p.c |   1 -
 arch/alpha/kernel/sys_eiger.c |   1 -
 arch/alpha/kernel/sys_jensen.c|   1 -
 arch/alpha/kernel/sys_marvel.c|   1 -
 arch/alpha/kernel/sys_miata.c |   1 -
 arch/alpha/kernel/sys_mikasa.c|   1 -
 arch/alpha/kernel/sys_nautilus.c  |   1 -
 arch/alpha/kernel/sys_noritake.c  |   1 -
 arch/alpha/kernel/sys_rawhide.c   |   1 -
 arch/alpha/kernel/sys_ruffian.c   |   1 -
 arch/alpha/kernel/sys_rx164.c |   1 -
 arch/alpha/kernel/sys_sable.c |   1 -
 arch/alpha/kernel/sys_sio.c   |   1 -
 arch/alpha/kernel/sys_sx164.c |   1 -
 arch/alpha/kernel/sys_takara.c|   1 -
 arch/alpha/kernel/sys_titan.c |   1 -
 arch/alpha/kernel/sys_wildfire.c  |   1 -
 arch/alpha/mm/init.c  |   1 -
 arch/arc/include/asm/pgtable.h|  24 
 arch/arc/mm/highmem.c |  12 +-
 arch/arc/mm/tlbex.S   |   2 +-
 arch/arm/include/asm/efi.h|   1 -
 arch/arm/include/asm/fixmap.h |   2 +-
 arch/arm/include/asm/idmap.h  |   2 +-
 arch/arm/include/asm/pgtable-2level.h |   1 +
 arch/arm/include/asm/pgtable-3level.h |   7 --
 arch/arm/include/asm/pgtable-nommu.h  |   3 -
 arch/arm/include/asm/pgtable.h|  25 
 arch/arm/kernel/head.S|   2 +-
 arch/arm/kernel/machine_kexec.c   |   1 -
 arch/arm/kernel/module.c  |   1 -
 arch/arm/kernel/ptrace.c  |   1 -
 arch/arm/kernel/smp.c |   1 -
 arch/arm/kernel/suspend.c |   2 +-
 arch/arm/kernel/vmlinux.lds.S |   2 +-
 arch/arm/mach-ebsa110/core.c  |   1 -
 arch/arm/mach-footbridge/common.c |   1 -
 arch/arm/mach-imx/mm-imx21.c  |   1 -
 arch/arm/mach-imx/mm-imx27.c  |   1 -
 arch/arm/mach-imx/mm-imx3.c   |   1 -
 arch/arm/mach-integrator/core.c   |   2 +-
 arch/arm/mach-iop32x/i2c.c|   1 -
 arch/arm/mach-iop32x/iq31244.c|   1 -
 arch/arm/mach-iop32x/iq80321.c|

Re: [PATCH V3 3/3] mm/hugetlb: Define a generic fallback for arch_clear_hugepage_flags()

2020-05-11 Thread Mike Kravetz

On 5/7/20 8:07 PM, Anshuman Khandual wrote:
> There are multiple similar definitions for arch_clear_hugepage_flags() on
> various platforms. Lets just add it's generic fallback definition for
> platforms that do not override. This help reduce code duplication.
> 
> Cc: Russell King 
> Cc: Catalin Marinas 
> Cc: Will Deacon 
> Cc: Tony Luck 
> Cc: Fenghua Yu 
> Cc: Thomas Bogendoerfer 
> Cc: "James E.J. Bottomley" 
> Cc: Helge Deller 
> Cc: Benjamin Herrenschmidt 
> Cc: Paul Mackerras 
> Cc: Michael Ellerman 
> Cc: Paul Walmsley 
> Cc: Palmer Dabbelt 
> Cc: Heiko Carstens 
> Cc: Vasily Gorbik 
> Cc: Christian Borntraeger 
> Cc: Yoshinori Sato 
> Cc: Rich Felker 
> Cc: "David S. Miller" 
> Cc: Thomas Gleixner 
> Cc: Ingo Molnar 
> Cc: Borislav Petkov 
> Cc: "H. Peter Anvin" 
> Cc: Mike Kravetz 
> Cc: Andrew Morton 
> Cc: x...@kernel.org
> Cc: linux-arm-ker...@lists.infradead.org
> Cc: linux-i...@vger.kernel.org
> Cc: linux-m...@vger.kernel.org
> Cc: linux-par...@vger.kernel.org
> Cc: linuxppc-dev@lists.ozlabs.org
> Cc: linux-ri...@lists.infradead.org
> Cc: linux-s...@vger.kernel.org
> Cc: linux...@vger.kernel.org
> Cc: sparcli...@vger.kernel.org
> Cc: linux...@kvack.org
> Cc: linux-a...@vger.kernel.org
> Cc: linux-ker...@vger.kernel.org
> Signed-off-by: Anshuman Khandual 

Thanks!
Removing duplicate code is good.

Acked-by: Mike Kravetz 

-- 
Mike Kravetz

Re: [PATCH V3 2/3] mm/hugetlb: Define a generic fallback for is_hugepage_only_range()

2020-05-11 Thread Mike Kravetz

On 5/10/20 8:14 PM, Anshuman Khandual wrote:
> On 05/09/2020 03:52 AM, Mike Kravetz wrote:
>> On 5/7/20 8:07 PM, Anshuman Khandual wrote:
>>
>> Did you try building without CONFIG_HUGETLB_PAGE defined?  I'm guessing
> 
> Yes I did for multiple platforms (s390, arm64, ia64, x86, powerpc etc).
> 
>> that you need a stub for is_hugepage_only_range().  Or, perhaps add this
>> to asm-generic/hugetlb.h?
>>
> There is already a stub (include/linux/hugetlb.h) when !CONFIG_HUGETLB_PAGE.
> 

Thanks!  I missed that stub in the existing code.  I like the removal of
redundant code.

Acked-by: Mike Kravetz 

-- 
Mike Kravetz

< 2 3 4 5 6 7 8 9 10 11 >

601 - 700 of 1497 matches

Mail list logo