Re: [PATCH v2 05/20] x86, mm: introduce vmem_altmap to augment vmemmap_populate()
On Fri, 2015-10-09 at 20:55 -0400, Dan Williams wrote: > In support of providing struct page for large persistent memory > capacities, use struct vmem_altmap to change the default policy for > allocating memory for the memmap array. The default vmemmap_populate() > allocates page table storage area from the page allocator. Given > persistent memory capacities relative to DRAM it may not be feasible to > store the memmap in 'System Memory'. Instead vmem_altmap represents > pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf() > requests. > > Cc: H. Peter Anvin > Cc: Ingo Molnar > Cc: Dave Hansen > Cc: Andrew Morton > Signed-off-by: Dan Williams > --- The kbuild test robot reported a crash with this patch when CONFIG_ZONE_DEVICE=y && CONFIG_SPARSEMEM_VMEMMAP=n. The ability to specify an alternate location for the vmemmap needs to be gated on CONFIG_SPARSEMEM_VMEMMAP=y. Here's a refreshed patch with ifdef guards and a warning message if the @altmap arg is passed to devm_memremap_pages() on a CONFIG_SPARSEMEM_VMEMMAP=n kernel. 8< Subject: x86, mm: introduce vmem_altmap to augment vmemmap_populate() From: Dan Williams In support of providing struct page for large persistent memory capacities, use struct vmem_altmap to change the default policy for allocating memory for the memmap array. The default vmemmap_populate() allocates page table storage area from the page allocator. Given persistent memory capacities relative to DRAM it may not be feasible to store the memmap in 'System Memory'. Instead vmem_altmap represents pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf() requests. Cc: H. 
Peter Anvin Cc: Ingo Molnar Cc: Dave Hansen Cc: Andrew Morton Reported-by: kbuild test robot Signed-off-by: Dan Williams --- arch/m68k/include/asm/page_mm.h |1 arch/m68k/include/asm/page_no.h |1 arch/mn10300/include/asm/page.h |1 arch/x86/mm/init_64.c | 32 ++--- drivers/nvdimm/pmem.c |6 ++ include/linux/io.h | 17 --- include/linux/memory_hotplug.h |3 + include/linux/mm.h | 98 ++- kernel/memremap.c | 77 +++ mm/memory_hotplug.c | 66 +++--- mm/page_alloc.c | 10 mm/sparse-vmemmap.c | 37 ++- mm/sparse.c |8 ++- 13 files changed, 294 insertions(+), 63 deletions(-) diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 5029f73e6294..884f2f7e4caf 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -125,6 +125,7 @@ static inline void *__va(unsigned long x) */ #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define__pfn_to_phys(pfn) PFN_PHYS(pfn) extern int m68k_virt_to_node_shift; diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h index ef209169579a..7845eca0b36d 100644 --- a/arch/m68k/include/asm/page_no.h +++ b/arch/m68k/include/asm/page_no.h @@ -24,6 +24,7 @@ extern unsigned long memory_end; #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define__pfn_to_phys(pfn) PFN_PHYS(pfn) #define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) #define page_to_virt(page) __va(page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)) diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 8288e124165b..3810a6f740fd 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -107,6 +107,7 @@ static inline int get_order(unsigned long size) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define pfn_to_page(pfn) (mem_map + ((pfn) - __pfn_disp)) #define page_to_pfn(page) ((unsigned long)((page) - 
mem_map) + __pfn_disp) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define pfn_valid(pfn) \ ({ \ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e5d42f1a2a71..cabf8ceb0a6b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -714,6 +714,12 @@ static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; + struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); + + if (altmap) { + vmem_altmap_free(altmap, nr_pages); + return; + } /* bootmem page has reserved flag */ if (PageReserved(page)) { @@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >>
Re: [PATCH v2 05/20] x86, mm: introduce vmem_altmap to augment vmemmap_populate()
On Fri, 2015-10-09 at 20:55 -0400, Dan Williams wrote: > In support of providing struct page for large persistent memory > capacities, use struct vmem_altmap to change the default policy for > allocating memory for the memmap array. The default vmemmap_populate() > allocates page table storage area from the page allocator. Given > persistent memory capacities relative to DRAM it may not be feasible to > store the memmap in 'System Memory'. Instead vmem_altmap represents > pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf() > requests. > > Cc: H. Peter Anvin > Cc: Ingo Molnar > Cc: Dave Hansen > Cc: Andrew Morton > Signed-off-by: Dan Williams > --- The kbuild test robot reported a crash with this patch when CONFIG_ZONE_DEVICE=y && CONFIG_SPARSEMEM_VMEMMAP=n. The ability to specify an alternate location for the vmemmap needs to be gated on CONFIG_SPARSEMEM_VMEMMAP=y. Here's a refreshed patch with ifdef guards and a warning message if the @altmap arg is passed to devm_memremap_pages() on a CONFIG_SPARSEMEM_VMEMMAP=n kernel. 8< Subject: x86, mm: introduce vmem_altmap to augment vmemmap_populate() From: Dan Williams In support of providing struct page for large persistent memory capacities, use struct vmem_altmap to change the default policy for allocating memory for the memmap array. The default vmemmap_populate() allocates page table storage area from the page allocator. Given persistent memory capacities relative to DRAM it may not be feasible to store the memmap in 'System Memory'. Instead vmem_altmap represents pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf() requests. Cc: H. 
Peter Anvin Cc: Ingo Molnar Cc: Dave Hansen Cc: Andrew Morton Reported-by: kbuild test robot Signed-off-by: Dan Williams --- arch/m68k/include/asm/page_mm.h |1 arch/m68k/include/asm/page_no.h |1 arch/mn10300/include/asm/page.h |1 arch/x86/mm/init_64.c | 32 ++--- drivers/nvdimm/pmem.c |6 ++ include/linux/io.h | 17 --- include/linux/memory_hotplug.h |3 + include/linux/mm.h | 98 ++- kernel/memremap.c | 77 +++ mm/memory_hotplug.c | 66 +++--- mm/page_alloc.c | 10 mm/sparse-vmemmap.c | 37 ++- mm/sparse.c |8 ++- 13 files changed, 294 insertions(+), 63 deletions(-) diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 5029f73e6294..884f2f7e4caf 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -125,6 +125,7 @@ static inline void *__va(unsigned long x) */ #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define__pfn_to_phys(pfn) PFN_PHYS(pfn) extern int m68k_virt_to_node_shift; diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h index ef209169579a..7845eca0b36d 100644 --- a/arch/m68k/include/asm/page_no.h +++ b/arch/m68k/include/asm/page_no.h @@ -24,6 +24,7 @@ extern unsigned long memory_end; #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define__pfn_to_phys(pfn) PFN_PHYS(pfn) #define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) #define page_to_virt(page) __va(page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)) diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 8288e124165b..3810a6f740fd 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -107,6 +107,7 @@ static inline int get_order(unsigned long size) #define pfn_to_kaddr(pfn) __va((pfn) << PAGE_SHIFT) #define pfn_to_page(pfn) (mem_map + ((pfn) - __pfn_disp)) #define page_to_pfn(page) ((unsigned long)((page) - 
mem_map) + __pfn_disp) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define pfn_valid(pfn) \ ({ \ diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e5d42f1a2a71..cabf8ceb0a6b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -714,6 +714,12 @@ static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; + struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); + + if (altmap) { + vmem_altmap_free(altmap, nr_pages); +
[PATCH v2 05/20] x86, mm: introduce vmem_altmap to augment vmemmap_populate()
In support of providing struct page for large persistent memory capacities, use struct vmem_altmap to change the default policy for allocating memory for the memmap array. The default vmemmap_populate() allocates page table storage area from the page allocator. Given persistent memory capacities relative to DRAM it may not be feasible to store the memmap in 'System Memory'. Instead vmem_altmap represents pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf() requests. Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Dave Hansen Cc: Andrew Morton Signed-off-by: Dan Williams --- arch/m68k/include/asm/page_mm.h |1 arch/m68k/include/asm/page_no.h |1 arch/x86/mm/init_64.c | 32 ++--- drivers/nvdimm/pmem.c |6 ++ include/linux/io.h | 17 --- include/linux/memory_hotplug.h |3 + include/linux/mm.h | 95 ++- kernel/memremap.c | 69 +--- mm/memory_hotplug.c | 66 +++ mm/page_alloc.c | 10 mm/sparse-vmemmap.c | 37 ++- mm/sparse.c |8 ++- 12 files changed, 282 insertions(+), 63 deletions(-) diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 5029f73e6294..884f2f7e4caf 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -125,6 +125,7 @@ static inline void *__va(unsigned long x) */ #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define__pfn_to_phys(pfn) PFN_PHYS(pfn) extern int m68k_virt_to_node_shift; diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h index ef209169579a..7845eca0b36d 100644 --- a/arch/m68k/include/asm/page_no.h +++ b/arch/m68k/include/asm/page_no.h @@ -24,6 +24,7 @@ extern unsigned long memory_end; #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define__pfn_to_phys(pfn) PFN_PHYS(pfn) #define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) #define page_to_virt(page) __va(page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)) diff 
--git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e5d42f1a2a71..cabf8ceb0a6b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -714,6 +714,12 @@ static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; + struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); + + if (altmap) { + vmem_altmap_free(altmap, nr_pages); + return; + } /* bootmem page has reserved flag */ if (PageReserved(page)) { @@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + struct page *page = pfn_to_page(start_pfn); + struct vmem_altmap *altmap; struct zone *zone; int ret; - zone = page_zone(pfn_to_page(start_pfn)); - kernel_physical_mapping_remove(start, start + size); + /* With altmap the first mapped page is offset from @start */ + altmap = to_vmem_altmap((unsigned long) page); + if (altmap) + page += vmem_altmap_offset(altmap); + zone = page_zone(page); ret = __remove_pages(zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); + kernel_physical_mapping_remove(start, start + size); return ret; } @@ -1234,7 +1246,7 @@ static void __meminitdata *p_start, *p_end; static int __meminitdata node_start; static int __meminit vmemmap_populate_hugepages(unsigned long start, - unsigned long end, int node) + unsigned long end, int node, struct vmem_altmap *altmap) { unsigned long addr; unsigned long next; @@ -1257,7 +1269,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = vmemmap_alloc_block_buf(PMD_SIZE, node); + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); if (p) { pte_t entry; @@ -1278,7 +1290,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, addr_end = addr + PMD_SIZE; p_end = p + PMD_SIZE; continue; - } + } else if (altmap) + return -ENOMEM; /* no fallback */ } else if (pmd_large(*pmd)) {
[PATCH v2 05/20] x86, mm: introduce vmem_altmap to augment vmemmap_populate()
In support of providing struct page for large persistent memory capacities, use struct vmem_altmap to change the default policy for allocating memory for the memmap array. The default vmemmap_populate() allocates page table storage area from the page allocator. Given persistent memory capacities relative to DRAM it may not be feasible to store the memmap in 'System Memory'. Instead vmem_altmap represents pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf() requests. Cc: H. Peter Anvin Cc: Ingo Molnar Cc: Dave Hansen Cc: Andrew Morton Signed-off-by: Dan Williams --- arch/m68k/include/asm/page_mm.h |1 arch/m68k/include/asm/page_no.h |1 arch/x86/mm/init_64.c | 32 ++--- drivers/nvdimm/pmem.c |6 ++ include/linux/io.h | 17 --- include/linux/memory_hotplug.h |3 + include/linux/mm.h | 95 ++- kernel/memremap.c | 69 +--- mm/memory_hotplug.c | 66 +++ mm/page_alloc.c | 10 mm/sparse-vmemmap.c | 37 ++- mm/sparse.c |8 ++- 12 files changed, 282 insertions(+), 63 deletions(-) diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h index 5029f73e6294..884f2f7e4caf 100644 --- a/arch/m68k/include/asm/page_mm.h +++ b/arch/m68k/include/asm/page_mm.h @@ -125,6 +125,7 @@ static inline void *__va(unsigned long x) */ #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) extern int m68k_virt_to_node_shift; diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h index ef209169579a..7845eca0b36d 100644 --- a/arch/m68k/include/asm/page_no.h +++ b/arch/m68k/include/asm/page_no.h @@ -24,6 +24,7 @@ extern unsigned long memory_end; #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT) #define pfn_to_virt(pfn) __va((pfn) << PAGE_SHIFT) +#define __pfn_to_phys(pfn) PFN_PHYS(pfn) #define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) >> PAGE_SHIFT)) #define page_to_virt(page) __va(((((page) - mem_map) << PAGE_SHIFT) + PAGE_OFFSET)) diff 
--git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index e5d42f1a2a71..cabf8ceb0a6b 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -714,6 +714,12 @@ static void __meminit free_pagetable(struct page *page, int order) { unsigned long magic; unsigned int nr_pages = 1 << order; + struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page); + + if (altmap) { + vmem_altmap_free(altmap, nr_pages); + return; + } /* bootmem page has reserved flag */ if (PageReserved(page)) { @@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size) { unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; + struct page *page = pfn_to_page(start_pfn); + struct vmem_altmap *altmap; struct zone *zone; int ret; - zone = page_zone(pfn_to_page(start_pfn)); - kernel_physical_mapping_remove(start, start + size); + /* With altmap the first mapped page is offset from @start */ + altmap = to_vmem_altmap((unsigned long) page); + if (altmap) + page += vmem_altmap_offset(altmap); + zone = page_zone(page); ret = __remove_pages(zone, start_pfn, nr_pages); WARN_ON_ONCE(ret); + kernel_physical_mapping_remove(start, start + size); return ret; } @@ -1234,7 +1246,7 @@ static void __meminitdata *p_start, *p_end; static int __meminitdata node_start; static int __meminit vmemmap_populate_hugepages(unsigned long start, - unsigned long end, int node) + unsigned long end, int node, struct vmem_altmap *altmap) { unsigned long addr; unsigned long next; @@ -1257,7 +1269,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, if (pmd_none(*pmd)) { void *p; - p = vmemmap_alloc_block_buf(PMD_SIZE, node); + p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); if (p) { pte_t entry; @@ -1278,7 +1290,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start, addr_end = addr + PMD_SIZE; p_end = p + PMD_SIZE; continue; - } + } else if (altmap) +