Re: [PATCH v2 05/20] x86, mm: introduce vmem_altmap to augment vmemmap_populate()

2015-10-19 Thread Williams, Dan J
On Fri, 2015-10-09 at 20:55 -0400, Dan Williams wrote:
> In support of providing struct page for large persistent memory
> capacities, use struct vmem_altmap to change the default policy for
> allocating memory for the memmap array.  The default vmemmap_populate()
> allocates page table storage area from the page allocator.  Given
> persistent memory capacities relative to DRAM it may not be feasible to
> store the memmap in 'System Memory'.  Instead vmem_altmap represents
> pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf()
> requests.
> 
> Cc: H. Peter Anvin 
> Cc: Ingo Molnar 
> Cc: Dave Hansen 
> Cc: Andrew Morton 
> Signed-off-by: Dan Williams 
> ---

The kbuild test robot reported a crash with this patch when
CONFIG_ZONE_DEVICE=y && CONFIG_SPARSEMEM_VMEMMAP=n.  The ability to
specify an alternate location for the vmemmap needs to be gated on
CONFIG_SPARSEMEM_VMEMMAP=y.

Here's a refreshed patch with ifdef guards and a warning message if the
@altmap arg is passed to devm_memremap_pages() on a
CONFIG_SPARSEMEM_VMEMMAP=n kernel.


8<
Subject: x86, mm: introduce vmem_altmap to augment vmemmap_populate()

From: Dan Williams 

In support of providing struct page for large persistent memory
capacities, use struct vmem_altmap to change the default policy for
allocating memory for the memmap array.  The default vmemmap_populate()
allocates page table storage area from the page allocator.  Given
persistent memory capacities relative to DRAM it may not be feasible to
store the memmap in 'System Memory'.  Instead vmem_altmap represents
pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf()
requests.

Cc: H. Peter Anvin 
Cc: Ingo Molnar 
Cc: Dave Hansen 
Cc: Andrew Morton 
Reported-by: kbuild test robot 
Signed-off-by: Dan Williams 
---
 arch/m68k/include/asm/page_mm.h |1 
 arch/m68k/include/asm/page_no.h |1 
 arch/mn10300/include/asm/page.h |1 
 arch/x86/mm/init_64.c   |   32 ++---
 drivers/nvdimm/pmem.c   |6 ++
 include/linux/io.h  |   17 ---
 include/linux/memory_hotplug.h  |3 +
 include/linux/mm.h  |   98 ++-
 kernel/memremap.c   |   77 +++
 mm/memory_hotplug.c |   66 +++---
 mm/page_alloc.c |   10 
 mm/sparse-vmemmap.c |   37 ++-
 mm/sparse.c |8 ++-
 13 files changed, 294 insertions(+), 63 deletions(-)

diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 5029f73e6294..884f2f7e4caf 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -125,6 +125,7 @@ static inline void *__va(unsigned long x)
  */
 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)   __va((pfn) << PAGE_SHIFT)
+#define__pfn_to_phys(pfn)  PFN_PHYS(pfn)
 
 extern int m68k_virt_to_node_shift;
 
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index ef209169579a..7845eca0b36d 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -24,6 +24,7 @@ extern unsigned long memory_end;
 
 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)   __va((pfn) << PAGE_SHIFT)
+#define__pfn_to_phys(pfn)  PFN_PHYS(pfn)
 
 #define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) 
>> PAGE_SHIFT))
 #define page_to_virt(page) __va(((((page) - mem_map) << PAGE_SHIFT) + 
PAGE_OFFSET))
diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h
index 8288e124165b..3810a6f740fd 100644
--- a/arch/mn10300/include/asm/page.h
+++ b/arch/mn10300/include/asm/page.h
@@ -107,6 +107,7 @@ static inline int get_order(unsigned long size)
 #define pfn_to_kaddr(pfn)  __va((pfn) << PAGE_SHIFT)
 #define pfn_to_page(pfn)   (mem_map + ((pfn) - __pfn_disp))
 #define page_to_pfn(page)  ((unsigned long)((page) - mem_map) + __pfn_disp)
+#define __pfn_to_phys(pfn) PFN_PHYS(pfn)
 
 #define pfn_valid(pfn) \
 ({ \
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e5d42f1a2a71..cabf8ceb0a6b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -714,6 +714,12 @@ static void __meminit free_pagetable(struct page *page, 
int order)
 {
unsigned long magic;
unsigned int nr_pages = 1 << order;
+   struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
+
+   if (altmap) {
+   vmem_altmap_free(altmap, nr_pages);
+   return;
+   }
 
/* bootmem page has reserved flag */
if (PageReserved(page)) {
@@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size)
 {
unsigned long start_pfn = start >> PAGE_SHIFT;
 unsigned long nr_pages = size >> PAGE_SHIFT;
[message truncated by archive]
Re: [PATCH v2 05/20] x86, mm: introduce vmem_altmap to augment vmemmap_populate()

2015-10-19 Thread Williams, Dan J
On Fri, 2015-10-09 at 20:55 -0400, Dan Williams wrote:
> In support of providing struct page for large persistent memory
> capacities, use struct vmem_altmap to change the default policy for
> allocating memory for the memmap array.  The default vmemmap_populate()
> allocates page table storage area from the page allocator.  Given
> persistent memory capacities relative to DRAM it may not be feasible to
> store the memmap in 'System Memory'.  Instead vmem_altmap represents
> pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf()
> requests.
> 
> Cc: H. Peter Anvin 
> Cc: Ingo Molnar 
> Cc: Dave Hansen 
> Cc: Andrew Morton 
> Signed-off-by: Dan Williams 
> ---

The kbuild test robot reported a crash with this patch when
CONFIG_ZONE_DEVICE=y && CONFIG_SPARSEMEM_VMEMMAP=n.  The ability to
specify an alternate location for the vmemmap needs to be gated on
CONFIG_SPARSEMEM_VMEMMAP=y.

Here's a refreshed patch with ifdef guards and a warning message if the
@altmap arg is passed to devm_memremap_pages() on a
CONFIG_SPARSEMEM_VMEMMAP=n kernel.


8<
Subject: x86, mm: introduce vmem_altmap to augment vmemmap_populate()

From: Dan Williams 

In support of providing struct page for large persistent memory
capacities, use struct vmem_altmap to change the default policy for
allocating memory for the memmap array.  The default vmemmap_populate()
allocates page table storage area from the page allocator.  Given
persistent memory capacities relative to DRAM it may not be feasible to
store the memmap in 'System Memory'.  Instead vmem_altmap represents
pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf()
requests.

Cc: H. Peter Anvin 
Cc: Ingo Molnar 
Cc: Dave Hansen 
Cc: Andrew Morton 
Reported-by: kbuild test robot 
Signed-off-by: Dan Williams 
---
 arch/m68k/include/asm/page_mm.h |1 
 arch/m68k/include/asm/page_no.h |1 
 arch/mn10300/include/asm/page.h |1 
 arch/x86/mm/init_64.c   |   32 ++---
 drivers/nvdimm/pmem.c   |6 ++
 include/linux/io.h  |   17 ---
 include/linux/memory_hotplug.h  |3 +
 include/linux/mm.h  |   98 ++-
 kernel/memremap.c   |   77 +++
 mm/memory_hotplug.c |   66 +++---
 mm/page_alloc.c |   10 
 mm/sparse-vmemmap.c |   37 ++-
 mm/sparse.c |8 ++-
 13 files changed, 294 insertions(+), 63 deletions(-)

diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 5029f73e6294..884f2f7e4caf 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -125,6 +125,7 @@ static inline void *__va(unsigned long x)
  */
 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)   __va((pfn) << PAGE_SHIFT)
+#define__pfn_to_phys(pfn)  PFN_PHYS(pfn)
 
 extern int m68k_virt_to_node_shift;
 
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index ef209169579a..7845eca0b36d 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -24,6 +24,7 @@ extern unsigned long memory_end;
 
 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)   __va((pfn) << PAGE_SHIFT)
+#define__pfn_to_phys(pfn)  PFN_PHYS(pfn)
 
 #define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) 
>> PAGE_SHIFT))
 #define page_to_virt(page) __va(((((page) - mem_map) << PAGE_SHIFT) + 
PAGE_OFFSET))
diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h
index 8288e124165b..3810a6f740fd 100644
--- a/arch/mn10300/include/asm/page.h
+++ b/arch/mn10300/include/asm/page.h
@@ -107,6 +107,7 @@ static inline int get_order(unsigned long size)
 #define pfn_to_kaddr(pfn)  __va((pfn) << PAGE_SHIFT)
 #define pfn_to_page(pfn)   (mem_map + ((pfn) - __pfn_disp))
 #define page_to_pfn(page)  ((unsigned long)((page) - mem_map) + __pfn_disp)
+#define __pfn_to_phys(pfn) PFN_PHYS(pfn)
 
 #define pfn_valid(pfn) \
 ({ \
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e5d42f1a2a71..cabf8ceb0a6b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -714,6 +714,12 @@ static void __meminit free_pagetable(struct page *page, 
int order)
 {
unsigned long magic;
unsigned int nr_pages = 1 << order;
+   struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
+
+   if (altmap) {
+   vmem_altmap_free(altmap, nr_pages);
+   return;
+   }
[message truncated by archive]

[PATCH v2 05/20] x86, mm: introduce vmem_altmap to augment vmemmap_populate()

2015-10-09 Thread Dan Williams
In support of providing struct page for large persistent memory
capacities, use struct vmem_altmap to change the default policy for
allocating memory for the memmap array.  The default vmemmap_populate()
allocates page table storage area from the page allocator.  Given
persistent memory capacities relative to DRAM it may not be feasible to
store the memmap in 'System Memory'.  Instead vmem_altmap represents
pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf()
requests.

Cc: H. Peter Anvin 
Cc: Ingo Molnar 
Cc: Dave Hansen 
Cc: Andrew Morton 
Signed-off-by: Dan Williams 
---
 arch/m68k/include/asm/page_mm.h |1 
 arch/m68k/include/asm/page_no.h |1 
 arch/x86/mm/init_64.c   |   32 ++---
 drivers/nvdimm/pmem.c   |6 ++
 include/linux/io.h  |   17 ---
 include/linux/memory_hotplug.h  |3 +
 include/linux/mm.h  |   95 ++-
 kernel/memremap.c   |   69 +---
 mm/memory_hotplug.c |   66 +++
 mm/page_alloc.c |   10 
 mm/sparse-vmemmap.c |   37 ++-
 mm/sparse.c |8 ++-
 12 files changed, 282 insertions(+), 63 deletions(-)

diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 5029f73e6294..884f2f7e4caf 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -125,6 +125,7 @@ static inline void *__va(unsigned long x)
  */
 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)   __va((pfn) << PAGE_SHIFT)
+#define__pfn_to_phys(pfn)  PFN_PHYS(pfn)
 
 extern int m68k_virt_to_node_shift;
 
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index ef209169579a..7845eca0b36d 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -24,6 +24,7 @@ extern unsigned long memory_end;
 
 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)   __va((pfn) << PAGE_SHIFT)
+#define__pfn_to_phys(pfn)  PFN_PHYS(pfn)
 
 #define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) 
>> PAGE_SHIFT))
 #define page_to_virt(page) __va(((((page) - mem_map) << PAGE_SHIFT) + 
PAGE_OFFSET))
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e5d42f1a2a71..cabf8ceb0a6b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -714,6 +714,12 @@ static void __meminit free_pagetable(struct page *page, 
int order)
 {
unsigned long magic;
unsigned int nr_pages = 1 << order;
+   struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
+
+   if (altmap) {
+   vmem_altmap_free(altmap, nr_pages);
+   return;
+   }
 
/* bootmem page has reserved flag */
if (PageReserved(page)) {
@@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size)
 {
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+   struct page *page = pfn_to_page(start_pfn);
+   struct vmem_altmap *altmap;
struct zone *zone;
int ret;
 
-   zone = page_zone(pfn_to_page(start_pfn));
-   kernel_physical_mapping_remove(start, start + size);
+   /* With altmap the first mapped page is offset from @start */
+   altmap = to_vmem_altmap((unsigned long) page);
+   if (altmap)
+   page += vmem_altmap_offset(altmap);
+   zone = page_zone(page);
ret = __remove_pages(zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
+   kernel_physical_mapping_remove(start, start + size);
 
return ret;
 }
@@ -1234,7 +1246,7 @@ static void __meminitdata *p_start, *p_end;
 static int __meminitdata node_start;
 
 static int __meminit vmemmap_populate_hugepages(unsigned long start,
-   unsigned long end, int node)
+   unsigned long end, int node, struct vmem_altmap *altmap)
 {
unsigned long addr;
unsigned long next;
@@ -1257,7 +1269,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned 
long start,
if (pmd_none(*pmd)) {
void *p;
 
-   p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+   p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
if (p) {
pte_t entry;
 
@@ -1278,7 +1290,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned 
long start,
addr_end = addr + PMD_SIZE;
p_end = p + PMD_SIZE;
continue;
-   }
+   } else if (altmap)
+   return -ENOMEM; /* no fallback */
} else if (pmd_large(*pmd)) {
   

[PATCH v2 05/20] x86, mm: introduce vmem_altmap to augment vmemmap_populate()

2015-10-09 Thread Dan Williams
In support of providing struct page for large persistent memory
capacities, use struct vmem_altmap to change the default policy for
allocating memory for the memmap array.  The default vmemmap_populate()
allocates page table storage area from the page allocator.  Given
persistent memory capacities relative to DRAM it may not be feasible to
store the memmap in 'System Memory'.  Instead vmem_altmap represents
pre-allocated "device pages" to satisfy vmemmap_alloc_block_buf()
requests.

Cc: H. Peter Anvin 
Cc: Ingo Molnar 
Cc: Dave Hansen 
Cc: Andrew Morton 
Signed-off-by: Dan Williams 
---
 arch/m68k/include/asm/page_mm.h |1 
 arch/m68k/include/asm/page_no.h |1 
 arch/x86/mm/init_64.c   |   32 ++---
 drivers/nvdimm/pmem.c   |6 ++
 include/linux/io.h  |   17 ---
 include/linux/memory_hotplug.h  |3 +
 include/linux/mm.h  |   95 ++-
 kernel/memremap.c   |   69 +---
 mm/memory_hotplug.c |   66 +++
 mm/page_alloc.c |   10 
 mm/sparse-vmemmap.c |   37 ++-
 mm/sparse.c |8 ++-
 12 files changed, 282 insertions(+), 63 deletions(-)

diff --git a/arch/m68k/include/asm/page_mm.h b/arch/m68k/include/asm/page_mm.h
index 5029f73e6294..884f2f7e4caf 100644
--- a/arch/m68k/include/asm/page_mm.h
+++ b/arch/m68k/include/asm/page_mm.h
@@ -125,6 +125,7 @@ static inline void *__va(unsigned long x)
  */
 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)   __va((pfn) << PAGE_SHIFT)
+#define__pfn_to_phys(pfn)  PFN_PHYS(pfn)
 
 extern int m68k_virt_to_node_shift;
 
diff --git a/arch/m68k/include/asm/page_no.h b/arch/m68k/include/asm/page_no.h
index ef209169579a..7845eca0b36d 100644
--- a/arch/m68k/include/asm/page_no.h
+++ b/arch/m68k/include/asm/page_no.h
@@ -24,6 +24,7 @@ extern unsigned long memory_end;
 
 #define virt_to_pfn(kaddr) (__pa(kaddr) >> PAGE_SHIFT)
 #define pfn_to_virt(pfn)   __va((pfn) << PAGE_SHIFT)
+#define__pfn_to_phys(pfn)  PFN_PHYS(pfn)
 
 #define virt_to_page(addr) (mem_map + (((unsigned long)(addr)-PAGE_OFFSET) 
>> PAGE_SHIFT))
 #define page_to_virt(page) __va(((((page) - mem_map) << PAGE_SHIFT) + 
PAGE_OFFSET))
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index e5d42f1a2a71..cabf8ceb0a6b 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -714,6 +714,12 @@ static void __meminit free_pagetable(struct page *page, 
int order)
 {
unsigned long magic;
unsigned int nr_pages = 1 << order;
+   struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
+
+   if (altmap) {
+   vmem_altmap_free(altmap, nr_pages);
+   return;
+   }
 
/* bootmem page has reserved flag */
if (PageReserved(page)) {
@@ -1018,13 +1024,19 @@ int __ref arch_remove_memory(u64 start, u64 size)
 {
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
+   struct page *page = pfn_to_page(start_pfn);
+   struct vmem_altmap *altmap;
struct zone *zone;
int ret;
 
-   zone = page_zone(pfn_to_page(start_pfn));
-   kernel_physical_mapping_remove(start, start + size);
+   /* With altmap the first mapped page is offset from @start */
+   altmap = to_vmem_altmap((unsigned long) page);
+   if (altmap)
+   page += vmem_altmap_offset(altmap);
+   zone = page_zone(page);
ret = __remove_pages(zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
+   kernel_physical_mapping_remove(start, start + size);
 
return ret;
 }
@@ -1234,7 +1246,7 @@ static void __meminitdata *p_start, *p_end;
 static int __meminitdata node_start;
 
 static int __meminit vmemmap_populate_hugepages(unsigned long start,
-   unsigned long end, int node)
+   unsigned long end, int node, struct vmem_altmap *altmap)
 {
unsigned long addr;
unsigned long next;
@@ -1257,7 +1269,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned 
long start,
if (pmd_none(*pmd)) {
void *p;
 
-   p = vmemmap_alloc_block_buf(PMD_SIZE, node);
+   p = vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
if (p) {
pte_t entry;
 
@@ -1278,7 +1290,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned 
long start,
addr_end = addr + PMD_SIZE;
p_end = p + PMD_SIZE;
continue;
-   }
+   } else if (altmap)
+   return -ENOMEM; /* no fallback */
[message truncated by archive]