Re: [RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
On Mon 31-07-17 16:27:46, Gerald Schaefer wrote: > On Mon, 31 Jul 2017 14:55:56 +0200 > Michal Hockowrote: > > > On Mon 31-07-17 14:40:53, Gerald Schaefer wrote: > > [...] > > > > @@ -247,12 +248,12 @@ int __meminit vmemmap_populate(unsigned long > > > > start, unsigned long end, int node) > > > > * use large frames even if they are only > > > > partially > > > > * used. > > > > * Otherwise we would have also page tables > > > > since > > > > -* vmemmap_populate gets called for each section > > > > +* __vmemmap_populate gets called for each > > > > section > > > > * separately. */ > > > > if (MACHINE_HAS_EDAT1) { > > > > void *new_page; > > > > > > > > - new_page = > > > > vmemmap_alloc_block(PMD_SIZE, node); > > > > + new_page = > > > > __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > > > if (!new_page) > > > > goto out; > > > > pmd_val(*pm_dir) = __pa(new_page) | > > > > sgt_prot; > > > > > > There is another call to vmemmap_alloc_block() in this function, a couple > > > of lines below, this should also be replaced by > > > __vmemmap_alloc_block_buf(). > > > > I've noticed that one but in general I have only transformed PMD > > mappings because we shouldn't even get to pte level if the forme works > > AFAICS. Memory sections should be always 2MB aligned unless I am missing > > something. Or is this not true? > > vmemmap_populate() on s390 will only stop at pmd level if we have HW > support for large pages (MACHINE_HAS_EDAT1). In that case we will allocate > a PMD_SIZE block with vmemmap_alloc_block() and map it on pmd level as > a large page. > > Without HW large page support, we will continue to allocate a pte page, > populate the pmd entry with that, and fall through to the pte_none() > check below, with its PAGE_SIZE vmemmap_alloc_block() allocation. In this > case we should use the __vmemmap_alloc_block_buf(). OK, I see. I've considered s390 will support large pages in general. I will fold this in. Thanks! 
--- commit df13e3a1237c3fef399e26b0f5a015715df12ede Author: Michal Hocko Date: Mon Jul 31 16:34:18 2017 +0200 fold me "mm, arch: unify vmemmap_populate altmap handling" - use altmap even for ptes in case the HW doesn't support large pages as per Gerald Schaefer diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 07120bc137a1..764b6393e66c 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -273,7 +273,7 @@ int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int nod if (pte_none(*pt_dir)) { void *new_page; - new_page = vmemmap_alloc_block(PAGE_SIZE, node); + new_page = __vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); if (!new_page) goto out; pte_val(*pt_dir) = __pa(new_page) | pgt_prot; -- Michal Hocko SUSE Labs
Re: [RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
On Mon 31-07-17 16:27:46, Gerald Schaefer wrote: > On Mon, 31 Jul 2017 14:55:56 +0200 > Michal Hocko wrote: > > > On Mon 31-07-17 14:40:53, Gerald Schaefer wrote: > > [...] > > > > @@ -247,12 +248,12 @@ int __meminit vmemmap_populate(unsigned long > > > > start, unsigned long end, int node) > > > > * use large frames even if they are only > > > > partially > > > > * used. > > > > * Otherwise we would have also page tables > > > > since > > > > -* vmemmap_populate gets called for each section > > > > +* __vmemmap_populate gets called for each > > > > section > > > > * separately. */ > > > > if (MACHINE_HAS_EDAT1) { > > > > void *new_page; > > > > > > > > - new_page = > > > > vmemmap_alloc_block(PMD_SIZE, node); > > > > + new_page = > > > > __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > > > > if (!new_page) > > > > goto out; > > > > pmd_val(*pm_dir) = __pa(new_page) | > > > > sgt_prot; > > > > > > There is another call to vmemmap_alloc_block() in this function, a couple > > > of lines below, this should also be replaced by > > > __vmemmap_alloc_block_buf(). > > > > I've noticed that one but in general I have only transformed PMD > > mappings because we shouldn't even get to pte level if the forme works > > AFAICS. Memory sections should be always 2MB aligned unless I am missing > > something. Or is this not true? > > vmemmap_populate() on s390 will only stop at pmd level if we have HW > support for large pages (MACHINE_HAS_EDAT1). In that case we will allocate > a PMD_SIZE block with vmemmap_alloc_block() and map it on pmd level as > a large page. > > Without HW large page support, we will continue to allocate a pte page, > populate the pmd entry with that, and fall through to the pte_none() > check below, with its PAGE_SIZE vmemmap_alloc_block() allocation. In this > case we should use the __vmemmap_alloc_block_buf(). OK, I see. I've considered s390 will support large pages in general. I will fold this in. Thanks! 
--- commit df13e3a1237c3fef399e26b0f5a015715df12ede Author: Michal Hocko Date: Mon Jul 31 16:34:18 2017 +0200 fold me "mm, arch: unify vmemmap_populate altmap handling" - use altmap even for ptes in case the HW doesn't support large pages as per Gerald Schaefer diff --git a/arch/s390/mm/vmem.c b/arch/s390/mm/vmem.c index 07120bc137a1..764b6393e66c 100644 --- a/arch/s390/mm/vmem.c +++ b/arch/s390/mm/vmem.c @@ -273,7 +273,7 @@ int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int nod if (pte_none(*pt_dir)) { void *new_page; - new_page = vmemmap_alloc_block(PAGE_SIZE, node); + new_page = __vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); if (!new_page) goto out; pte_val(*pt_dir) = __pa(new_page) | pgt_prot; -- Michal Hocko SUSE Labs
Re: [RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
On Mon, 31 Jul 2017 14:55:56 +0200 Michal Hockowrote: > On Mon 31-07-17 14:40:53, Gerald Schaefer wrote: > [...] > > > @@ -247,12 +248,12 @@ int __meminit vmemmap_populate(unsigned long start, > > > unsigned long end, int node) > > >* use large frames even if they are only partially > > >* used. > > >* Otherwise we would have also page tables since > > > - * vmemmap_populate gets called for each section > > > + * __vmemmap_populate gets called for each section > > >* separately. */ > > > if (MACHINE_HAS_EDAT1) { > > > void *new_page; > > > > > > - new_page = vmemmap_alloc_block(PMD_SIZE, node); > > > + new_page = __vmemmap_alloc_block_buf(PMD_SIZE, > > > node, altmap); > > > if (!new_page) > > > goto out; > > > pmd_val(*pm_dir) = __pa(new_page) | sgt_prot; > > > > There is another call to vmemmap_alloc_block() in this function, a couple > > of lines below, this should also be replaced by __vmemmap_alloc_block_buf(). > > I've noticed that one but in general I have only transformed PMD > mappings because we shouldn't even get to pte level if the forme works > AFAICS. Memory sections should be always 2MB aligned unless I am missing > something. Or is this not true? vmemmap_populate() on s390 will only stop at pmd level if we have HW support for large pages (MACHINE_HAS_EDAT1). In that case we will allocate a PMD_SIZE block with vmemmap_alloc_block() and map it on pmd level as a large page. Without HW large page support, we will continue to allocate a pte page, populate the pmd entry with that, and fall through to the pte_none() check below, with its PAGE_SIZE vmemmap_alloc_block() allocation. In this case we should use the __vmemmap_alloc_block_buf(). Regards, Gerald
Re: [RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
On Mon, 31 Jul 2017 14:55:56 +0200 Michal Hocko wrote: > On Mon 31-07-17 14:40:53, Gerald Schaefer wrote: > [...] > > > @@ -247,12 +248,12 @@ int __meminit vmemmap_populate(unsigned long start, > > > unsigned long end, int node) > > >* use large frames even if they are only partially > > >* used. > > >* Otherwise we would have also page tables since > > > - * vmemmap_populate gets called for each section > > > + * __vmemmap_populate gets called for each section > > >* separately. */ > > > if (MACHINE_HAS_EDAT1) { > > > void *new_page; > > > > > > - new_page = vmemmap_alloc_block(PMD_SIZE, node); > > > + new_page = __vmemmap_alloc_block_buf(PMD_SIZE, > > > node, altmap); > > > if (!new_page) > > > goto out; > > > pmd_val(*pm_dir) = __pa(new_page) | sgt_prot; > > > > There is another call to vmemmap_alloc_block() in this function, a couple > > of lines below, this should also be replaced by __vmemmap_alloc_block_buf(). > > I've noticed that one but in general I have only transformed PMD > mappings because we shouldn't even get to pte level if the forme works > AFAICS. Memory sections should be always 2MB aligned unless I am missing > something. Or is this not true? vmemmap_populate() on s390 will only stop at pmd level if we have HW support for large pages (MACHINE_HAS_EDAT1). In that case we will allocate a PMD_SIZE block with vmemmap_alloc_block() and map it on pmd level as a large page. Without HW large page support, we will continue to allocate a pte page, populate the pmd entry with that, and fall through to the pte_none() check below, with its PAGE_SIZE vmemmap_alloc_block() allocation. In this case we should use the __vmemmap_alloc_block_buf(). Regards, Gerald
Re: [RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
On Mon 31-07-17 14:40:53, Gerald Schaefer wrote: [...] > > @@ -247,12 +248,12 @@ int __meminit vmemmap_populate(unsigned long start, > > unsigned long end, int node) > > * use large frames even if they are only partially > > * used. > > * Otherwise we would have also page tables since > > -* vmemmap_populate gets called for each section > > +* __vmemmap_populate gets called for each section > > * separately. */ > > if (MACHINE_HAS_EDAT1) { > > void *new_page; > > > > - new_page = vmemmap_alloc_block(PMD_SIZE, node); > > + new_page = __vmemmap_alloc_block_buf(PMD_SIZE, > > node, altmap); > > if (!new_page) > > goto out; > > pmd_val(*pm_dir) = __pa(new_page) | sgt_prot; > > There is another call to vmemmap_alloc_block() in this function, a couple > of lines below, this should also be replaced by __vmemmap_alloc_block_buf(). I've noticed that one but in general I have only transformed PMD mappings because we shouldn't even get to pte level if the former works AFAICS. Memory sections should be always 2MB aligned unless I am missing something. Or is this not true? -- Michal Hocko SUSE Labs
Re: [RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
On Mon 31-07-17 14:40:53, Gerald Schaefer wrote: [...] > > @@ -247,12 +248,12 @@ int __meminit vmemmap_populate(unsigned long start, > > unsigned long end, int node) > > * use large frames even if they are only partially > > * used. > > * Otherwise we would have also page tables since > > -* vmemmap_populate gets called for each section > > +* __vmemmap_populate gets called for each section > > * separately. */ > > if (MACHINE_HAS_EDAT1) { > > void *new_page; > > > > - new_page = vmemmap_alloc_block(PMD_SIZE, node); > > + new_page = __vmemmap_alloc_block_buf(PMD_SIZE, > > node, altmap); > > if (!new_page) > > goto out; > > pmd_val(*pm_dir) = __pa(new_page) | sgt_prot; > > There is another call to vmemmap_alloc_block() in this function, a couple > of lines below, this should also be replaced by __vmemmap_alloc_block_buf(). I've noticed that one but in general I have only transformed PMD mappings because we shouldn't even get to pte level if the former works AFAICS. Memory sections should be always 2MB aligned unless I am missing something. Or is this not true? -- Michal Hocko SUSE Labs
Re: [RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
On Wed, 26 Jul 2017 10:33:30 +0200 Michal Hockowrote: > From: Michal Hocko > > vmem_altmap allows vmemmap_populate to allocate memmap (struct page > array) from an alternative allocator rather than bootmem resp. > kmalloc. Only x86 currently supports altmap handling, most likely > because only nvdim code uses this mechanism currently and the code > depends on ZONE_DEVICE which is present only for x86_64. This will > change in follow up changes so we would like other architectures > to support it as well. > > Provide vmemmap_populate generic implementation which simply resolves > altmap and then call into arch specific __vmemmap_populate. > Architectures then only need to use __vmemmap_alloc_block_buf to > allocate the memmap. vmemmap_free then needs to call vmem_altmap_free > if there is any altmap associated with the address. > > This patch shouldn't introduce any functional changes because > to_vmem_altmap always returns NULL on !x86_x64. > > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Tony Luck > Cc: Fenghua Yu > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: Martin Schwidefsky > Cc: Heiko Carstens > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: "H. 
Peter Anvin" > Cc: linuxppc-...@lists.ozlabs.org > Cc: linux-i...@vger.kernel.org > Cc: x...@kernel.org > Signed-off-by: Michal Hocko > --- > arch/arm64/mm/mmu.c | 9 ++--- > arch/ia64/mm/discontig.c | 4 +++- > arch/powerpc/mm/init_64.c | 29 - > arch/s390/mm/vmem.c | 7 --- > arch/sparc/mm/init_64.c | 6 +++--- > arch/x86/mm/init_64.c | 4 ++-- > include/linux/memremap.h | 13 ++--- > include/linux/mm.h| 19 ++- > mm/sparse-vmemmap.c | 2 +- > 9 files changed, 59 insertions(+), 34 deletions(-) > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index 0c429ec6fde8..5de1161e7a1b 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -649,12 +649,15 @@ int kern_addr_valid(unsigned long addr) > } > #ifdef CONFIG_SPARSEMEM_VMEMMAP > #if !ARM64_SWAPPER_USES_SECTION_MAPS > -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int > node) > +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int > node, > + struct vmem_altmap *altmap) > { > + WARN(altmap, "altmap unsupported\n"); > return vmemmap_populate_basepages(start, end, node); > } > #else/* !ARM64_SWAPPER_USES_SECTION_MAPS */ > -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int > node) > +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int > node, > + struct vmem_altmap *altmap) > { > unsigned long addr = start; > unsigned long next; > @@ -677,7 +680,7 @@ int __meminit vmemmap_populate(unsigned long start, > unsigned long end, int node) > if (pmd_none(*pmd)) { > void *p = NULL; > > - p = vmemmap_alloc_block_buf(PMD_SIZE, node); > + p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > if (!p) > return -ENOMEM; > > diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c > index 878626805369..2a939e877ced 100644 > --- a/arch/ia64/mm/discontig.c > +++ b/arch/ia64/mm/discontig.c > @@ -753,8 +753,10 @@ void arch_refresh_nodedata(int update_node, pg_data_t > *update_pgdat) > #endif > > #ifdef 
CONFIG_SPARSEMEM_VMEMMAP > -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int > node) > +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int > node, > + struct vmem_altmap *altmap) > { > + WARN(altmap, "altmap unsupported\n"); > return vmemmap_populate_basepages(start, end, node); > } > > diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c > index ec84b31c6c86..5ea5e870a589 100644 > --- a/arch/powerpc/mm/init_64.c > +++ b/arch/powerpc/mm/init_64.c > @@ -44,6 +44,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -115,7 +116,8 @@ static struct vmemmap_backing *next; > static int num_left; > static int num_freed; > > -static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) > +static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node, > + struct vmem_altmap *altmap) > { > struct vmemmap_backing *vmem_back; > /* get from freed entries first */ > @@ -129,7 +131,7 @@ static __meminit struct vmemmap_backing * > vmemmap_list_alloc(int node) > > /* allocate a page when
Re: [RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
On Wed, 26 Jul 2017 10:33:30 +0200 Michal Hocko wrote: > From: Michal Hocko > > vmem_altmap allows vmemmap_populate to allocate memmap (struct page > array) from an alternative allocator rather than bootmem resp. > kmalloc. Only x86 currently supports altmap handling, most likely > because only nvdim code uses this mechanism currently and the code > depends on ZONE_DEVICE which is present only for x86_64. This will > change in follow up changes so we would like other architectures > to support it as well. > > Provide vmemmap_populate generic implementation which simply resolves > altmap and then call into arch specific __vmemmap_populate. > Architectures then only need to use __vmemmap_alloc_block_buf to > allocate the memmap. vmemmap_free then needs to call vmem_altmap_free > if there is any altmap associated with the address. > > This patch shouldn't introduce any functional changes because > to_vmem_altmap always returns NULL on !x86_x64. > > Cc: Catalin Marinas > Cc: Will Deacon > Cc: Tony Luck > Cc: Fenghua Yu > Cc: Benjamin Herrenschmidt > Cc: Paul Mackerras > Cc: Michael Ellerman > Cc: Martin Schwidefsky > Cc: Heiko Carstens > Cc: Thomas Gleixner > Cc: Ingo Molnar > Cc: "H. 
Peter Anvin" > Cc: linuxppc-...@lists.ozlabs.org > Cc: linux-i...@vger.kernel.org > Cc: x...@kernel.org > Signed-off-by: Michal Hocko > --- > arch/arm64/mm/mmu.c | 9 ++--- > arch/ia64/mm/discontig.c | 4 +++- > arch/powerpc/mm/init_64.c | 29 - > arch/s390/mm/vmem.c | 7 --- > arch/sparc/mm/init_64.c | 6 +++--- > arch/x86/mm/init_64.c | 4 ++-- > include/linux/memremap.h | 13 ++--- > include/linux/mm.h| 19 ++- > mm/sparse-vmemmap.c | 2 +- > 9 files changed, 59 insertions(+), 34 deletions(-) > > diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c > index 0c429ec6fde8..5de1161e7a1b 100644 > --- a/arch/arm64/mm/mmu.c > +++ b/arch/arm64/mm/mmu.c > @@ -649,12 +649,15 @@ int kern_addr_valid(unsigned long addr) > } > #ifdef CONFIG_SPARSEMEM_VMEMMAP > #if !ARM64_SWAPPER_USES_SECTION_MAPS > -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int > node) > +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int > node, > + struct vmem_altmap *altmap) > { > + WARN(altmap, "altmap unsupported\n"); > return vmemmap_populate_basepages(start, end, node); > } > #else/* !ARM64_SWAPPER_USES_SECTION_MAPS */ > -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int > node) > +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int > node, > + struct vmem_altmap *altmap) > { > unsigned long addr = start; > unsigned long next; > @@ -677,7 +680,7 @@ int __meminit vmemmap_populate(unsigned long start, > unsigned long end, int node) > if (pmd_none(*pmd)) { > void *p = NULL; > > - p = vmemmap_alloc_block_buf(PMD_SIZE, node); > + p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); > if (!p) > return -ENOMEM; > > diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c > index 878626805369..2a939e877ced 100644 > --- a/arch/ia64/mm/discontig.c > +++ b/arch/ia64/mm/discontig.c > @@ -753,8 +753,10 @@ void arch_refresh_nodedata(int update_node, pg_data_t > *update_pgdat) > #endif > > #ifdef 
CONFIG_SPARSEMEM_VMEMMAP > -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int > node) > +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int > node, > + struct vmem_altmap *altmap) > { > + WARN(altmap, "altmap unsupported\n"); > return vmemmap_populate_basepages(start, end, node); > } > > diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c > index ec84b31c6c86..5ea5e870a589 100644 > --- a/arch/powerpc/mm/init_64.c > +++ b/arch/powerpc/mm/init_64.c > @@ -44,6 +44,7 @@ > #include > #include > #include > +#include > > #include > #include > @@ -115,7 +116,8 @@ static struct vmemmap_backing *next; > static int num_left; > static int num_freed; > > -static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) > +static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node, > + struct vmem_altmap *altmap) > { > struct vmemmap_backing *vmem_back; > /* get from freed entries first */ > @@ -129,7 +131,7 @@ static __meminit struct vmemmap_backing * > vmemmap_list_alloc(int node) > > /* allocate a page when required and hand out chunks */ > if (!num_left) { > - next = vmemmap_alloc_block(PAGE_SIZE, node); > + next = __vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); > if (unlikely(!next)) { > WARN_ON(1); > return NULL; > @@
[RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
From: Michal Hockovmem_altmap allows vmemmap_populate to allocate memmap (struct page array) from an alternative allocator rather than bootmem resp. kmalloc. Only x86 currently supports altmap handling, most likely because only nvdim code uses this mechanism currently and the code depends on ZONE_DEVICE which is present only for x86_64. This will change in follow up changes so we would like other architectures to support it as well. Provide vmemmap_populate generic implementation which simply resolves altmap and then call into arch specific __vmemmap_populate. Architectures then only need to use __vmemmap_alloc_block_buf to allocate the memmap. vmemmap_free then needs to call vmem_altmap_free if there is any altmap associated with the address. This patch shouldn't introduce any functional changes because to_vmem_altmap always returns NULL on !x86_x64. Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: linuxppc-...@lists.ozlabs.org Cc: linux-i...@vger.kernel.org Cc: x...@kernel.org Signed-off-by: Michal Hocko --- arch/arm64/mm/mmu.c | 9 ++--- arch/ia64/mm/discontig.c | 4 +++- arch/powerpc/mm/init_64.c | 29 - arch/s390/mm/vmem.c | 7 --- arch/sparc/mm/init_64.c | 6 +++--- arch/x86/mm/init_64.c | 4 ++-- include/linux/memremap.h | 13 ++--- include/linux/mm.h| 19 ++- mm/sparse-vmemmap.c | 2 +- 9 files changed, 59 insertions(+), 34 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c429ec6fde8..5de1161e7a1b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -649,12 +649,15 @@ int kern_addr_valid(unsigned long addr) } #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { + WARN(altmap, "altmap unsupported\n"); return vmemmap_populate_basepages(start, end, node); } #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long addr = start; unsigned long next; @@ -677,7 +680,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (pmd_none(*pmd)) { void *p = NULL; - p = vmemmap_alloc_block_buf(PMD_SIZE, node); + p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); if (!p) return -ENOMEM; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 878626805369..2a939e877ced 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -753,8 +753,10 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit 
__vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { + WARN(altmap, "altmap unsupported\n"); return vmemmap_populate_basepages(start, end, node); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index ec84b31c6c86..5ea5e870a589 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -115,7 +116,8 @@ static struct vmemmap_backing *next; static int num_left; static int num_freed; -static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) +static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node, + struct vmem_altmap *altmap) { struct vmemmap_backing *vmem_back; /* get from freed entries first */ @@ -129,7 +131,7 @@ static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) /* allocate a page when required and hand out chunks */ if (!num_left) { - next = vmemmap_alloc_block(PAGE_SIZE, node); + next = __vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); if (unlikely(!next)) { WARN_ON(1);
[RFC PATCH 2/5] mm, arch: unify vmemmap_populate altmap handling
From: Michal Hocko vmem_altmap allows vmemmap_populate to allocate memmap (struct page array) from an alternative allocator rather than bootmem resp. kmalloc. Only x86 currently supports altmap handling, most likely because only nvdim code uses this mechanism currently and the code depends on ZONE_DEVICE which is present only for x86_64. This will change in follow up changes so we would like other architectures to support it as well. Provide vmemmap_populate generic implementation which simply resolves altmap and then call into arch specific __vmemmap_populate. Architectures then only need to use __vmemmap_alloc_block_buf to allocate the memmap. vmemmap_free then needs to call vmem_altmap_free if there is any altmap associated with the address. This patch shouldn't introduce any functional changes because to_vmem_altmap always returns NULL on !x86_x64. Cc: Catalin Marinas Cc: Will Deacon Cc: Tony Luck Cc: Fenghua Yu Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Cc: linuxppc-...@lists.ozlabs.org Cc: linux-i...@vger.kernel.org Cc: x...@kernel.org Signed-off-by: Michal Hocko --- arch/arm64/mm/mmu.c | 9 ++--- arch/ia64/mm/discontig.c | 4 +++- arch/powerpc/mm/init_64.c | 29 - arch/s390/mm/vmem.c | 7 --- arch/sparc/mm/init_64.c | 6 +++--- arch/x86/mm/init_64.c | 4 ++-- include/linux/memremap.h | 13 ++--- include/linux/mm.h| 19 ++- mm/sparse-vmemmap.c | 2 +- 9 files changed, 59 insertions(+), 34 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 0c429ec6fde8..5de1161e7a1b 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -649,12 +649,15 @@ int kern_addr_valid(unsigned long addr) } #ifdef CONFIG_SPARSEMEM_VMEMMAP #if !ARM64_SWAPPER_USES_SECTION_MAPS -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { + WARN(altmap, "altmap unsupported\n"); return vmemmap_populate_basepages(start, end, node); } #else /* !ARM64_SWAPPER_USES_SECTION_MAPS */ -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit __vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { unsigned long addr = start; unsigned long next; @@ -677,7 +680,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) if (pmd_none(*pmd)) { void *p = NULL; - p = vmemmap_alloc_block_buf(PMD_SIZE, node); + p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap); if (!p) return -ENOMEM; diff --git a/arch/ia64/mm/discontig.c b/arch/ia64/mm/discontig.c index 878626805369..2a939e877ced 100644 --- a/arch/ia64/mm/discontig.c +++ b/arch/ia64/mm/discontig.c @@ -753,8 +753,10 @@ void arch_refresh_nodedata(int update_node, pg_data_t *update_pgdat) #endif #ifdef CONFIG_SPARSEMEM_VMEMMAP -int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node) +int __meminit 
__vmemmap_populate(unsigned long start, unsigned long end, int node, + struct vmem_altmap *altmap) { + WARN(altmap, "altmap unsupported\n"); return vmemmap_populate_basepages(start, end, node); } diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index ec84b31c6c86..5ea5e870a589 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include @@ -115,7 +116,8 @@ static struct vmemmap_backing *next; static int num_left; static int num_freed; -static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) +static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node, + struct vmem_altmap *altmap) { struct vmemmap_backing *vmem_back; /* get from freed entries first */ @@ -129,7 +131,7 @@ static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) /* allocate a page when required and hand out chunks */ if (!num_left) { - next = vmemmap_alloc_block(PAGE_SIZE, node); + next = __vmemmap_alloc_block_buf(PAGE_SIZE, node, altmap); if (unlikely(!next)) { WARN_ON(1); return NULL; @@ -144,11 +146,12 @@ static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node) static __meminit void vmemmap_list_populate(unsigned long phys, unsigned long start, - int node) +