Re: [PATCH v2 3/6] mm, page_alloc: pass preferred nid instead of zonelist to allocator
On Wed 17-05-17 10:11:37, Vlastimil Babka wrote: > The main allocator function __alloc_pages_nodemask() takes a zonelist pointer > as one of its parameters. All of its callers directly or indirectly obtain the > zonelist via node_zonelist() using a preferred node id and gfp_mask. We can > make the code a bit simpler by doing the zonelist lookup in > __alloc_pages_nodemask(), passing it a preferred node id instead (gfp_mask is > already another parameter). > > There are some code size benefits thanks to removal of inlined > node_zonelist(): > > bloat-o-meter add/remove: 2/2 grow/shrink: 4/36 up/down: 399/-1351 (-952) > > This will also make things simpler if we proceed with converting cpusets to > zonelists. > > Signed-off-by: Vlastimil Babka Makes sense to me Acked-by: Michal Hocko > --- > include/linux/gfp.h | 11 +-- > include/linux/mempolicy.h | 6 +++--- > mm/hugetlb.c | 15 +-- > mm/memory_hotplug.c | 6 ++ > mm/mempolicy.c| 41 +++-- > mm/page_alloc.c | 10 +- > 6 files changed, 43 insertions(+), 46 deletions(-) > > diff --git a/include/linux/gfp.h b/include/linux/gfp.h > index 2b1a44f5bdb6..666af3c39d00 100644 > --- a/include/linux/gfp.h > +++ b/include/linux/gfp.h > @@ -432,14 +432,13 @@ static inline void arch_alloc_page(struct page *page, > int order) { } > #endif > > struct page * > -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, > -struct zonelist *zonelist, nodemask_t *nodemask); > +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, > + nodemask_t *nodemask); > > static inline struct page * > -__alloc_pages(gfp_t gfp_mask, unsigned int order, > - struct zonelist *zonelist) > +__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) > { > - return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); > + return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); > } > > /* > @@ -452,7 +451,7 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int > order) > VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); > VM_WARN_ON(!node_online(nid)); > > - return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); > + return __alloc_pages(gfp_mask, order, nid); > } > > /* > diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h > index 5f4d8281832b..ecb6cbeede5a 100644 > --- a/include/linux/mempolicy.h > +++ b/include/linux/mempolicy.h > @@ -146,7 +146,7 @@ extern void mpol_rebind_task(struct task_struct *tsk, > const nodemask_t *new, > enum mpol_rebind_step step); > extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); > > -extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, > +extern int huge_node(struct vm_area_struct *vma, > unsigned long addr, gfp_t gfp_flags, > struct mempolicy **mpol, nodemask_t **nodemask); > extern bool init_nodemask_of_mempolicy(nodemask_t *mask); > @@ -269,13 +269,13 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, > nodemask_t *new) > { > } > > -static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, > +static inline int huge_node(struct vm_area_struct *vma, > unsigned long addr, gfp_t gfp_flags, > struct mempolicy **mpol, nodemask_t **nodemask) > { > *mpol = NULL; > *nodemask = NULL; > - return node_zonelist(0, gfp_flags); > + return 0; > } > > static inline bool init_nodemask_of_mempolicy(nodemask_t *m) > diff --git a/mm/hugetlb.c b/mm/hugetlb.c > index e5828875f7bb..9f1f399bb913 100644 > --- a/mm/hugetlb.c > +++ b/mm/hugetlb.c > @@ -904,6 +904,8 @@ static struct page *dequeue_huge_page_vma(struct hstate > *h, > struct page *page = NULL; > struct mempolicy *mpol; > nodemask_t *nodemask; > + gfp_t gfp_mask; > + int nid; > struct zonelist *zonelist; > struct zone *zone; > struct zoneref *z; > @@ -924,12 +926,13 @@ static struct page *dequeue_huge_page_vma(struct hstate > *h, > > retry_cpuset: > cpuset_mems_cookie = read_mems_allowed_begin(); > - zonelist = huge_zonelist(vma, address, > - htlb_alloc_mask(h), &mpol, &nodemask); > + gfp_mask = htlb_alloc_mask(h); > + nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask); > + zonelist = node_zonelist(nid, gfp_mask); > > for_each_zone_zonelist_nodemask(zone, z, zonelist, > MAX_NR_ZONES - 1, nodemask) { > - if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) { > + if (cpuset_zone_allowed(zone, gfp_mask)) { > page = dequeue_huge_page_node(h, zone_to_nid(zone
Re: [PATCH v2 3/6] mm, page_alloc: pass preferred nid instead of zonelist to allocator
On 05/17/2017 05:19 PM, Christoph Lameter wrote: > On Wed, 17 May 2017, Vlastimil Babka wrote: > >> struct page * >> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, >> - struct zonelist *zonelist, nodemask_t *nodemask); >> +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int >> preferred_nid, >> +nodemask_t *nodemask); >> >> static inline struct page * >> -__alloc_pages(gfp_t gfp_mask, unsigned int order, >> -struct zonelist *zonelist) >> +__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) >> { >> -return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); >> +return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); >> } > > Maybe use nid instead of preferred_nid like in __alloc_pages? Otherwise > there may be confusion with the MPOL_PREFER policy. I'll think about that. >> @@ -1963,8 +1960,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct >> vm_area_struct *vma, >> { >> struct mempolicy *pol; >> struct page *page; >> +int preferred_nid; >> unsigned int cpuset_mems_cookie; >> -struct zonelist *zl; >> nodemask_t *nmask; > > Same here. > >> @@ -4012,8 +4012,8 @@ static inline void finalise_ac(gfp_t gfp_mask, >> * This is the 'heart' of the zoned buddy allocator. >> */ >> struct page * >> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, >> -struct zonelist *zonelist, nodemask_t *nodemask) >> +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int >> preferred_nid, >> +nodemask_t *nodemask) >> { > > and here > > This looks clean to me. Still feel a bit uneasy about this since I do > remember that we had a reason to use zonelists instead of nodes back then > but cannot remember what that reason was My history digging showed me that mempolicies used to have a custom zonelist attached, not nodemask. So I supposed that's why. > CCing Dimitri at SGI. This may break a lot of legacy SGIapps. If you read > this Dimitri then please review this patchset and the discussions around > it. Break how? This shouldn't break any apps AFAICS, just out-of-tree kernel patches/modules as usual when APIs change. > Reviewed-by: Christoph Lameter Thanks!
Re: [PATCH v2 3/6] mm, page_alloc: pass preferred nid instead of zonelist to allocator
On Wed, 17 May 2017, Vlastimil Babka wrote: > struct page * > -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, > -struct zonelist *zonelist, nodemask_t *nodemask); > +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, > + nodemask_t *nodemask); > > static inline struct page * > -__alloc_pages(gfp_t gfp_mask, unsigned int order, > - struct zonelist *zonelist) > +__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) > { > - return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); > + return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); > } Maybe use nid instead of preferred_nid like in __alloc_pages? Otherwise there may be confusion with the MPOL_PREFER policy. > @@ -1963,8 +1960,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct > vm_area_struct *vma, > { > struct mempolicy *pol; > struct page *page; > + int preferred_nid; > unsigned int cpuset_mems_cookie; > - struct zonelist *zl; > nodemask_t *nmask; Same here. > @@ -4012,8 +4012,8 @@ static inline void finalise_ac(gfp_t gfp_mask, > * This is the 'heart' of the zoned buddy allocator. > */ > struct page * > -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, > - struct zonelist *zonelist, nodemask_t *nodemask) > +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, > + nodemask_t *nodemask) > { and here This looks clean to me. Still feel a bit uneasy about this since I do remember that we had a reason to use zonelists instead of nodes back then but cannot remember what that reason was CCing Dimitri at SGI. This may break a lot of legacy SGIapps. If you read this Dimitri then please review this patchset and the discussions around it. Reviewed-by: Christoph Lameter
[PATCH v2 3/6] mm, page_alloc: pass preferred nid instead of zonelist to allocator
The main allocator function __alloc_pages_nodemask() takes a zonelist pointer as one of its parameters. All of its callers directly or indirectly obtain the zonelist via node_zonelist() using a preferred node id and gfp_mask. We can make the code a bit simpler by doing the zonelist lookup in __alloc_pages_nodemask(), passing it a preferred node id instead (gfp_mask is already another parameter). There are some code size benefits thanks to removal of inlined node_zonelist(): bloat-o-meter add/remove: 2/2 grow/shrink: 4/36 up/down: 399/-1351 (-952) This will also make things simpler if we proceed with converting cpusets to zonelists. Signed-off-by: Vlastimil Babka --- include/linux/gfp.h | 11 +-- include/linux/mempolicy.h | 6 +++--- mm/hugetlb.c | 15 +-- mm/memory_hotplug.c | 6 ++ mm/mempolicy.c| 41 +++-- mm/page_alloc.c | 10 +- 6 files changed, 43 insertions(+), 46 deletions(-) diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2b1a44f5bdb6..666af3c39d00 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -432,14 +432,13 @@ static inline void arch_alloc_page(struct page *page, int order) { } #endif struct page * -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist, nodemask_t *nodemask); +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid, + nodemask_t *nodemask); static inline struct page * -__alloc_pages(gfp_t gfp_mask, unsigned int order, - struct zonelist *zonelist) +__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid) { - return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL); + return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL); } /* @@ -452,7 +451,7 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); VM_WARN_ON(!node_online(nid)); - return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask)); + return __alloc_pages(gfp_mask, order, nid); } /* diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5f4d8281832b..ecb6cbeede5a 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -146,7 +146,7 @@ extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new, enum mpol_rebind_step step); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); -extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, +extern int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask); extern bool init_nodemask_of_mempolicy(nodemask_t *mask); @@ -269,13 +269,13 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new) { } -static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma, +static inline int huge_node(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, struct mempolicy **mpol, nodemask_t **nodemask) { *mpol = NULL; *nodemask = NULL; - return node_zonelist(0, gfp_flags); + return 0; } static inline bool init_nodemask_of_mempolicy(nodemask_t *m) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index e5828875f7bb..9f1f399bb913 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -904,6 +904,8 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, struct page *page = NULL; struct mempolicy *mpol; nodemask_t *nodemask; + gfp_t gfp_mask; + int nid; struct zonelist *zonelist; struct zone *zone; struct zoneref *z; @@ -924,12 +926,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h, retry_cpuset: cpuset_mems_cookie = read_mems_allowed_begin(); - zonelist = huge_zonelist(vma, address, - htlb_alloc_mask(h), &mpol, &nodemask); + gfp_mask = htlb_alloc_mask(h); + nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask); + zonelist = node_zonelist(nid, gfp_mask); for_each_zone_zonelist_nodemask(zone, z, zonelist, MAX_NR_ZONES - 1, nodemask) { - if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) { + if (cpuset_zone_allowed(zone, gfp_mask)) { page = dequeue_huge_page_node(h, zone_to_nid(zone)); if (page) { if (avoid_reserve) @@ -1545,13 +1548,13 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h, do { struct page *page; struct memp