Re: [PATCH v2 3/6] mm, page_alloc: pass preferred nid instead of zonelist to allocator

2017-05-19 Thread Michal Hocko
On Wed 17-05-17 10:11:37, Vlastimil Babka wrote:
> The main allocator function __alloc_pages_nodemask() takes a zonelist pointer
> as one of its parameters. All of its callers directly or indirectly obtain the
> zonelist via node_zonelist() using a preferred node id and gfp_mask. We can
> make the code a bit simpler by doing the zonelist lookup in
> __alloc_pages_nodemask(), passing it a preferred node id instead (gfp_mask is
> already another parameter).
> 
> There are some code size benefits thanks to removal of inlined node_zonelist():
> 
> bloat-o-meter add/remove: 2/2 grow/shrink: 4/36 up/down: 399/-1351 (-952)
> 
> This will also make things simpler if we proceed with converting cpusets to
> zonelists.
> 
> Signed-off-by: Vlastimil Babka 

Makes sense to me
Acked-by: Michal Hocko 

> ---
>  include/linux/gfp.h   | 11 +--
>  include/linux/mempolicy.h |  6 +++---
>  mm/hugetlb.c  | 15 +--
>  mm/memory_hotplug.c   |  6 ++
>  mm/mempolicy.c| 41 +++--
>  mm/page_alloc.c   | 10 +-
>  6 files changed, 43 insertions(+), 46 deletions(-)
> 
> diff --git a/include/linux/gfp.h b/include/linux/gfp.h
> index 2b1a44f5bdb6..666af3c39d00 100644
> --- a/include/linux/gfp.h
> +++ b/include/linux/gfp.h
> @@ -432,14 +432,13 @@ static inline void arch_alloc_page(struct page *page, int order) { }
>  #endif
>  
>  struct page *
> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
> -struct zonelist *zonelist, nodemask_t *nodemask);
> +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
> + nodemask_t *nodemask);
>  
>  static inline struct page *
> -__alloc_pages(gfp_t gfp_mask, unsigned int order,
> - struct zonelist *zonelist)
> +__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid)
>  {
> - return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
> + return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
>  }
>  
>  /*
> @@ -452,7 +451,7 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
>   VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
>   VM_WARN_ON(!node_online(nid));
>  
> - return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
> + return __alloc_pages(gfp_mask, order, nid);
>  }
>  
>  /*
> diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
> index 5f4d8281832b..ecb6cbeede5a 100644
> --- a/include/linux/mempolicy.h
> +++ b/include/linux/mempolicy.h
> @@ -146,7 +146,7 @@ extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
>   enum mpol_rebind_step step);
>  extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
>  
> -extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
> +extern int huge_node(struct vm_area_struct *vma,
>   unsigned long addr, gfp_t gfp_flags,
>   struct mempolicy **mpol, nodemask_t **nodemask);
>  extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
> @@ -269,13 +269,13 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
>  {
>  }
>  
> -static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
> +static inline int huge_node(struct vm_area_struct *vma,
>   unsigned long addr, gfp_t gfp_flags,
>   struct mempolicy **mpol, nodemask_t **nodemask)
>  {
>   *mpol = NULL;
>   *nodemask = NULL;
> - return node_zonelist(0, gfp_flags);
> + return 0;
>  }
>  
>  static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
> diff --git a/mm/hugetlb.c b/mm/hugetlb.c
> index e5828875f7bb..9f1f399bb913 100644
> --- a/mm/hugetlb.c
> +++ b/mm/hugetlb.c
> @@ -904,6 +904,8 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
>   struct page *page = NULL;
>   struct mempolicy *mpol;
>   nodemask_t *nodemask;
> + gfp_t gfp_mask;
> + int nid;
>   struct zonelist *zonelist;
>   struct zone *zone;
>   struct zoneref *z;
> @@ -924,12 +926,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
>  
>  retry_cpuset:
>   cpuset_mems_cookie = read_mems_allowed_begin();
> - zonelist = huge_zonelist(vma, address,
> - htlb_alloc_mask(h), &mpol, &nodemask);
> + gfp_mask = htlb_alloc_mask(h);
> + nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
> + zonelist = node_zonelist(nid, gfp_mask);
>  
>   for_each_zone_zonelist_nodemask(zone, z, zonelist,
>   MAX_NR_ZONES - 1, nodemask) {
> - if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
> + if (cpuset_zone_allowed(zone, gfp_mask)) {
>   page = dequeue_huge_page_node(h, zone_to_nid(zone));
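
For readers skimming the thread, the interface change quoted above boils
down to the following caller-side pattern (a minimal sketch reusing the
names from the diff, not a literal excerpt of the patch):

	/* Before: every caller looked up the zonelist itself. */
	page = __alloc_pages_nodemask(gfp_mask, order,
				      node_zonelist(nid, gfp_mask), nodemask);

	/* After: callers pass the preferred node id and the allocator
	 * performs the node_zonelist() lookup internally. */
	page = __alloc_pages_nodemask(gfp_mask, order, nid, nodemask);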

Re: [PATCH v2 3/6] mm, page_alloc: pass preferred nid instead of zonelist to allocator

2017-05-18 Thread Vlastimil Babka
On 05/17/2017 05:19 PM, Christoph Lameter wrote:
> On Wed, 17 May 2017, Vlastimil Babka wrote:
> 
>>  struct page *
>> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
>> -   struct zonelist *zonelist, nodemask_t *nodemask);
>> +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
>> +nodemask_t *nodemask);
>>
>>  static inline struct page *
>> -__alloc_pages(gfp_t gfp_mask, unsigned int order,
>> -struct zonelist *zonelist)
>> +__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid)
>>  {
>> -return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
>> +return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
>>  }
> 
> Maybe use nid instead of preferred_nid like in __alloc_pages? Otherwise
> there may be confusion with the MPOL_PREFERRED policy.

I'll think about that.

>> @@ -1963,8 +1960,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>>  {
>>  struct mempolicy *pol;
>>  struct page *page;
>> +int preferred_nid;
>>  unsigned int cpuset_mems_cookie;
>> -struct zonelist *zl;
>>  nodemask_t *nmask;
> 
> Same here.
> 
>> @@ -4012,8 +4012,8 @@ static inline void finalise_ac(gfp_t gfp_mask,
>>   * This is the 'heart' of the zoned buddy allocator.
>>   */
>>  struct page *
>> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
>> -struct zonelist *zonelist, nodemask_t *nodemask)
>> +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
>> +nodemask_t *nodemask)
>>  {
> 
> and here
> 
> This looks clean to me. Still feel a bit uneasy about this since I do
> remember that we had a reason to use zonelists instead of nodes back then
but cannot remember what that reason was.

My history digging showed me that mempolicies used to have a custom
zonelist attached, not nodemask. So I supposed that's why.

> CCing Dimitri at SGI. This may break a lot of legacy SGI apps. If you read
> this Dimitri then please review this patchset and the discussions around
> it.

Break how? This shouldn't break any apps AFAICS, just out-of-tree kernel
patches/modules as usual when APIs change.
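
For an out-of-tree caller, the conversion is mechanical; an illustrative
sketch (numa_node_id() stands in for whatever nid the caller already has):

	/* old: caller resolved the zonelist by hand */
	page = __alloc_pages(gfp_mask, order,
			     node_zonelist(numa_node_id(), gfp_mask));

	/* new: pass the node id directly */
	page = __alloc_pages(gfp_mask, order, numa_node_id());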

> Reviewed-by: Christoph Lameter 

Thanks!



Re: [PATCH v2 3/6] mm, page_alloc: pass preferred nid instead of zonelist to allocator

2017-05-17 Thread Christoph Lameter
On Wed, 17 May 2017, Vlastimil Babka wrote:

>  struct page *
> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
> -struct zonelist *zonelist, nodemask_t *nodemask);
> +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
> + nodemask_t *nodemask);
>
>  static inline struct page *
> -__alloc_pages(gfp_t gfp_mask, unsigned int order,
> - struct zonelist *zonelist)
> +__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid)
>  {
> - return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
> + return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
>  }

Maybe use nid instead of preferred_nid like in __alloc_pages? Otherwise
there may be confusion with the MPOL_PREFERRED policy.

> @@ -1963,8 +1960,8 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  {
>   struct mempolicy *pol;
>   struct page *page;
> + int preferred_nid;
>   unsigned int cpuset_mems_cookie;
> - struct zonelist *zl;
>   nodemask_t *nmask;

Same here.

> @@ -4012,8 +4012,8 @@ static inline void finalise_ac(gfp_t gfp_mask,
>   * This is the 'heart' of the zoned buddy allocator.
>   */
>  struct page *
> -__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
> - struct zonelist *zonelist, nodemask_t *nodemask)
> +__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
> + nodemask_t *nodemask)
>  {

and here

This looks clean to me. Still feel a bit uneasy about this since I do
remember that we had a reason to use zonelists instead of nodes back then
but cannot remember what that reason was.

CCing Dimitri at SGI. This may break a lot of legacy SGI apps. If you read
this Dimitri then please review this patchset and the discussions around
it.

Reviewed-by: Christoph Lameter 



[PATCH v2 3/6] mm, page_alloc: pass preferred nid instead of zonelist to allocator

2017-05-17 Thread Vlastimil Babka
The main allocator function __alloc_pages_nodemask() takes a zonelist pointer
as one of its parameters. All of its callers directly or indirectly obtain the
zonelist via node_zonelist() using a preferred node id and gfp_mask. We can
make the code a bit simpler by doing the zonelist lookup in
__alloc_pages_nodemask(), passing it a preferred node id instead (gfp_mask is
already another parameter).

There are some code size benefits thanks to removal of inlined node_zonelist():

bloat-o-meter add/remove: 2/2 grow/shrink: 4/36 up/down: 399/-1351 (-952)

This will also make things simpler if we proceed with converting cpusets to
zonelists.
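
Concretely, the lookup each caller used to perform moves into the
allocator's setup of its allocation context; conceptually (a sketch of
the idea, not the actual hunk, assuming the zonelist is resolved once
before the fast path runs):

	struct page *
	__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
			       int preferred_nid, nodemask_t *nodemask)
	{
		struct alloc_context ac = { };

		/* single place where the zonelist is now derived */
		ac.zonelist = node_zonelist(preferred_nid, gfp_mask);
		/* ... fast path / slow path proceed as before ... */
	}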

Signed-off-by: Vlastimil Babka 
---
 include/linux/gfp.h   | 11 +--
 include/linux/mempolicy.h |  6 +++---
 mm/hugetlb.c  | 15 +--
 mm/memory_hotplug.c   |  6 ++
 mm/mempolicy.c| 41 +++--
 mm/page_alloc.c   | 10 +-
 6 files changed, 43 insertions(+), 46 deletions(-)

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 2b1a44f5bdb6..666af3c39d00 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -432,14 +432,13 @@ static inline void arch_alloc_page(struct page *page, int order) { }
 #endif
 
 struct page *
-__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
-  struct zonelist *zonelist, nodemask_t *nodemask);
+__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, int preferred_nid,
+   nodemask_t *nodemask);
 
 static inline struct page *
-__alloc_pages(gfp_t gfp_mask, unsigned int order,
-   struct zonelist *zonelist)
+__alloc_pages(gfp_t gfp_mask, unsigned int order, int preferred_nid)
 {
-   return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
+   return __alloc_pages_nodemask(gfp_mask, order, preferred_nid, NULL);
 }
 
 /*
@@ -452,7 +451,7 @@ __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
VM_WARN_ON(!node_online(nid));
 
-   return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
+   return __alloc_pages(gfp_mask, order, nid);
 }
 
 /*
diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h
index 5f4d8281832b..ecb6cbeede5a 100644
--- a/include/linux/mempolicy.h
+++ b/include/linux/mempolicy.h
@@ -146,7 +146,7 @@ extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
enum mpol_rebind_step step);
 extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
 
-extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+extern int huge_node(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
struct mempolicy **mpol, nodemask_t **nodemask);
 extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
@@ -269,13 +269,13 @@ static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
 {
 }
 
-static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
+static inline int huge_node(struct vm_area_struct *vma,
unsigned long addr, gfp_t gfp_flags,
struct mempolicy **mpol, nodemask_t **nodemask)
 {
*mpol = NULL;
*nodemask = NULL;
-   return node_zonelist(0, gfp_flags);
+   return 0;
 }
 
 static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index e5828875f7bb..9f1f399bb913 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -904,6 +904,8 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
struct page *page = NULL;
struct mempolicy *mpol;
nodemask_t *nodemask;
+   gfp_t gfp_mask;
+   int nid;
struct zonelist *zonelist;
struct zone *zone;
struct zoneref *z;
@@ -924,12 +926,13 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
 
 retry_cpuset:
cpuset_mems_cookie = read_mems_allowed_begin();
-   zonelist = huge_zonelist(vma, address,
-   htlb_alloc_mask(h), &mpol, &nodemask);
+   gfp_mask = htlb_alloc_mask(h);
+   nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
+   zonelist = node_zonelist(nid, gfp_mask);
 
for_each_zone_zonelist_nodemask(zone, z, zonelist,
MAX_NR_ZONES - 1, nodemask) {
-   if (cpuset_zone_allowed(zone, htlb_alloc_mask(h))) {
+   if (cpuset_zone_allowed(zone, gfp_mask)) {
page = dequeue_huge_page_node(h, zone_to_nid(zone));
if (page) {
if (avoid_reserve)
@@ -1545,13 +1548,13 @@ static struct page *__hugetlb_alloc_buddy_huge_page(struct hstate *h,
do {
struct page *page;
		struct mempolicy *mpol;