Re: [PATCH v4 10/13] mm/mempolicy: VMA allocation for many preferred

2021-04-14 Thread Michal Hocko
On Wed 17-03-21 11:40:07, Feng Tang wrote:
[...]
> @@ -2301,10 +2300,26 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  		 * does not allow the current node in its nodemask, we allocate
>  		 * the standard way.
>  		 */
> -		if ((pol->mode == MPOL_PREFERRED ||
> -		     pol->mode == MPOL_PREFERRED_MANY) &&
> -		    !(pol->flags & MPOL_F_LOCAL))
> +		if (pol->mode == MPOL_PREFERRED || !(pol->flags & MPOL_F_LOCAL)) {
>  			hpage_node = first_node(pol->nodes);
> +		} else if (pol->mode == MPOL_PREFERRED_MANY) {
> +			struct zoneref *z;
> +
> +			/*
> +			 * In this policy, with direct reclaim, the normal
> +			 * policy based allocation will do the right thing - try
> +			 * twice using the preferred nodes first, and all nodes
> +			 * second.
> +			 */
> +			if (gfp & __GFP_DIRECT_RECLAIM) {
> +				page = alloc_pages_policy(pol, gfp, order, NUMA_NO_NODE);
> +				goto out;
> +			}
> +
> +			z = first_zones_zonelist(node_zonelist(numa_node_id(), GFP_HIGHUSER),
> +						 gfp_zone(GFP_HIGHUSER), &pol->nodes);
> +			hpage_node = zone_to_nid(z->zone);
> +		}
> 
>  		nmask = policy_nodemask(gfp, pol);
>  		if (!nmask || node_isset(hpage_node, *nmask)) {
> @@ -2330,9 +2345,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
>  		}
>  	}
> 
> -	nmask = policy_nodemask(gfp, pol);
> -	preferred_nid = policy_node(gfp, pol, node);
> -	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
> +	page = alloc_pages_policy(pol, gfp, order, NUMA_NO_NODE);
>  	mpol_cond_put(pol);
>  out:
>  	return page;

OK, it took me a while to grasp this, but I have to say the code is a
mess. Not that it was an act of beauty before, but this makes it much
harder to follow. And alloc_pages_policy doesn't really help. I would
have expected a dedicated alloc_pages_preferred and a general fallback
to __alloc_pages_nodemask; that would have been much easier to follow.
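
Something along these lines is what I have in mind (a completely
untested sketch; the helper name is made up here):

	/*
	 * Untested sketch of a dedicated preferred-many allocator: try
	 * the preferred nodes first without direct reclaim or OOM, then
	 * fall back to all allowed nodes. Illustrative only.
	 */
	static struct page *alloc_pages_preferred_many(gfp_t gfp, int order,
						       int nid,
						       struct mempolicy *pol)
	{
		gfp_t preferred_gfp = gfp | __GFP_NOWARN;
		struct page *page;

		/* Keep the first pass lightweight on the preferred nodes. */
		preferred_gfp &= ~(__GFP_DIRECT_RECLAIM | __GFP_NOFAIL);
		page = __alloc_pages_nodemask(preferred_gfp, order, nid,
					      &pol->nodes);
		if (!page)
			/* Second pass: all nodes, original gfp flags. */
			page = __alloc_pages_nodemask(gfp, order, nid, NULL);

		return page;
	}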
-- 
Michal Hocko
SUSE Labs


[PATCH v4 10/13] mm/mempolicy: VMA allocation for many preferred

2021-03-16 Thread Feng Tang
From: Ben Widawsky 

This patch implements MPOL_PREFERRED_MANY for alloc_pages_vma(). Like
alloc_pages_current(), alloc_pages_vma() needs to support policy based
decisions if they've been configured via mbind(2).
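
For context, userspace would request this policy on a mapping roughly as
follows (a minimal sketch; the MPOL_PREFERRED_MANY uapi value and the
node numbers are assumptions for illustration, since the mode is not yet
in released headers):

	#include <numaif.h>		/* mbind(); link with -lnuma */
	#include <sys/mman.h>
	#include <stdio.h>

	#ifndef MPOL_PREFERRED_MANY
	#define MPOL_PREFERRED_MANY 5	/* assumed uapi value */
	#endif

	int main(void)
	{
		size_t len = 2UL << 20;		/* a 2MB, THP-sized region */
		void *addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
				  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		/* Prefer nodes 0 and 1; the kernel may fall back to others. */
		unsigned long nodemask = (1UL << 0) | (1UL << 1);

		if (addr == MAP_FAILED)
			return 1;
		if (mbind(addr, len, MPOL_PREFERRED_MANY, &nodemask,
			  sizeof(nodemask) * 8, 0))
			perror("mbind");
		return 0;
	}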

The temporary "hack" of treating MPOL_PREFERRED and MPOL_PREFERRED_MANY
the same can now be removed, too.

All the actual machinery to make this work was added in the earlier
patch ("mm/mempolicy: Create a page allocator for policy").

Link: https://lore.kernel.org/r/20200630212517.308045-11-ben.widaw...@intel.com
Signed-off-by: Ben Widawsky 
Signed-off-by: Feng Tang 
---
 mm/mempolicy.c | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index a92efe7..8fe76a7 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2273,8 +2273,6 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 {
 	struct mempolicy *pol;
 	struct page *page;
-	int preferred_nid;
-	nodemask_t *nmask;
 
 	pol = get_vma_policy(vma, addr);
 
@@ -2288,6 +2286,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 	}
 
 	if (unlikely(IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE) && hugepage)) {
+		nodemask_t *nmask;
 		int hpage_node = node;
 
 		/*
@@ -2301,10 +2300,26 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		 * does not allow the current node in its nodemask, we allocate
 		 * the standard way.
 		 */
-		if ((pol->mode == MPOL_PREFERRED ||
-		     pol->mode == MPOL_PREFERRED_MANY) &&
-		    !(pol->flags & MPOL_F_LOCAL))
+		if (pol->mode == MPOL_PREFERRED || !(pol->flags & MPOL_F_LOCAL)) {
 			hpage_node = first_node(pol->nodes);
+		} else if (pol->mode == MPOL_PREFERRED_MANY) {
+			struct zoneref *z;
+
+			/*
+			 * In this policy, with direct reclaim, the normal
+			 * policy based allocation will do the right thing - try
+			 * twice using the preferred nodes first, and all nodes
+			 * second.
+			 */
+			if (gfp & __GFP_DIRECT_RECLAIM) {
+				page = alloc_pages_policy(pol, gfp, order, NUMA_NO_NODE);
+				goto out;
+			}
+
+			z = first_zones_zonelist(node_zonelist(numa_node_id(), GFP_HIGHUSER),
+						 gfp_zone(GFP_HIGHUSER), &pol->nodes);
+			hpage_node = zone_to_nid(z->zone);
+		}
 
 		nmask = policy_nodemask(gfp, pol);
 		if (!nmask || node_isset(hpage_node, *nmask)) {
@@ -2330,9 +2345,7 @@ alloc_pages_vma(gfp_t gfp, int order, struct vm_area_struct *vma,
 		}
 	}
 
-	nmask = policy_nodemask(gfp, pol);
-	preferred_nid = policy_node(gfp, pol, node);
-	page = __alloc_pages_nodemask(gfp, order, preferred_nid, nmask);
+	page = alloc_pages_policy(pol, gfp, order, NUMA_NO_NODE);
 	mpol_cond_put(pol);
 out:
 	return page;
-- 
2.7.4