Hi all,

Today's linux-next merge of the tip tree got a conflict in mm/mempolicy.c
between commit 63f74ca21f1f ("mempolicy: fix refcount leak in
mpol_set_shared_policy()") from Linus' tree and commit 4d58c795f691
("mm/mpol: Check for misplaced page") from the tip tree.

I fixed it up (see below) and can carry the fix as necessary (no action
is required).

-- 
Cheers,
Stephen Rothwell                    s...@canb.auug.org.au

diff --cc mm/mempolicy.c
index 0b78fb9,3360a8d..0000000
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@@ -2170,12 -2168,116 +2203,122 @@@ mpol_shared_policy_lookup(struct shared
        return pol;
  }
  
 +static void sp_free(struct sp_node *n)
 +{
 +      mpol_put(n->policy);
 +      kmem_cache_free(sn_cache, n);
 +}
 +
+ /**
+  * mpol_misplaced - check whether current page node is valid in policy
+  *
+  * @page:  page to be checked
+  * @vma:   vm area where the page is mapped
+  * @addr:  virtual address where the page is mapped
+  * @multi: use multi-stage node binding
+  *
+  * Look up the current policy node id for vma,addr and compare it to the
+  * page's node id.
+  *
+  * Returns:
+  *    -1      - not misplaced, page is in the right node
+  *    node    - node id where the page should be
+  *
+  * Policy determination "mimics" alloc_page_vma().
+  * Called from fault path where we know the vma and faulting address.
+  */
+ int mpol_misplaced(struct page *page, struct vm_area_struct *vma,
+                  unsigned long addr, int multi)
+ {
+       struct mempolicy *pol;
+       struct zone *zone;
+       int curnid = page_to_nid(page);
+       unsigned long pgoff;
+       int polnid = -1;
+       int ret = -1;
+ 
+       BUG_ON(!vma);
+ 
+       pol = get_vma_policy(current, vma, addr);
+       if (!(pol->flags & MPOL_F_MOF))
+               goto out;
+ 
+       switch (pol->mode) {
+       case MPOL_INTERLEAVE:
+               BUG_ON(addr >= vma->vm_end);
+               BUG_ON(addr < vma->vm_start);
+ 
+               pgoff = vma->vm_pgoff;
+               pgoff += (addr - vma->vm_start) >> PAGE_SHIFT;
+               polnid = offset_il_node(pol, vma, pgoff);
+               break;
+ 
+       case MPOL_PREFERRED:
+               if (pol->flags & MPOL_F_LOCAL)
+                       polnid = numa_node_id();
+               else
+                       polnid = pol->v.preferred_node;
+               break;
+ 
+       case MPOL_BIND:
+               /*
+                * allows binding to multiple nodes.
+                * use current page if in policy nodemask,
+                * else select nearest allowed node, if any.
+                * If no allowed nodes, use current [!misplaced].
+                */
+               if (node_isset(curnid, pol->v.nodes))
+                       goto out;
+               (void)first_zones_zonelist(
+                               node_zonelist(numa_node_id(), GFP_HIGHUSER),
+                               gfp_zone(GFP_HIGHUSER),
+                               &pol->v.nodes, &zone);
+               polnid = zone->node;
+               break;
+ 
+       default:
+               BUG();
+       }
+ 
+       /*
+        * Multi-stage node selection is used in conjunction with a periodic
+        * migration fault to build a temporal task<->page relation. By
+        * using a two-stage filter we remove short/unlikely relations.
+        *
+        * Using P(p) ~ n_p / n_t as per frequentist probability, we can
+        * equate a task's usage of a particular page (n_p) per total usage
+        * of this page (n_t) (in a given time-span) to a probability.
+        *
+        * Our periodic faults will then sample this probability, and the
+        * chance of getting the same result twice in a row, given these
+        * samples are fully independent, is P(p)^2, provided our sample
+        * period is sufficiently short compared to the usage pattern.
+        *
+        * This quadratic squishes small probabilities, making it less likely
+        * we act on an unlikely task<->page relation.
+        *
+        * NOTE: effectively we're using task-home-node<->page-node relations
+        * since those are the only thing we can affect.
+        *
+        * NOTE: we're using task-home-node as opposed to the current node
+        * the task might be running on, since the task-home-node is the
+        * long-term node of this task, further reducing noise. Also see
+        * task_tick_numa().
+        */
+       if (multi && (pol->flags & MPOL_F_HOME)) {
+               int last_nid = page_xchg_last_nid(page, polnid);
+               if (last_nid != polnid)
+                       goto out;
+       }
+ 
+       if (curnid != polnid)
+               ret = polnid;
+ out:
+       mpol_cond_put(pol);
+ 
+       return ret;
+ }
+ 
  static void sp_delete(struct shared_policy *sp, struct sp_node *n)
  {
        pr_debug("deleting %lx-l%lx\n", n->start, n->end);

Attachment: pgpVglgEmApPz.pgp
Description: PGP signature

Reply via email to