From: Rik van Riel <r...@redhat.com>

Excessive migration of pages can hurt the performance of workloads
that span multiple NUMA nodes.  However, it turns out that the
p->numa_migrate_deferred knob is a really big hammer, which does
reduce migration rates, but does not actually help performance.

It is time to rip it out, and replace it with something smarter.

Signed-off-by: Rik van Riel <r...@redhat.com>
---
 include/linux/sched.h |  1 -
 kernel/sched/fair.c   |  8 --------
 kernel/sysctl.c       |  7 -------
 mm/mempolicy.c        | 45 ---------------------------------------------
 4 files changed, 61 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 42f2baf..9e4cb598 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1345,7 +1345,6 @@ struct task_struct {
        unsigned int numa_scan_period;
        unsigned int numa_scan_period_max;
        int numa_preferred_nid;
-       int numa_migrate_deferred;
        unsigned long numa_migrate_retry;
        u64 node_stamp;                 /* migration stamp  */
        struct callback_head numa_work;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 7f9b376..410858e 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -794,14 +794,6 @@ unsigned int sysctl_numa_balancing_scan_size = 256;
 /* Scan @scan_size MB every @scan_period after an initial @scan_delay in ms */
 unsigned int sysctl_numa_balancing_scan_delay = 1000;
 
-/*
- * After skipping a page migration on a shared page, skip N more numa page
- * migrations unconditionally. This reduces the number of NUMA migrations
- * in shared memory workloads, and has the effect of pulling tasks towards
- * where their memory lives, over pulling the memory towards the task.
- */
-unsigned int sysctl_numa_balancing_migrate_deferred = 16;
-
 static unsigned int task_nr_scan_windows(struct task_struct *p)
 {
        unsigned long rss = 0;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 14c4f51..821e3f1 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -392,13 +392,6 @@ static struct ctl_table kern_table[] = {
                .mode           = 0644,
                .proc_handler   = proc_dointvec,
        },
-       {
-               .procname       = "numa_balancing_migrate_deferred",
-               .data           = &sysctl_numa_balancing_migrate_deferred,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = proc_dointvec,
-       },
 #endif /* CONFIG_NUMA_BALANCING */
 #endif /* CONFIG_SCHED_DEBUG */
        {
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 9a2f6dd..0522aa2 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -2247,35 +2247,6 @@ static void sp_free(struct sp_node *n)
        kmem_cache_free(sn_cache, n);
 }
 
-#ifdef CONFIG_NUMA_BALANCING
-static bool numa_migrate_deferred(struct task_struct *p, int last_cpupid)
-{
-       /* Never defer a private fault */
-       if (cpupid_match_pid(p, last_cpupid))
-               return false;
-
-       if (p->numa_migrate_deferred) {
-               p->numa_migrate_deferred--;
-               return true;
-       }
-       return false;
-}
-
-static inline void defer_numa_migrate(struct task_struct *p)
-{
-       p->numa_migrate_deferred = sysctl_numa_balancing_migrate_deferred;
-}
-#else
-static inline bool numa_migrate_deferred(struct task_struct *p, int 
last_cpupid)
-{
-       return false;
-}
-
-static inline void defer_numa_migrate(struct task_struct *p)
-{
-}
-#endif /* CONFIG_NUMA_BALANCING */
-
 /**
  * mpol_misplaced - check whether current page node is valid in policy
  *
@@ -2378,24 +2349,8 @@ int mpol_misplaced(struct page *page, struct 
vm_area_struct *vma, unsigned long
                 */
                last_cpupid = page_cpupid_xchg_last(page, this_cpupid);
                if (!cpupid_pid_unset(last_cpupid) && 
cpupid_to_nid(last_cpupid) != thisnid) {
-
-                       /* See sysctl_numa_balancing_migrate_deferred comment */
-                       if (!cpupid_match_pid(current, last_cpupid))
-                               defer_numa_migrate(current);
-
                        goto out;
                }
-
-               /*
-                * The quadratic filter above reduces extraneous migration
-                * of shared pages somewhat. This code reduces it even more,
-                * reducing the overhead of page migrations of shared pages.
-                * This makes workloads with shared pages rely more on
-                * "move task near its memory", and less on "move memory
-                * towards its task", which is exactly what we want.
-                */
-               if (numa_migrate_deferred(current, last_cpupid))
-                       goto out;
        }
 
        if (curnid != polnid)
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to