Commit-ID:  0ad4e3dfe6cf3f207e61cbd8e3e4a943f8c1ad20
Gitweb:     https://git.kernel.org/tip/0ad4e3dfe6cf3f207e61cbd8e3e4a943f8c1ad20
Author:     Srikar Dronamraju <sri...@linux.vnet.ibm.com>
AuthorDate: Wed, 20 Jun 2018 22:32:50 +0530
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Wed, 25 Jul 2018 11:41:07 +0200

sched/numa: Modify migrate_swap() to accept additional parameters

There are checks in migrate_swap_stop() that verify the task/CPU
combination matches what is recorded in migration_swap_arg before migrating.

However, at least one of the two tasks to be swapped by migrate_swap() could
have migrated to a completely different CPU before the migration_swap_arg
is updated. The new CPU where the task is currently running could be on
a different node too. If the task has migrated, the NUMA balancer might
end up placing a task on the wrong node. Instead of achieving node
consolidation, it may end up spreading the load across nodes.

To avoid that, pass the CPUs as additional parameters.

While here, place migrate_swap under CONFIG_NUMA_BALANCING.

Running SPECjbb2005 on a 4-node machine and comparing bops/JVM:
JVMS  LAST_PATCH  WITH_PATCH  %CHANGE
16    25377.3     25226.6     -0.59
1     72287       73326       1.437

Signed-off-by: Srikar Dronamraju <sri...@linux.vnet.ibm.com>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Reviewed-by: Rik van Riel <r...@surriel.com>
Acked-by: Mel Gorman <mgor...@techsingularity.net>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Link: http://lkml.kernel.org/r/1529514181-9842-10-git-send-email-sri...@linux.vnet.ibm.com
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 kernel/sched/core.c  | 9 ++++++---
 kernel/sched/fair.c  | 3 ++-
 kernel/sched/sched.h | 3 ++-
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 2bc391a574e6..deafa9fe602b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1176,6 +1176,7 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
        __set_task_cpu(p, new_cpu);
 }
 
+#ifdef CONFIG_NUMA_BALANCING
 static void __migrate_swap_task(struct task_struct *p, int cpu)
 {
        if (task_on_rq_queued(p)) {
@@ -1257,16 +1258,17 @@ unlock:
 /*
  * Cross migrate two tasks
  */
-int migrate_swap(struct task_struct *cur, struct task_struct *p)
+int migrate_swap(struct task_struct *cur, struct task_struct *p,
+               int target_cpu, int curr_cpu)
 {
        struct migration_swap_arg arg;
        int ret = -EINVAL;
 
        arg = (struct migration_swap_arg){
                .src_task = cur,
-               .src_cpu = task_cpu(cur),
+               .src_cpu = curr_cpu,
                .dst_task = p,
-               .dst_cpu = task_cpu(p),
+               .dst_cpu = target_cpu,
        };
 
        if (arg.src_cpu == arg.dst_cpu)
@@ -1291,6 +1293,7 @@ int migrate_swap(struct task_struct *cur, struct task_struct *p)
 out:
        return ret;
 }
+#endif /* CONFIG_NUMA_BALANCING */
 
 /*
  * wait_task_inactive - wait for a thread to unschedule.
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 4ac60b296d96..7b4eddec3ccc 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1848,7 +1848,8 @@ static int task_numa_migrate(struct task_struct *p)
                return ret;
        }
 
-       ret = migrate_swap(p, env.best_task);
+       ret = migrate_swap(p, env.best_task, env.best_cpu, env.src_cpu);
+
        if (ret != 0)
                trace_sched_stick_numa(p, env.src_cpu, task_cpu(env.best_task));
        put_task_struct(env.best_task);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 614170d9b1aa..4a2e8cae63c4 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1099,7 +1099,8 @@ enum numa_faults_stats {
 };
 extern void sched_setnuma(struct task_struct *p, int node);
 extern int migrate_task_to(struct task_struct *p, int cpu);
-extern int migrate_swap(struct task_struct *, struct task_struct *);
+extern int migrate_swap(struct task_struct *p, struct task_struct *t,
+                       int cpu, int scpu);
 extern void init_numa_balancing(unsigned long clone_flags, struct task_struct *p);
 #else
 static inline void

Reply via email to