On Wed, Jul 22, 2020 at 10:57:56AM +0100, Chris Wilson wrote:

> Perhaps more damning is that I can replace WF_ON_CPU with p->on_cpu to
> suppress the warning:

*argh*, I'm starting to go mad...

Chris, could you please try the below patch?

Can you also confirm that if you do:

$ echo NO_TTWU_QUEUE_ON_CPU > /debug/sched_features

or wherever else system-doofus mounts debugfs these days,
the issue no longer manifests? Because if I don't get a handle on this
soon, we might have to disable this thing for now :/
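
(For reference, debugfs is typically mounted at /sys/kernel/debug, so on
most setups the above would be:

$ echo NO_TTWU_QUEUE_ON_CPU > /sys/kernel/debug/sched_features

adjust the path if your system mounts it somewhere else.)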


---
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a2a244af9a537..8218779734288 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2430,13 +2430,15 @@ bool cpus_share_cache(int this_cpu, int that_cpu)
        return per_cpu(sd_llc_id, this_cpu) == per_cpu(sd_llc_id, that_cpu);
 }
 
-static inline bool ttwu_queue_cond(int cpu, int wake_flags)
+static inline bool ttwu_queue_cond(struct task_struct *p, int cpu, int wake_flags)
 {
+       int this_cpu = smp_processor_id();
+
        /*
         * If the CPU does not share cache, then queue the task on the
         * remote rqs wakelist to avoid accessing remote data.
         */
-       if (!cpus_share_cache(smp_processor_id(), cpu))
+       if (!cpus_share_cache(this_cpu, cpu))
                return true;
 
        /*
@@ -2445,15 +2447,30 @@ static inline bool ttwu_queue_cond(int cpu, int wake_flags)
         * the soon-to-be-idle CPU as the current CPU is likely busy.
         * nr_running is checked to avoid unnecessary task stacking.
         */
-       if ((wake_flags & WF_ON_CPU) && cpu_rq(cpu)->nr_running <= 1)
-               return true;
+       if (wake_flags & WF_ON_CPU) {
+
+               if (unlikely(cpu == this_cpu)) {
+                       int on_cpu = READ_ONCE(p->on_cpu);
+                       int cpu1 = task_cpu(p);
+
+                       smp_rmb();
+                       smp_cond_load_acquire(&p->on_cpu, !VAL);
+
+                       pr_alert("ttwu-IPI-self: %d==%d, p->on_cpu=%d;0, task_cpu(p)=%d;%d\n",
+                                cpu, this_cpu, on_cpu, cpu1, task_cpu(p));
+
+                       return false;
+               }
+
+               return cpu_rq(cpu)->nr_running <= 1;
+       }
 
        return false;
 }
 
 static bool ttwu_queue_wakelist(struct task_struct *p, int cpu, int wake_flags)
 {
-       if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(cpu, wake_flags)) {
+       if (sched_feat(TTWU_QUEUE) && ttwu_queue_cond(p, cpu, wake_flags)) {
                if (WARN_ON_ONCE(cpu == smp_processor_id()))
                        return false;
 
@@ -2713,7 +2730,7 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
         * to ensure we observe the correct CPU on which the task is currently
         * scheduling.
         */
-       if (smp_load_acquire(&p->on_cpu) &&
+       if (sched_feat(TTWU_QUEUE_ON_CPU) && smp_load_acquire(&p->on_cpu) &&
            ttwu_queue_wakelist(p, task_cpu(p), wake_flags | WF_ON_CPU))
                goto unlock;
 
diff --git a/kernel/sched/features.h b/kernel/sched/features.h
index 7481cd96f3915..b231a840c3eba 100644
--- a/kernel/sched/features.h
+++ b/kernel/sched/features.h
@@ -50,6 +50,7 @@ SCHED_FEAT(NONTASK_CAPACITY, true)
  * using the scheduler IPI. Reduces rq->lock contention/bounces.
  */
 SCHED_FEAT(TTWU_QUEUE, true)
+SCHED_FEAT(TTWU_QUEUE_ON_CPU, true)
 
 /*
  * When doing wakeups, attempt to limit superfluous scans of the LLC domain.
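
For anyone following along: the smp_cond_load_acquire(&p->on_cpu, !VAL)
in the debug hunk spins until p->on_cpu reads as 0 and then provides
ACQUIRE ordering for everything after it. Roughly, the generic fallback
expands to something like the sketch below; architectures can override
it with a smarter wait primitive, so treat this as illustrative only:

	/* illustrative expansion of smp_cond_load_acquire(&p->on_cpu, !VAL) */
	int val;

	for (;;) {
		val = READ_ONCE(p->on_cpu);	/* VAL in the macro */
		if (!val)			/* the !VAL condition */
			break;
		cpu_relax();			/* arch hint: busy-waiting */
	}
	smp_acquire__after_ctrl_dep();		/* upgrade to ACQUIRE ordering */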
