In a dom0less configuration, if the null scheduler is used, the system
may fail to boot, because the loop in null_unit_wake() never exits.
Bisection showed that this behavior occurs since commit d545f1d6 ("xen:
sched: deal with vCPUs being or becoming online or offline") but the
real problem is that, in this case, pick_res() always returns the same
CPU.
Fix this by only dealing with the simple case, i.e., the case where the
vCPU that is coming online can be assigned to a sched. resource right
away, in null_unit_wake().
If it can't, just add it to the waitqueue, and we will deal with it in
null_schedule(), being careful about not racing with vcpu_wake().
Reported-by: Stefano Stabellini
Signed-off-by: Dario Faggioli
Tested-by: Stefano Stabellini
---
xen/common/sched_null.c | 113 +++
1 file changed, 75 insertions(+), 38 deletions(-)
diff --git a/xen/common/sched_null.c b/xen/common/sched_null.c
index 2525464a7c..88bd11a187 100644
--- a/xen/common/sched_null.c
+++ b/xen/common/sched_null.c
@@ -568,50 +568,52 @@ static void null_unit_wake(const struct scheduler *ops,
     else
         SCHED_STAT_CRANK(unit_wake_not_runnable);
+    if ( likely(per_cpu(npc, cpu).unit == unit) )
+    {
+        cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
+        return;
+    }
+
     /*
      * If a unit is neither on a pCPU nor in the waitqueue, it means it was
-     * offline, and that it is now coming back being online.
+     * offline, and that it is now coming back being online. If we're lucky,
+     * and its previous resource is free (and affinities match), we can just
+     * assign the unit to it (we own the proper lock already) and be done.
      */
-    if ( unlikely(per_cpu(npc, cpu).unit != unit &&
-                  list_empty(&nvc->waitq_elem)) )
+    if ( per_cpu(npc, cpu).unit == NULL &&
+         unit_check_affinity(unit, cpu, BALANCE_HARD_AFFINITY) )
     {
-        spin_lock(&prv->waitq_lock);
-        list_add_tail(&nvc->waitq_elem, &prv->waitq);
-        spin_unlock(&prv->waitq_lock);
-
-        cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
-                    cpupool_domain_master_cpumask(unit->domain));
-
-        if ( !cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
+        if ( !has_soft_affinity(unit) ||
+             unit_check_affinity(unit, cpu, BALANCE_SOFT_AFFINITY) )
         {
-            dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
-                    unit->domain->domain_id, unit->unit_id);
+            unit_assign(prv, unit, cpu);
+            cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ);
             return;
         }
+    }
-        /*
-         * Now we would want to assign the unit to cpu, but we can't, because
-         * we don't have the lock. So, let's do the following:
-         * - try to remove cpu from the list of free cpus, to avoid races with
-         *   other onlining, inserting or migrating operations;
-         * - tickle the cpu, which will pickup work from the waitqueue, and
-         *   assign it to itself;
-         * - if we're racing already, and if there still are free cpus, try
-         *   again.
-         */
-        while ( cpumask_intersects(&prv->cpus_free, cpumask_scratch_cpu(cpu)) )
-        {
-            unsigned int new_cpu = pick_res(prv, unit)->master_cpu;
+    /*
+     * If the resource is not free (or affinities do not match) we need
+     * to assign unit to some other one, but we can't do it here, as:
+     * - we don't own the proper lock,
+     * - we can't change v->processor under vcpu_wake()'s feet.
+     * So we add it to the waitqueue, and tickle all the free CPUs (if any)
+     * on which unit can run. The first one that schedules will pick it up.
+     */
+    spin_lock(&prv->waitq_lock);
+    list_add_tail(&nvc->waitq_elem, &prv->waitq);
+    spin_unlock(&prv->waitq_lock);
-            if ( test_and_clear_bit(new_cpu, &prv->cpus_free) )
-            {
-                cpu_raise_softirq(new_cpu, SCHEDULE_SOFTIRQ);
-                return;
-            }
-        }
-    }
+    cpumask_and(cpumask_scratch_cpu(cpu), unit->cpu_hard_affinity,
+                cpupool_domain_master_cpumask(unit->domain));
+    cpumask_and(cpumask_scratch_cpu(cpu), cpumask_scratch_cpu(cpu),
+                &prv->cpus_free);
-    /* Note that we get here only for units assigned to a pCPU */
-    cpu_raise_softirq(sched_unit_master(unit), SCHEDULE_SOFTIRQ);
+    if ( cpumask_empty(cpumask_scratch_cpu(cpu)) )
+        dprintk(XENLOG_G_WARNING, "WARNING: d%dv%d not assigned to any CPU!\n",
+                unit->domain->domain_id, unit->unit_id);
+    else
+        cpumask_raise_softirq(cpumask_scratch_cpu(cpu), SCHEDULE_SOFTIRQ);
 }
static void null_unit_sleep(const struct scheduler *ops,
@@ -827,6 +829,8 @@ static void null_schedule(const struct scheduler *ops,
struct sched_unit *prev,
*/
if ( unlikely(prev->next_task == NULL) )
{
+        bool unit_found;
+
         spin_lock(&prv->waitq_lock);
if (