On 15/06/18 09:01, Juri Lelli wrote:

[...]

> I'll try harder to find alternatives, but suggestions are welcome! :-)

I wonder if something like the following might actually work. IIUC the
comment in cpuset.c [1], callback_lock is the lock to take if one only
needs to query cpusets.

[1] https://elixir.bootlin.com/linux/latest/source/kernel/cgroup/cpuset.c#L266
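
The gist is that callback_lock, once turned into a raw spinlock, can be
nested inside the rq lock (which a sleeping lock like cpuset_mutex cannot),
so __sched_setscheduler() can hold it across the DL admission checks. A
rough sketch of the intended pattern (just illustrating what the patch
below does, not an extra change):

	rq = task_rq_lock(p, &rf);	/* rq->lock held, irqs off */
	cpuset_lock();			/* now raw_spin_lock(&callback_lock) */
	/* ... DL admission checks against cpuset/root domain data ... */
	cpuset_unlock();
	task_rq_unlock(rq, p, &rf);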

--->8---
 include/linux/cpuset.h |  4 +--
 kernel/cgroup/cpuset.c | 72 +++++++++++++++++++++++++-------------------------
 kernel/sched/core.c    | 24 +++++++----------
 3 files changed, 48 insertions(+), 52 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index a1970862ab8e..4bbb3f5a3020 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -55,7 +55,7 @@ extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
-extern int cpuset_lock(void);
+extern void cpuset_lock(void);
 extern void cpuset_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
@@ -178,7 +178,7 @@ static inline void cpuset_update_active_cpus(void)
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
-static inline int cpuset_lock(void) { return 1; }
+static inline void cpuset_lock(void) { }
 
 static inline void cpuset_unlock(void) { }
 
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index d26fd4795aa3..d5a0b4ec31af 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -288,7 +288,7 @@ static struct cpuset top_cpuset = {
  */
 
 static DEFINE_MUTEX(cpuset_mutex);
-static DEFINE_SPINLOCK(callback_lock);
+static DEFINE_RAW_SPINLOCK(callback_lock);
 
 static struct workqueue_struct *cpuset_migrate_mm_wq;
 
@@ -921,9 +921,9 @@ static void update_cpumasks_hier(struct cpuset *cs, struct cpumask *new_cpus)
                        continue;
                rcu_read_unlock();
 
-               spin_lock_irq(&callback_lock);
+               raw_spin_lock_irq(&callback_lock);
                cpumask_copy(cp->effective_cpus, new_cpus);
-               spin_unlock_irq(&callback_lock);
+               raw_spin_unlock_irq(&callback_lock);
 
                WARN_ON(!is_in_v2_mode() &&
                        !cpumask_equal(cp->cpus_allowed, cp->effective_cpus));
@@ -988,9 +988,9 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
        if (retval < 0)
                return retval;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cpumask_copy(cs->cpus_allowed, trialcs->cpus_allowed);
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        /* use trialcs->cpus_allowed as a temp variable */
        update_cpumasks_hier(cs, trialcs->cpus_allowed);
@@ -1174,9 +1174,9 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems)
                        continue;
                rcu_read_unlock();
 
-               spin_lock_irq(&callback_lock);
+               raw_spin_lock_irq(&callback_lock);
                cp->effective_mems = *new_mems;
-               spin_unlock_irq(&callback_lock);
+               raw_spin_unlock_irq(&callback_lock);
 
                WARN_ON(!is_in_v2_mode() &&
                        !nodes_equal(cp->mems_allowed, cp->effective_mems));
@@ -1244,9 +1244,9 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs,
        if (retval < 0)
                goto done;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cs->mems_allowed = trialcs->mems_allowed;
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        /* use trialcs->mems_allowed as a temp variable */
        update_nodemasks_hier(cs, &trialcs->mems_allowed);
@@ -1337,9 +1337,9 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
        spread_flag_changed = ((is_spread_slab(cs) != is_spread_slab(trialcs))
                        || (is_spread_page(cs) != is_spread_page(trialcs)));
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cs->flags = trialcs->flags;
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
                rebuild_sched_domains_locked();
@@ -1754,7 +1754,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
        cpuset_filetype_t type = seq_cft(sf)->private;
        int ret = 0;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
 
        switch (type) {
        case FILE_CPULIST:
@@ -1773,7 +1773,7 @@ static int cpuset_common_seq_show(struct seq_file *sf, void *v)
                ret = -EINVAL;
        }
 
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
        return ret;
 }
 
@@ -1988,12 +1988,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
 
        cpuset_inc();
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        if (is_in_v2_mode()) {
                cpumask_copy(cs->effective_cpus, parent->effective_cpus);
                cs->effective_mems = parent->effective_mems;
        }
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        if (!test_bit(CGRP_CPUSET_CLONE_CHILDREN, &css->cgroup->flags))
                goto out_unlock;
@@ -2020,12 +2020,12 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
        }
        rcu_read_unlock();
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cs->mems_allowed = parent->mems_allowed;
        cs->effective_mems = parent->mems_allowed;
        cpumask_copy(cs->cpus_allowed, parent->cpus_allowed);
        cpumask_copy(cs->effective_cpus, parent->cpus_allowed);
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 out_unlock:
        mutex_unlock(&cpuset_mutex);
        return 0;
@@ -2064,7 +2064,7 @@ static void cpuset_css_free(struct cgroup_subsys_state *css)
 static void cpuset_bind(struct cgroup_subsys_state *root_css)
 {
        mutex_lock(&cpuset_mutex);
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
 
        if (is_in_v2_mode()) {
                cpumask_copy(top_cpuset.cpus_allowed, cpu_possible_mask);
@@ -2075,7 +2075,7 @@ static void cpuset_bind(struct cgroup_subsys_state *root_css)
                top_cpuset.mems_allowed = top_cpuset.effective_mems;
        }
 
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
        mutex_unlock(&cpuset_mutex);
 }
 
@@ -2173,12 +2173,12 @@ hotplug_update_tasks_legacy(struct cpuset *cs,
 {
        bool is_empty;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cpumask_copy(cs->cpus_allowed, new_cpus);
        cpumask_copy(cs->effective_cpus, new_cpus);
        cs->mems_allowed = *new_mems;
        cs->effective_mems = *new_mems;
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        /*
         * Don't call update_tasks_cpumask() if the cpuset becomes empty,
@@ -2215,10 +2215,10 @@ hotplug_update_tasks(struct cpuset *cs,
        if (nodes_empty(*new_mems))
                *new_mems = parent_cs(cs)->effective_mems;
 
-       spin_lock_irq(&callback_lock);
+       raw_spin_lock_irq(&callback_lock);
        cpumask_copy(cs->effective_cpus, new_cpus);
        cs->effective_mems = *new_mems;
-       spin_unlock_irq(&callback_lock);
+       raw_spin_unlock_irq(&callback_lock);
 
        if (cpus_updated)
                update_tasks_cpumask(cs);
@@ -2311,21 +2311,21 @@ static void cpuset_hotplug_workfn(struct work_struct *work)
 
        /* synchronize cpus_allowed to cpu_active_mask */
        if (cpus_updated) {
-               spin_lock_irq(&callback_lock);
+               raw_spin_lock_irq(&callback_lock);
                if (!on_dfl)
                        cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
                cpumask_copy(top_cpuset.effective_cpus, &new_cpus);
-               spin_unlock_irq(&callback_lock);
+               raw_spin_unlock_irq(&callback_lock);
                /* we don't mess with cpumasks of tasks in top_cpuset */
        }
 
        /* synchronize mems_allowed to N_MEMORY */
        if (mems_updated) {
-               spin_lock_irq(&callback_lock);
+               raw_spin_lock_irq(&callback_lock);
                if (!on_dfl)
                        top_cpuset.mems_allowed = new_mems;
                top_cpuset.effective_mems = new_mems;
-               spin_unlock_irq(&callback_lock);
+               raw_spin_unlock_irq(&callback_lock);
                update_tasks_nodemask(&top_cpuset);
        }
 
@@ -2412,9 +2412,9 @@ void __init cpuset_init_smp(void)
 /**
  * cpuset_lock - Grab the cpuset_mutex from another subsystem
  */
-int cpuset_lock(void)
+void cpuset_lock(void)
 {
-       return mutex_trylock(&cpuset_mutex);
+       raw_spin_lock(&callback_lock);
 }
 
 /**
@@ -2422,7 +2422,7 @@ int cpuset_lock(void)
  */
 void cpuset_unlock(void)
 {
-       mutex_unlock(&cpuset_mutex);
+       raw_spin_unlock(&callback_lock);
 }
 
 /**
@@ -2440,11 +2440,11 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
 {
        unsigned long flags;
 
-       spin_lock_irqsave(&callback_lock, flags);
+       raw_spin_lock_irqsave(&callback_lock, flags);
        rcu_read_lock();
        guarantee_online_cpus(task_cs(tsk), pmask);
        rcu_read_unlock();
-       spin_unlock_irqrestore(&callback_lock, flags);
+       raw_spin_unlock_irqrestore(&callback_lock, flags);
 }
 
 void cpuset_cpus_allowed_fallback(struct task_struct *tsk)
@@ -2492,11 +2492,11 @@ nodemask_t cpuset_mems_allowed(struct task_struct *tsk)
        nodemask_t mask;
        unsigned long flags;
 
-       spin_lock_irqsave(&callback_lock, flags);
+       raw_spin_lock_irqsave(&callback_lock, flags);
        rcu_read_lock();
        guarantee_online_mems(task_cs(tsk), &mask);
        rcu_read_unlock();
-       spin_unlock_irqrestore(&callback_lock, flags);
+       raw_spin_unlock_irqrestore(&callback_lock, flags);
 
        return mask;
 }
@@ -2588,14 +2588,14 @@ bool __cpuset_node_allowed(int node, gfp_t gfp_mask)
                return true;
 
        /* Not hardwall and node outside mems_allowed: scan up cpusets */
-       spin_lock_irqsave(&callback_lock, flags);
+       raw_spin_lock_irqsave(&callback_lock, flags);
 
        rcu_read_lock();
        cs = nearest_hardwall_ancestor(task_cs(current));
        allowed = node_isset(node, cs->mems_allowed);
        rcu_read_unlock();
 
-       spin_unlock_irqrestore(&callback_lock, flags);
+       raw_spin_unlock_irqrestore(&callback_lock, flags);
        return allowed;
 }
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a5b0c6c25b44..9c5285cc082c 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4218,14 +4218,6 @@ static int __sched_setscheduler(struct task_struct *p,
                if (attr->sched_flags & SCHED_FLAG_SUGOV)
                        return -EINVAL;
 
-               /*
-                * Make sure we don't race with the cpuset subsystem where root
-                * domains can be rebuilt or modified while operations like DL
-                * admission checks are carried out.
-                */
-               if (!cpuset_lock())
-                       return -EBUSY;
-
                retval = security_task_setscheduler(p);
                if (retval)
                        return retval;
@@ -4241,6 +4233,13 @@ static int __sched_setscheduler(struct task_struct *p,
        rq = task_rq_lock(p, &rf);
        update_rq_clock(rq);
 
+       /*
+        * Make sure we don't race with the cpuset subsystem where root
+        * domains can be rebuilt or modified while operations like DL
+        * admission checks are carried out.
+        */
+       cpuset_lock();
+
        /*
         * Changing the policy of the stop threads its a very bad idea:
         */
@@ -4302,9 +4301,8 @@ static int __sched_setscheduler(struct task_struct *p,
        /* Re-check policy now with rq lock held: */
        if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
                policy = oldpolicy = -1;
+               cpuset_unlock();
                task_rq_unlock(rq, p, &rf);
-               if (user)
-                       cpuset_unlock();
                goto recheck;
        }
 
@@ -4361,9 +4359,8 @@ static int __sched_setscheduler(struct task_struct *p,
 
        /* Avoid rq from going away on us: */
        preempt_disable();
+       cpuset_unlock();
        task_rq_unlock(rq, p, &rf);
-       if (user)
-               cpuset_unlock();
 
        if (pi)
                rt_mutex_adjust_pi(p);
@@ -4375,9 +4372,8 @@ static int __sched_setscheduler(struct task_struct *p,
        return 0;
 
 unlock:
+       cpuset_unlock();
        task_rq_unlock(rq, p, &rf);
-       if (user)
-               cpuset_unlock();
        return retval;
 }
