Commit-ID:  d74b27d63a8bebe2fe634944e4ebdc7b10db7a39
Gitweb:     https://git.kernel.org/tip/d74b27d63a8bebe2fe634944e4ebdc7b10db7a39
Author:     Juri Lelli <juri.le...@redhat.com>
AuthorDate: Fri, 19 Jul 2019 15:59:58 +0200
Committer:  Ingo Molnar <mi...@kernel.org>
CommitDate: Thu, 25 Jul 2019 15:55:03 +0200

cgroup/cpuset: Change cpuset_rwsem and hotplug lock order

cpuset_rwsem is going to be acquired from sched_setscheduler() with a
following patch. There are however paths (e.g., spawn_ksoftirqd) in
which sched_setscheduler() is eventually called while holding hotplug lock;
this creates a dependency between hotplug lock (to be always acquired
first) and cpuset_rwsem (to be always acquired after hotplug lock).

Fix paths which currently take the two locks in the wrong order (after
a following patch is applied).

Tested-by: Dietmar Eggemann <dietmar.eggem...@arm.com>
Signed-off-by: Juri Lelli <juri.le...@redhat.com>
Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Thomas Gleixner <t...@linutronix.de>
Cc: bris...@redhat.com
Cc: clau...@evidence.eu.com
Cc: lize...@huawei.com
Cc: long...@redhat.com
Cc: luca.ab...@santannapisa.it
Cc: mathieu.poir...@linaro.org
Cc: rost...@goodmis.org
Cc: t...@kernel.org
Cc: tommaso.cucino...@santannapisa.it
Link: https://lkml.kernel.org/r/20190719140000.31694-7-juri.le...@redhat.com
Signed-off-by: Ingo Molnar <mi...@kernel.org>
---
 include/linux/cpuset.h |  8 ++++----
 kernel/cgroup/cpuset.c | 22 +++++++++++++++++-----
 2 files changed, 21 insertions(+), 9 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 934633a05d20..7f1478c26a33 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -40,14 +40,14 @@ static inline bool cpusets_enabled(void)
 
 static inline void cpuset_inc(void)
 {
-       static_branch_inc(&cpusets_pre_enable_key);
-       static_branch_inc(&cpusets_enabled_key);
+       static_branch_inc_cpuslocked(&cpusets_pre_enable_key);
+       static_branch_inc_cpuslocked(&cpusets_enabled_key);
 }
 
 static inline void cpuset_dec(void)
 {
-       static_branch_dec(&cpusets_enabled_key);
-       static_branch_dec(&cpusets_pre_enable_key);
+       static_branch_dec_cpuslocked(&cpusets_enabled_key);
+       static_branch_dec_cpuslocked(&cpusets_pre_enable_key);
 }
 
 extern int cpuset_init(void);
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index e1a8d168c5e9..5c5014caa23c 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -973,8 +973,8 @@ static void rebuild_sched_domains_locked(void)
        cpumask_var_t *doms;
        int ndoms;
 
+       lockdep_assert_cpus_held();
        percpu_rwsem_assert_held(&cpuset_rwsem);
-       get_online_cpus();
 
        /*
         * We have raced with CPU hotplug. Don't do anything to avoid
@@ -983,19 +983,17 @@ static void rebuild_sched_domains_locked(void)
         */
        if (!top_cpuset.nr_subparts_cpus &&
            !cpumask_equal(top_cpuset.effective_cpus, cpu_active_mask))
-               goto out;
+               return;
 
        if (top_cpuset.nr_subparts_cpus &&
           !cpumask_subset(top_cpuset.effective_cpus, cpu_active_mask))
-               goto out;
+               return;
 
        /* Generate domain masks and attrs */
        ndoms = generate_sched_domains(&doms, &attr);
 
        /* Have scheduler rebuild the domains */
        partition_and_rebuild_sched_domains(ndoms, doms, attr);
-out:
-       put_online_cpus();
 }
 #else /* !CONFIG_SMP */
 static void rebuild_sched_domains_locked(void)
@@ -1005,9 +1003,11 @@ static void rebuild_sched_domains_locked(void)
 
 void rebuild_sched_domains(void)
 {
+       get_online_cpus();
        percpu_down_write(&cpuset_rwsem);
        rebuild_sched_domains_locked();
        percpu_up_write(&cpuset_rwsem);
+       put_online_cpus();
 }
 
 /**
@@ -2245,6 +2245,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
        cpuset_filetype_t type = cft->private;
        int retval = 0;
 
+       get_online_cpus();
        percpu_down_write(&cpuset_rwsem);
        if (!is_cpuset_online(cs)) {
                retval = -ENODEV;
@@ -2282,6 +2283,7 @@ static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft,
        }
 out_unlock:
        percpu_up_write(&cpuset_rwsem);
+       put_online_cpus();
        return retval;
 }
 
@@ -2292,6 +2294,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
        cpuset_filetype_t type = cft->private;
        int retval = -ENODEV;
 
+       get_online_cpus();
        percpu_down_write(&cpuset_rwsem);
        if (!is_cpuset_online(cs))
                goto out_unlock;
@@ -2306,6 +2309,7 @@ static int cpuset_write_s64(struct cgroup_subsys_state *css, struct cftype *cft,
        }
 out_unlock:
        percpu_up_write(&cpuset_rwsem);
+       put_online_cpus();
        return retval;
 }
 
@@ -2344,6 +2348,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
        kernfs_break_active_protection(of->kn);
        flush_work(&cpuset_hotplug_work);
 
+       get_online_cpus();
        percpu_down_write(&cpuset_rwsem);
        if (!is_cpuset_online(cs))
                goto out_unlock;
@@ -2369,6 +2374,7 @@ static ssize_t cpuset_write_resmask(struct kernfs_open_file *of,
        free_cpuset(trialcs);
 out_unlock:
        percpu_up_write(&cpuset_rwsem);
+       put_online_cpus();
        kernfs_unbreak_active_protection(of->kn);
        css_put(&cs->css);
        flush_workqueue(cpuset_migrate_mm_wq);
@@ -2499,6 +2505,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
                return -EINVAL;
 
        css_get(&cs->css);
+       get_online_cpus();
        percpu_down_write(&cpuset_rwsem);
        if (!is_cpuset_online(cs))
                goto out_unlock;
@@ -2506,6 +2513,7 @@ static ssize_t sched_partition_write(struct kernfs_open_file *of, char *buf,
        retval = update_prstate(cs, val);
 out_unlock:
        percpu_up_write(&cpuset_rwsem);
+       put_online_cpus();
        css_put(&cs->css);
        return retval ?: nbytes;
 }
@@ -2711,6 +2719,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
        if (!parent)
                return 0;
 
+       get_online_cpus();
        percpu_down_write(&cpuset_rwsem);
 
        set_bit(CS_ONLINE, &cs->flags);
@@ -2763,6 +2772,7 @@ static int cpuset_css_online(struct cgroup_subsys_state *css)
        spin_unlock_irq(&callback_lock);
 out_unlock:
        percpu_up_write(&cpuset_rwsem);
+       put_online_cpus();
        return 0;
 }
 
@@ -2781,6 +2791,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
 {
        struct cpuset *cs = css_cs(css);
 
+       get_online_cpus();
        percpu_down_write(&cpuset_rwsem);
 
        if (is_partition_root(cs))
@@ -2801,6 +2812,7 @@ static void cpuset_css_offline(struct cgroup_subsys_state *css)
        clear_bit(CS_ONLINE, &cs->flags);
 
        percpu_up_write(&cpuset_rwsem);
+       put_online_cpus();
 }
 
 static void cpuset_css_free(struct cgroup_subsys_state *css)

Reply via email to