Re: [PATCH V3 04/10] sched/core: Prevent race condition between cpuset and __sched_setscheduler()

2018-02-15 Thread Juri Lelli
On 15/02/18 11:33, Juri Lelli wrote:
> On 14/02/18 17:31, Juri Lelli wrote:
> 
> [...]
> 
> > Still grabbing it is a no-go, as do_sched_setscheduler calls
> > sched_setscheduler from inside an RCU read-side critical section.
> 
> I was then actually thinking that trylocking might do... not sure however
> if failing with -EBUSY in the contended case is feasible (and about the
> general ugliness of the solution :/).

Or, as suggested by Peter on IRC, the following (which would still
require conditional locking for the sysrq case).

--->8---
 kernel/sched/core.c | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0d8badcf1f0f..4e9405d50cbd 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4312,6 +4312,7 @@ static int __sched_setscheduler(struct task_struct *p,
         /* Avoid rq from going away on us: */
         preempt_disable();
         task_rq_unlock(rq, p, &rf);
+        cpuset_unlock();
 
         if (pi)
                 rt_mutex_adjust_pi(p);
@@ -4409,10 +4410,16 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
         rcu_read_lock();
         retval = -ESRCH;
         p = find_process_by_pid(pid);
-        if (p != NULL)
-                retval = sched_setscheduler(p, policy, &lparam);
+        if (!p) {
+                rcu_read_unlock();
+                goto exit;
+        }
+        get_task_struct(p);
         rcu_read_unlock();
+        retval = sched_setscheduler(p, policy, &lparam);
+        put_task_struct(p);
 
+exit:
         return retval;
 }
 
@@ -4540,10 +4547,16 @@ SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr,
         rcu_read_lock();
         retval = -ESRCH;
         p = find_process_by_pid(pid);
-        if (p != NULL)
-                retval = sched_setattr(p, &attr);
+        if (!p) {
+                rcu_read_unlock();
+                goto exit;
+        }
+        get_task_struct(p);
         rcu_read_unlock();
+        retval = sched_setattr(p, &attr);
+        put_task_struct(p);
 
+exit:
         return retval;
 }
 
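(Not in the diff above: the conditional locking for the sysrq case. A
rough sketch of what it might look like inside __sched_setscheduler(),
assuming, as discussed below, that pi == false identifies the
sysrq/normalize_rt_tasks() path, which runs from hardirq context and
must not sleep:)

--->8---
        /*
         * Sketch only: grab cpuset_mutex on the normal (pi == true) path;
         * the sysrq path calls __sched_setscheduler() with pi == false
         * from hardirq context, where a sleeping mutex_lock() is not
         * allowed -- and DL admission accuracy hardly matters there.
         */
        if (pi)
                cpuset_lock();

        /* ... admission control etc. ... */

        if (pi)
                cpuset_unlock();
--->8---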


Re: [PATCH V3 04/10] sched/core: Prevent race condition between cpuset and __sched_setscheduler()

2018-02-15 Thread Juri Lelli
On 14/02/18 17:31, Juri Lelli wrote:

[...]

> Still grabbing it is a no-go, as do_sched_setscheduler calls
> sched_setscheduler from inside an RCU read-side critical section.

I was then actually thinking that trylocking might do... not sure however
if failing with -EBUSY in the contended case is feasible (and about the
general ugliness of the solution :/).

--->8---
 include/linux/cpuset.h | 4 ++--
 kernel/cgroup/cpuset.c | 6 +++---
 kernel/sched/core.c    | 4 +++-
 3 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 4bbb3f5a3020..53c3f4e13cb0 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -55,7 +55,7 @@ extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
-extern void cpuset_lock(void);
+extern int cpuset_trylock(void);
 extern void cpuset_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
@@ -178,7 +178,7 @@ static inline void cpuset_update_active_cpus(void)
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
-static inline void cpuset_lock(void) { }
+static inline int cpuset_trylock(void) { return 1; }
 
 static inline void cpuset_unlock(void) { }
 
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 41f8391640e6..995aac5b032d 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2410,11 +2410,11 @@ void __init cpuset_init_smp(void)
 }
 
 /**
- * cpuset_lock - Grab the cpuset_mutex from another subsystem
+ * cpuset_trylock - Try to grab the cpuset_mutex atomically from another subsystem
  */
-void cpuset_lock(void)
+int cpuset_trylock(void)
 {
-        mutex_lock(&cpuset_mutex);
+        return mutex_trylock(&cpuset_mutex);
 }
 
 /**
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 0d8badcf1f0f..b491b406a835 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4180,7 +4180,8 @@ static int __sched_setscheduler(struct task_struct *p,
          * domains can be rebuilt or modified while operations like DL
          * admission checks are carried out.
          */
-        cpuset_lock();
+        if (!cpuset_trylock())
+                return -EBUSY;
 
         /*
          * Make sure no PI-waiters arrive (or leave) while we are
@@ -4312,6 +4313,7 @@ static int __sched_setscheduler(struct task_struct *p,
         /* Avoid rq from going away on us: */
         preempt_disable();
         task_rq_unlock(rq, p, &rf);
+        cpuset_unlock();
 
         if (pi)
                 rt_mutex_adjust_pi(p);

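(If we went the trylock route, callers would also start seeing -EBUSY
from sched_setscheduler()/sched_setattr(). A hedged userspace sketch of
how a caller might cope -- the helper name and backoff policy are made
up for illustration:)

--->8---
#include <errno.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

/* Hypothetical helper: retry SCHED_FIFO promotion on -EBUSY. */
static int set_fifo_with_retry(pid_t pid, int prio, int max_tries)
{
        struct sched_param param = { .sched_priority = prio };
        int i;

        for (i = 0; i < max_tries; i++) {
                if (sched_setscheduler(pid, SCHED_FIFO, &param) == 0)
                        return 0;
                if (errno != EBUSY)
                        return -1;              /* real error, give up */
                usleep(1000 << i);              /* arbitrary backoff */
        }
        errno = EBUSY;
        return -1;
}

int main(void)
{
        if (set_fifo_with_retry(0, 10, 5))
                perror("sched_setscheduler");
        return 0;
}
--->8---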

Re: [PATCH V3 04/10] sched/core: Prevent race condition between cpuset and __sched_setscheduler()

2018-02-14 Thread Juri Lelli
On 14/02/18 08:33, Mathieu Poirier wrote:
> On 14 February 2018 at 04:27, Juri Lelli  wrote:
> > On 14/02/18 11:49, Juri Lelli wrote:
> >> On 14/02/18 11:36, Juri Lelli wrote:
> >> > Hi Mathieu,
> >> >
> >> > On 13/02/18 13:32, Mathieu Poirier wrote:
> >> > > No synchronisation mechanism exists between the cpuset subsystem and calls
> >> > > to function __sched_setscheduler().  As such it is possible that new root
> >> > > domains are created on the cpuset side while a deadline acceptance test
> >> > > is carried out in __sched_setscheduler(), leading to a potential oversell
> >> > > of CPU bandwidth.
> >> > >
> >> > > By making available the cpuset_mutex to the core scheduler it is possible
> >> > > to prevent situations such as the one described above from happening.
> >> > >
> >> > > Signed-off-by: Mathieu Poirier 
> >> > > ---
> >> >
> >> > [...]
> >> >
> >> > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> >> > > index f727c3d0064c..0d8badcf1f0f 100644
> >> > > --- a/kernel/sched/core.c
> >> > > +++ b/kernel/sched/core.c
> >> > > @@ -4176,6 +4176,13 @@ static int __sched_setscheduler(struct task_struct *p,
> >> > >   }
> >> > >
> >> > >   /*
> >> > > +  * Make sure we don't race with the cpuset subsystem where root
> >> > > +  * domains can be rebuilt or modified while operations like DL
> >> > > +  * admission checks are carried out.
> >> > > +  */
> >> > > + cpuset_lock();
> >> > > +
> >> > > + /*
> >> >
> >> > Mmm, I'm afraid we can't do this. __sched_setscheduler might be called
> >> > from interrupt context by normalize_rt_tasks().
> >>
> >> Maybe conditionally grabbing it if pi is true could do? I guess we don't
> >> care much about domains when sysrq.
> >
> > Oops... just got this. :/
> 
> 
> Arrghhh... Back to the drawing board.
> 

Eh... even though the warning simply happens because the unlocking of the
cpuset lock is missing:

--->8---
@@ -4312,6 +4312,7 @@ static int __sched_setscheduler(struct task_struct *p,

         /* Avoid rq from going away on us: */
         preempt_disable();
         task_rq_unlock(rq, p, &rf);
+        cpuset_unlock();
 
         if (pi)
                 rt_mutex_adjust_pi(p);
--->8---

Still grabbing it is a no-go, as do_sched_setscheduler calls
sched_setscheduler from inside an RCU read-side critical section.
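(The offending pattern, simplified from do_sched_setscheduler(); sketch
with my annotations:)

--->8---
        rcu_read_lock();
        p = find_process_by_pid(pid);   /* p only pinned by RCU */
        if (p != NULL)
                /*
                 * Ends up in __sched_setscheduler() -> cpuset_lock()
                 * -> mutex_lock(&cpuset_mutex), which may sleep --
                 * illegal inside an RCU read-side critical section.
                 */
                retval = sched_setscheduler(p, policy, &lparam);
        rcu_read_unlock();
--->8---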

So, back to the drawing board indeed. :/

Thanks,

- Juri


Re: [PATCH V3 04/10] sched/core: Prevent race condition between cpuset and __sched_setscheduler()

2018-02-14 Thread Mathieu Poirier
On 14 February 2018 at 04:27, Juri Lelli  wrote:
> On 14/02/18 11:49, Juri Lelli wrote:
>> On 14/02/18 11:36, Juri Lelli wrote:
>> > Hi Mathieu,
>> >
>> > On 13/02/18 13:32, Mathieu Poirier wrote:
>> > > No synchronisation mechanism exists between the cpuset subsystem and calls
>> > > to function __sched_setscheduler().  As such it is possible that new root
>> > > domains are created on the cpuset side while a deadline acceptance test
>> > > is carried out in __sched_setscheduler(), leading to a potential oversell
>> > > of CPU bandwidth.
>> > >
>> > > By making available the cpuset_mutex to the core scheduler it is possible
>> > > to prevent situations such as the one described above from happening.
>> > >
>> > > Signed-off-by: Mathieu Poirier 
>> > > ---
>> >
>> > [...]
>> >
>> > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> > > index f727c3d0064c..0d8badcf1f0f 100644
>> > > --- a/kernel/sched/core.c
>> > > +++ b/kernel/sched/core.c
>> > > @@ -4176,6 +4176,13 @@ static int __sched_setscheduler(struct task_struct *p,
>> > >   }
>> > >
>> > >   /*
>> > > +  * Make sure we don't race with the cpuset subsystem where root
>> > > +  * domains can be rebuilt or modified while operations like DL
>> > > +  * admission checks are carried out.
>> > > +  */
>> > > + cpuset_lock();
>> > > +
>> > > + /*
>> >
>> > Mmm, I'm afraid we can't do this. __sched_setscheduler might be called
>> > from interrupt context by normalize_rt_tasks().
>>
>> Maybe conditionally grabbing it if pi is true could do? I guess we don't
>> care much about domains when sysrq.
>
> Oops... just got this. :/


Arrghhh... Back to the drawing board.

>
> --->8---
> [0.020203] ==
> [0.020946] WARNING: possible circular locking dependency detected
> [0.021000] 4.16.0-rc1+ #64 Not tainted
> [0.021000] --
> [0.021000] swapper/0/1 is trying to acquire lock:
> [0.021000]  (cpu_hotplug_lock.rw_sem){.+.+}, at: [<7164d41d>] smpboot_register_percpu_thread_cpumask+0x2d/0x100
> [0.021000]
> [0.021000] but task is already holding lock:
> [0.021000]  (cpuset_mutex){+.+.}, at: [<8529a52c>] __sched_setscheduler+0xb5/0x8b0
> [0.021000]
> [0.021000] which lock already depends on the new lock.
> [0.021000]
> [0.021000]
> [0.021000] the existing dependency chain (in reverse order) is:
> [0.021000]
> [0.021000] -> #2 (cpuset_mutex){+.+.}:
> [0.021000]__sched_setscheduler+0xb5/0x8b0
> [0.021000]_sched_setscheduler+0x6c/0x80
> [0.021000]__kthread_create_on_node+0x10e/0x170
> [0.021000]kthread_create_on_node+0x37/0x40
> [0.021000]kthread_create_on_cpu+0x27/0x90
> [0.021000]__smpboot_create_thread.part.3+0x64/0xe0
> [0.021000]smpboot_register_percpu_thread_cpumask+0x91/0x100
> [0.021000]spawn_ksoftirqd+0x37/0x40
> [0.021000]do_one_initcall+0x3b/0x160
> [0.021000]kernel_init_freeable+0x118/0x258
> [0.021000]kernel_init+0xa/0x100
> [0.021000]ret_from_fork+0x3a/0x50
> [0.021000]
> [0.021000] -> #1 (smpboot_threads_lock){+.+.}:
> [0.021000]smpboot_register_percpu_thread_cpumask+0x3b/0x100
> [0.021000]spawn_ksoftirqd+0x37/0x40
> [0.021000]do_one_initcall+0x3b/0x160
> [0.021000]kernel_init_freeable+0x118/0x258
> [0.021000]kernel_init+0xa/0x100
> [0.021000]ret_from_fork+0x3a/0x50
> [0.021000]
> [0.021000] -> #0 (cpu_hotplug_lock.rw_sem){.+.+}:
> [0.021000]cpus_read_lock+0x3e/0x80
> [0.021000]smpboot_register_percpu_thread_cpumask+0x2d/0x100
> [0.021000]lockup_detector_init+0x3e/0x74
> [0.021000]kernel_init_freeable+0x146/0x258
> [0.021000]kernel_init+0xa/0x100
> [0.021000]ret_from_fork+0x3a/0x50
> [0.021000]
> [0.021000] other info that might help us debug this:
> [0.021000]
> [0.021000] Chain exists of:
> [0.021000]   cpu_hotplug_lock.rw_sem --> smpboot_threads_lock --> cpuset_mutex
> [0.021000]
> [0.021000]  Possible unsafe locking scenario:
> [0.021000]
> [0.021000]CPU0CPU1
> [0.021000]
> [0.021000]   lock(cpuset_mutex);
> [0.021000]lock(smpboot_threads_lock);
> [0.021000]lock(cpuset_mutex);
> [0.021000]   lock(cpu_hotplug_lock.rw_sem);
> [0.021000]
> [0.021000]  *** DEADLOCK ***
> [0.021000]
> [0.021000] 1 lock held by swapper/0/1:
> [0.021000]  #0:  (cpuset_mutex){+.+.}, at: [<8529a52c>] __sched_setscheduler+0xb5/0x8b0
> [0.021000]
> [0.021000] stack backtrace:
> [0.021000] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1+ #64
> [0.021000]

Re: [PATCH V3 04/10] sched/core: Prevent race condition between cpuset and __sched_setscheduler()

2018-02-14 Thread Juri Lelli
On 14/02/18 11:49, Juri Lelli wrote:
> On 14/02/18 11:36, Juri Lelli wrote:
> > Hi Mathieu,
> > 
> > On 13/02/18 13:32, Mathieu Poirier wrote:
> > > No synchronisation mechanism exists between the cpuset subsystem and calls
> > > to function __sched_setscheduler().  As such it is possible that new root
> > > domains are created on the cpuset side while a deadline acceptance test
> > > is carried out in __sched_setscheduler(), leading to a potential oversell
> > > of CPU bandwidth.
> > > 
> > > By making available the cpuset_mutex to the core scheduler it is possible
> > > to prevent situations such as the one described above from happening.
> > > 
> > > Signed-off-by: Mathieu Poirier 
> > > ---
> > 
> > [...]
> > 
> > > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > > index f727c3d0064c..0d8badcf1f0f 100644
> > > --- a/kernel/sched/core.c
> > > +++ b/kernel/sched/core.c
> > > @@ -4176,6 +4176,13 @@ static int __sched_setscheduler(struct task_struct *p,
> > >   }
> > >  
> > >   /*
> > > +  * Make sure we don't race with the cpuset subsystem where root
> > > +  * domains can be rebuilt or modified while operations like DL
> > > +  * admission checks are carried out.
> > > +  */
> > > + cpuset_lock();
> > > +
> > > + /*
> > 
> > Mmm, I'm afraid we can't do this. __sched_setscheduler might be called
> > from interrupt context by normalize_rt_tasks().
> 
> Maybe conditionally grabbing it if pi is true could do? I guess we don't
> care much about domains when sysrq.

Oops... just got this. :/

--->8---
[0.020203] ==
[0.020946] WARNING: possible circular locking dependency detected
[0.021000] 4.16.0-rc1+ #64 Not tainted
[0.021000] --
[0.021000] swapper/0/1 is trying to acquire lock:
[0.021000]  (cpu_hotplug_lock.rw_sem){.+.+}, at: [<7164d41d>] smpboot_register_percpu_thread_cpumask+0x2d/0x100
[0.021000]
[0.021000] but task is already holding lock:
[0.021000]  (cpuset_mutex){+.+.}, at: [<8529a52c>] __sched_setscheduler+0xb5/0x8b0
[0.021000]
[0.021000] which lock already depends on the new lock.
[0.021000]
[0.021000]
[0.021000] the existing dependency chain (in reverse order) is:
[0.021000]
[0.021000] -> #2 (cpuset_mutex){+.+.}:
[0.021000]__sched_setscheduler+0xb5/0x8b0
[0.021000]_sched_setscheduler+0x6c/0x80
[0.021000]__kthread_create_on_node+0x10e/0x170
[0.021000]kthread_create_on_node+0x37/0x40
[0.021000]kthread_create_on_cpu+0x27/0x90
[0.021000]__smpboot_create_thread.part.3+0x64/0xe0
[0.021000]smpboot_register_percpu_thread_cpumask+0x91/0x100
[0.021000]spawn_ksoftirqd+0x37/0x40
[0.021000]do_one_initcall+0x3b/0x160
[0.021000]kernel_init_freeable+0x118/0x258
[0.021000]kernel_init+0xa/0x100
[0.021000]ret_from_fork+0x3a/0x50
[0.021000]
[0.021000] -> #1 (smpboot_threads_lock){+.+.}:
[0.021000]smpboot_register_percpu_thread_cpumask+0x3b/0x100
[0.021000]spawn_ksoftirqd+0x37/0x40
[0.021000]do_one_initcall+0x3b/0x160
[0.021000]kernel_init_freeable+0x118/0x258
[0.021000]kernel_init+0xa/0x100
[0.021000]ret_from_fork+0x3a/0x50
[0.021000]
[0.021000] -> #0 (cpu_hotplug_lock.rw_sem){.+.+}:
[0.021000]cpus_read_lock+0x3e/0x80
[0.021000]smpboot_register_percpu_thread_cpumask+0x2d/0x100
[0.021000]lockup_detector_init+0x3e/0x74
[0.021000]kernel_init_freeable+0x146/0x258
[0.021000]kernel_init+0xa/0x100
[0.021000]ret_from_fork+0x3a/0x50
[0.021000]
[0.021000] other info that might help us debug this:
[0.021000]
[0.021000] Chain exists of:
[0.021000]   cpu_hotplug_lock.rw_sem --> smpboot_threads_lock --> cpuset_mutex
[0.021000]
[0.021000]  Possible unsafe locking scenario:
[0.021000]
[0.021000]CPU0CPU1
[0.021000]
[0.021000]   lock(cpuset_mutex);
[0.021000]lock(smpboot_threads_lock);
[0.021000]lock(cpuset_mutex);
[0.021000]   lock(cpu_hotplug_lock.rw_sem);
[0.021000]
[0.021000]  *** DEADLOCK ***
[0.021000]
[0.021000] 1 lock held by swapper/0/1:
[0.021000]  #0:  (cpuset_mutex){+.+.}, at: [<8529a52c>] __sched_setscheduler+0xb5/0x8b0
[0.021000]
[0.021000] stack backtrace:
[0.021000] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.16.0-rc1+ #64
[0.021000] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-2.fc27 04/01/2014
[0.021000] Call Trace:
[0.021000]  dump_stack+0x85/0xc5
[0.021000]  print_circular_bug.isra.38+0x1ce/0x1db
[0.021000]  __lock_acquire+0x1278/0x1320
[0.021000]  ? sc

Re: [PATCH V3 04/10] sched/core: Prevent race condition between cpuset and __sched_setscheduler()

2018-02-14 Thread Juri Lelli
On 14/02/18 11:36, Juri Lelli wrote:
> Hi Mathieu,
> 
> On 13/02/18 13:32, Mathieu Poirier wrote:
> > No synchronisation mechanism exists between the cpuset subsystem and calls
> > to function __sched_setscheduler().  As such it is possible that new root
> > domains are created on the cpuset side while a deadline acceptance test
> > is carried out in __sched_setscheduler(), leading to a potential oversell
> > of CPU bandwidth.
> > 
> > By making available the cpuset_mutex to the core scheduler it is possible
> > to prevent situations such as the one described above from happening.
> > 
> > Signed-off-by: Mathieu Poirier 
> > ---
> 
> [...]
> 
> > diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> > index f727c3d0064c..0d8badcf1f0f 100644
> > --- a/kernel/sched/core.c
> > +++ b/kernel/sched/core.c
> > @@ -4176,6 +4176,13 @@ static int __sched_setscheduler(struct task_struct *p,
> > }
> >  
> > /*
> > +* Make sure we don't race with the cpuset subsystem where root
> > +* domains can be rebuilt or modified while operations like DL
> > +* admission checks are carried out.
> > +*/
> > +   cpuset_lock();
> > +
> > +   /*
> 
> Mmm, I'm afraid we can't do this. __sched_setscheduler might be called
> from interrupt context by normalize_rt_tasks().

Maybe conditionally grabbing it if pi is true could do? I guess we don't
care much about domains when sysrq.


Re: [PATCH V3 04/10] sched/core: Prevent race condition between cpuset and __sched_setscheduler()

2018-02-14 Thread Juri Lelli
Hi Mathieu,

On 13/02/18 13:32, Mathieu Poirier wrote:
> No synchronisation mechanism exists between the cpuset subsystem and calls
> to function __sched_setscheduler().  As such it is possible that new root
> domains are created on the cpuset side while a deadline acceptance test
> is carried out in __sched_setscheduler(), leading to a potential oversell
> of CPU bandwidth.
> 
> By making available the cpuset_mutex to the core scheduler it is possible
> to prevent situations such as the one described above from happening.
> 
> Signed-off-by: Mathieu Poirier 
> ---

[...]

> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index f727c3d0064c..0d8badcf1f0f 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -4176,6 +4176,13 @@ static int __sched_setscheduler(struct task_struct *p,
>   }
>  
>   /*
> +  * Make sure we don't race with the cpuset subsystem where root
> +  * domains can be rebuilt or modified while operations like DL
> +  * admission checks are carried out.
> +  */
> + cpuset_lock();
> +
> + /*

Mmm, I'm afraid we can't do this. __sched_setscheduler might be called
from interrupt context by normalize_rt_tasks().

Best,

- Juri


[PATCH V3 04/10] sched/core: Prevent race condition between cpuset and __sched_setscheduler()

2018-02-13 Thread Mathieu Poirier
No synchronisation mechanism exists between the cpuset subsystem and calls
to function __sched_setscheduler().  As such it is possible that new root
domains are created on the cpuset side while a deadline acceptance test
is carried out in __sched_setscheduler(), leading to a potential oversell
of CPU bandwidth.

By making available the cpuset_mutex to the core scheduler it is possible
to prevent situations such as the one described above from happening.

Signed-off-by: Mathieu Poirier 
---
 include/linux/cpuset.h |  6 ++++++
 kernel/cgroup/cpuset.c | 16 ++++++++++++++++
 kernel/sched/core.c    |  9 +++++++++
 3 files changed, 31 insertions(+)

diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index 934633a05d20..4bbb3f5a3020 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -55,6 +55,8 @@ extern void cpuset_init_smp(void);
 extern void cpuset_force_rebuild(void);
 extern void cpuset_update_active_cpus(void);
 extern void cpuset_wait_for_hotplug(void);
+extern void cpuset_lock(void);
+extern void cpuset_unlock(void);
 extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
 extern void cpuset_cpus_allowed_fallback(struct task_struct *p);
 extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
@@ -176,6 +178,10 @@ static inline void cpuset_update_active_cpus(void)
 
 static inline void cpuset_wait_for_hotplug(void) { }
 
+static inline void cpuset_lock(void) { }
+
+static inline void cpuset_unlock(void) { }
+
 static inline void cpuset_cpus_allowed(struct task_struct *p,
   struct cpumask *mask)
 {
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index d18c72e83de4..41f8391640e6 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -2410,6 +2410,22 @@ void __init cpuset_init_smp(void)
 }
 
 /**
+ * cpuset_lock - Grab the cpuset_mutex from another subsystem
+ */
+void cpuset_lock(void)
+{
+        mutex_lock(&cpuset_mutex);
+}
+
+/**
+ * cpuset_unlock - Release the cpuset_mutex from another subsystem
+ */
+void cpuset_unlock(void)
+{
+        mutex_unlock(&cpuset_mutex);
+}
+
+/**
  * cpuset_cpus_allowed - return cpus_allowed mask from a tasks cpuset.
  * @tsk: pointer to task_struct from which to obtain cpuset->cpus_allowed.
  * @pmask: pointer to struct cpumask variable to receive cpus_allowed set.
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index f727c3d0064c..0d8badcf1f0f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4176,6 +4176,13 @@ static int __sched_setscheduler(struct task_struct *p,
         }
 
         /*
+         * Make sure we don't race with the cpuset subsystem where root
+         * domains can be rebuilt or modified while operations like DL
+         * admission checks are carried out.
+         */
+        cpuset_lock();
+
+        /*
          * Make sure no PI-waiters arrive (or leave) while we are
          * changing the priority of the task:
          *
@@ -4247,6 +4254,7 @@ static int __sched_setscheduler(struct task_struct *p,
         if (unlikely(oldpolicy != -1 && oldpolicy != p->policy)) {
                 policy = oldpolicy = -1;
                 task_rq_unlock(rq, p, &rf);
+                cpuset_unlock();
                 goto recheck;
         }
 
@@ -4316,6 +4324,7 @@ static int __sched_setscheduler(struct task_struct *p,
 
 unlock:
         task_rq_unlock(rq, p, &rf);
+        cpuset_unlock();
         return retval;
 }
 
-- 
2.7.4