/*
* Wait if task attach is in progress until it is done and then acquire
@@ -3044,8 +3046,23 @@ static int cpuset_can_attach_check(struct cpuset *cs,
struct cpuset *oldcs,
if (!oldcs)
return 0;
- if (!llist_on_list(&oldcs->attach_node))
+ /*
+ * The same cpuset cannot be both a source and a destination.
+ * The current code does not support that, so print a warning and
+ * fail the attach if that happens.
+ */
+ if (WARN_ON_ONCE((!oldcs->attach_source &&
+ llist_on_list(&oldcs->attach_node)) ||
+ cs->attach_source))
+ return -EINVAL;
+
+ if (!llist_on_list(&oldcs->attach_node)) {
llist_add(&oldcs->attach_node, &src_cs_head);
+ oldcs->attach_source = true;
+ }
+
+ if (!llist_on_list(&cs->attach_node))
+ llist_add(&cs->attach_node, &dst_cs_head);
cpus_updated = !cpumask_equal(cs->effective_cpus, oldcs->effective_cpus);
mems_updated = !nodes_equal(cs->effective_mems, oldcs->effective_mems);
@@ -3075,35 +3092,31 @@ static int cpuset_can_attach_check(struct cpuset *cs,
struct cpuset *oldcs,
return 0;
}
-static int cpuset_reserve_dl_bw(struct cpuset *cs)
+static int cpuset_reserve_dl_bw(void)
{
+ struct cpuset *cs;
int cpu, ret;
- if (!cs->sum_migrate_dl_bw)
- return 0;
+ llist_for_each_entry(cs, dst_cs_head.first, attach_node) {
+ if (!cs->sum_migrate_dl_bw)
+ continue;
- cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
- if (unlikely(cpu >= nr_cpu_ids))
- return -EINVAL;
+ cpu = cpumask_any_and(cpu_active_mask, cs->effective_cpus);
+ if (unlikely(cpu >= nr_cpu_ids))
+ return -EINVAL;
- ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
- if (ret)
- return ret;
+ ret = dl_bw_alloc(cpu, cs->sum_migrate_dl_bw);
+ if (ret)
+ return ret;
- cs->dl_bw_cpu = cpu;
+ cs->dl_bw_cpu = cpu;
+ }
return 0;
}
-static void reset_migrate_dl_data(struct cpuset *cs)
-{
- cs->nr_migrate_dl_tasks = 0;
- cs->sum_migrate_dl_bw = 0;
- cs->dl_bw_cpu = -1;
-}
-
/*
* Clear and optionally apply (@cancel is false) the attach related data in the
- * source cpusets.
+ * source or destination cpusets.
*/
static void clear_attach_data(struct llist_head *head, bool cancel)
{
@@ -3115,8 +3128,13 @@ static void clear_attach_data(struct llist_head *head,
bool cancel)
if (cs->nr_migrate_dl_tasks) {
if (!cancel)
atomic_add(cs->nr_migrate_dl_tasks,
&cs->nr_deadline_tasks);
+ else if (cs->dl_bw_cpu >= 0) /* && cancel */
+ dl_bw_free(cs->dl_bw_cpu,
cs->sum_migrate_dl_bw);
cs->nr_migrate_dl_tasks = 0;
+ cs->sum_migrate_dl_bw = 0;
+ cs->dl_bw_cpu = -1;
}
+ cs->attach_source = false;
}
}
@@ -3137,6 +3155,7 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
mutex_lock(&cpuset_mutex);
attach_ctx.cpus_updated = false;
attach_ctx.mems_updated = false;
+ attach_ctx.many_dest_cs = false;
/* Check to see if task is allowed in the cpuset */
ret = cpuset_can_attach_check(cs, oldcs, &setsched_check);
@@ -3161,9 +3180,13 @@ static int cpuset_can_attach(struct cgroup_taskset *tset)
* selected as attach_ctx.old_cs.
*/
cgroup_taskset_for_each(task, css, tset) {
+ struct cpuset *new_cs = css_cs(css);
struct cpuset *new_oldcs = task_cs(task);
- if (new_oldcs != oldcs) {
+ if ((new_oldcs != oldcs) || (new_cs != cs)) {
+ if (new_cs != cs)
+ attach_ctx.many_dest_cs = true;
+ cs = new_cs;
oldcs = new_oldcs;
ret = cpuset_can_attach_check(cs, oldcs,
&setsched_check);
if (ret)
@@ -3197,12 +3220,28 @@ static int cpuset_can_attach(struct cgroup_taskset
*tset)
}
}
- ret = cpuset_reserve_dl_bw(cs);
+ /*
+ * The only case where there are multiple destination cpusets for
+ * task migration is when enabling a v2 cpuset controller, where
+ * tasks will be migrated to multiple child cpusets from a parent
+ * cpuset with the same effective CPUs and memory nodes. IOW, both
+ * attach_ctx.cpus_updated and attach_ctx.mems_updated should be
+ * false. If not, it is a condition that the current code cannot
+ * handle. Print a warning and abort the attach operation as further
+ * code change will be needed.
+ */
+ if (WARN_ON_ONCE(attach_ctx.many_dest_cs && (!cpuset_v2() ||
+ attach_ctx.cpus_updated || attach_ctx.mems_updated))) {
+ ret = -EINVAL;
+ goto out_unlock;
+ }
+
+ ret = cpuset_reserve_dl_bw();
out_unlock:
if (ret) {
- reset_migrate_dl_data(cs);
clear_attach_data(&src_cs_head, true);
+ clear_attach_data(&dst_cs_head, true);
} else {
attach_ctx.in_progress++;
}
@@ -3213,22 +3252,10 @@ static int cpuset_can_attach(struct cgroup_taskset
*tset)
static void cpuset_cancel_attach(struct cgroup_taskset *tset)
{
- struct cgroup_subsys_state *css;
- struct cpuset *cs;
-
- cgroup_taskset_first(tset, &css);
- cs = css_cs(css);
-
mutex_lock(&cpuset_mutex);
dec_attach_in_progress_locked();
clear_attach_data(&src_cs_head, true);
-
- if (cs->dl_bw_cpu >= 0)
- dl_bw_free(cs->dl_bw_cpu, cs->sum_migrate_dl_bw);
-
- if (cs->nr_migrate_dl_tasks)
- reset_migrate_dl_data(cs);
-
+ clear_attach_data(&dst_cs_head, true);
mutex_unlock(&cpuset_mutex);
}
@@ -3312,25 +3339,25 @@ static void cpuset_attach(struct cgroup_taskset *tset)
* In the default hierarchy, enabling cpuset in the child cgroups
* will trigger a cpuset_attach() call with no change in effective cpus
* and mems. In that case, we can optimize out by skipping the task
- * iteration and update.
+ * iteration and update, but the destination cpuset list is iterated to
+ * set old_mems_allowed.
*/
- if (cpuset_v2() && !attach_ctx.cpus_updated && !attach_ctx.mems_updated)
+ if (cpuset_v2() && !attach_ctx.cpus_updated &&
!attach_ctx.mems_updated) {
+ llist_for_each_entry(cs, dst_cs_head.first, attach_node)
+ cs->old_mems_allowed = attach_ctx.nodemask_to;
goto out;
+ }
+ /* Task iteration shouldn't happen with attach_ctx.many_dest_cs set */
cgroup_taskset_for_each(task, css, tset)
cpuset_attach_task(cs, task);
-out:
if (attach_ctx.task_work_queued)
schedule_flush_migrate_mm();
cs->old_mems_allowed = attach_ctx.nodemask_to;
-
- if (cs->nr_migrate_dl_tasks) {
- atomic_add(cs->nr_migrate_dl_tasks, &cs->nr_deadline_tasks);
- reset_migrate_dl_data(cs);
- }
-
+out:
clear_attach_data(&src_cs_head, false);
+ clear_attach_data(&dst_cs_head, false);
dec_attach_in_progress_locked();
mutex_unlock(&cpuset_mutex);