From: Srivatsa S. Bhat <[email protected]>

In the event of CPU hotplug, the kernel modifies the cpusets'
cpus_allowed masks as and when necessary to ensure that the tasks belonging to 
the
cpusets have some place (online CPUs) to run on. And regular CPU hotplug is
destructive in the sense that the kernel doesn't remember the original cpuset
configurations set by the user, across hotplug operations.

However, suspend/resume (which uses CPU hotplug) is a special case in
which the kernel has the responsibility to restore the system (during
resume), to exactly the same state it was in before suspend.

commit   d35be8bab9b0ce44bed4b9453f86ebf64062721e
Author: Srivatsa S. Bhat <[email protected]>
Date:   Thu May 24 19:46:26 2012 +0530

        CPU hotplug, cpusets, suspend: Don't modify cpusets during 
suspend/resume

Please apply this patch to the stable trees 3.5.y, 3.4.y

Cc: Srivatsa S. Bhat <[email protected]>
Cc: Linus Torvalds <[email protected]>
Cc: Andrew Morton <[email protected]>
Cc: Peter Zijlstra <[email protected]>
Cc: Thomas Gleixner <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Greg KH <[email protected]>
Cc: [email protected]
Signed-off-by: Preeti U Murthy <[email protected]>
---
 kernel/cpuset.c     |    3 +++
 kernel/sched/core.c |   40 ++++++++++++++++++++++++++++++++++++----
 2 files changed, 39 insertions(+), 4 deletions(-)

diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 14f7070..5fc1570 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2065,6 +2065,9 @@ static void scan_for_empty_cpusets(struct cpuset *root)
  * (of no affect) on systems that are actively using CPU hotplug
  * but making no active use of cpusets.
  *
+ * The only exception to this is suspend/resume, where we don't
+ * modify cpusets at all.
+ *
  * This routine ensures that top_cpuset.cpus_allowed tracks
  * cpu_active_mask on each CPU hotplug (cpuhp) event.
  *
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index ef6a8f2..809e7cf 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -6937,34 +6937,66 @@ int __init 
sched_create_sysfs_power_savings_entries(struct device *dev)
 }
 #endif /* CONFIG_SCHED_MC || CONFIG_SCHED_SMT */
 
+static int num_cpus_frozen;    /* used to mark begin/end of suspend/resume */
+
 /*
  * Update cpusets according to cpu_active mask.  If cpusets are
  * disabled, cpuset_update_active_cpus() becomes a simple wrapper
  * around partition_sched_domains().
+ *
+ * If we come here as part of a suspend/resume, don't touch cpusets because we
+ * want to restore it back to its original state upon resume anyway.
  */
 static int cpuset_cpu_active(struct notifier_block *nfb, unsigned long action,
                             void *hcpu)
 {
-       switch (action & ~CPU_TASKS_FROZEN) {
+       switch (action) {
+       case CPU_ONLINE_FROZEN:
+       case CPU_DOWN_FAILED_FROZEN:
+
+               /*
+                * num_cpus_frozen tracks how many CPUs are involved in suspend
+                * resume sequence. As long as this is not the last online
+                * operation in the resume sequence, just build a single sched
+                * domain, ignoring cpusets.
+                */
+               num_cpus_frozen--;
+               if (likely(num_cpus_frozen)) {
+                       partition_sched_domains(1, NULL, NULL);
+                       break;
+               }
+
+               /*
+                * This is the last CPU online operation. So fall through and
+                * restore the original sched domains by considering the
+                * cpuset configurations.
+                */
+
        case CPU_ONLINE:
        case CPU_DOWN_FAILED:
                cpuset_update_active_cpus();
-               return NOTIFY_OK;
+               break;
        default:
                return NOTIFY_DONE;
        }
+       return NOTIFY_OK;
 }
 
 static int cpuset_cpu_inactive(struct notifier_block *nfb, unsigned long 
action,
                               void *hcpu)
 {
-       switch (action & ~CPU_TASKS_FROZEN) {
+       switch (action) {
        case CPU_DOWN_PREPARE:
                cpuset_update_active_cpus();
-               return NOTIFY_OK;
+               break;
+       case CPU_DOWN_PREPARE_FROZEN:
+               num_cpus_frozen++;
+               partition_sched_domains(1, NULL, NULL);
+               break;
        default:
                return NOTIFY_DONE;
        }
+       return NOTIFY_OK;
 }
 
 void __init sched_init_smp(void)

--
To unsubscribe from this list: send the line "unsubscribe stable" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to