Re: [PATCH 6/6]suspend/resume SMP support

2005-04-14 Thread Li Shaohua
On Thu, 2005-04-14 at 16:27, Li Shaohua wrote:
> On Wed, 2005-04-13 at 16:32, Pavel Machek wrote:
> > [EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1# dmesg | tail -25
> >  [] activate_task+0x1/0xa0
> >  [] resched_task+0x68/0x90
> >  [] try_to_wake_up+0x2aa/0x2f0
> >  [] fbcon_cursor+0x19a/0x270
> >  [] hide_cursor+0x18/0x30
> >  [] vt_console_print+0x24f/0x260
> >  [] vt_console_print+0x0/0x260
> >  [] __call_console_drivers+0x57/0x60
> >  [] call_console_drivers+0x80/0x110
> >  [] release_console_sem+0x4e/0xc0
> >  [] vprintk+0x192/0x240
> >  [] preempt_schedule_irq+0x51/0x80
> >  [] acpi_processor_idle+0x0/0x265
> >  [] need_resched+0x1f/0x21
> >  [] acpi_processor_idle+0x0/0x265
> >  [] printk+0x17/0x20
> >  [] cpu_init+0x73/0x360
> >  [] start_secondary+0x6/0x170
> > Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e
> > 00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 
> > a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89
> >  <0>Kernel panic - not syncing: Attempted to kill the idle task!
> >  Stuck ??
> > Inquiring remote APIC #0...
> > ... APIC #0 ID: 
> > ... APIC #0 VERSION: 00040011
> > ... APIC #0 SPIV: 00ff
> > [EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1#
> Andrew,
> Below patch fixed Pavel's oops. But strange is the 'system_state' check
> is added for CPU hotplug by Rusty. This really makes me confused. Could
> you please look at it.
> This can be reproduced 100% with radeonfb driver load. Attached is the
> dmesg of an oops. It seems the 'objp' parameter for
> 'cache_alloc_debugcheck_after' is invalid.
It looks like the per-cpu array_cache isn't initialized. It's initialized in a
CPU hotplug callback, so before cpu_up is called for the CPU, all kmalloc calls
will fail. Is that right?

Thanks,
Shaohua

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6]suspend/resume SMP support

2005-04-14 Thread Li Shaohua
On Wed, 2005-04-13 at 16:32, Pavel Machek wrote:
> [EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1# dmesg | tail -25
>  [] activate_task+0x1/0xa0
>  [] resched_task+0x68/0x90
>  [] try_to_wake_up+0x2aa/0x2f0
>  [] fbcon_cursor+0x19a/0x270
>  [] hide_cursor+0x18/0x30
>  [] vt_console_print+0x24f/0x260
>  [] vt_console_print+0x0/0x260
>  [] __call_console_drivers+0x57/0x60
>  [] call_console_drivers+0x80/0x110
>  [] release_console_sem+0x4e/0xc0
>  [] vprintk+0x192/0x240
>  [] preempt_schedule_irq+0x51/0x80
>  [] acpi_processor_idle+0x0/0x265
>  [] need_resched+0x1f/0x21
>  [] acpi_processor_idle+0x0/0x265
>  [] printk+0x17/0x20
>  [] cpu_init+0x73/0x360
>  [] start_secondary+0x6/0x170
> Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e
> 00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 
> a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89
>  <0>Kernel panic - not syncing: Attempted to kill the idle task!
>  Stuck ??
> Inquiring remote APIC #0...
> ... APIC #0 ID: 
> ... APIC #0 VERSION: 00040011
> ... APIC #0 SPIV: 00ff
> [EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1#
Andrew,
The patch below fixes Pavel's oops. The strange thing is that the 'system_state'
check was added for CPU hotplug by Rusty, which really confuses me. Could
you please look at it?
The oops can be reproduced 100% of the time with the radeonfb driver loaded.
Attached is the dmesg of an oops. It seems the 'objp' parameter for
'cache_alloc_debugcheck_after' is invalid.

Thanks,
Shaohua

--- a/kernel/printk.c   2005-04-12 10:12:19.0 +0800
+++ b/kernel/printk.c   2005-04-13 17:22:40.912897328 +0800
@@ -624,8 +624,7 @@ asmlinkage int vprintk(const char *fmt, 
log_level_unknown = 1;
}
 
-   if (!cpu_online(smp_processor_id()) &&
-   system_state != SYSTEM_RUNNING) {
+   if (!cpu_online(smp_processor_id())) {
/*
 * Some console drivers may assume that per-cpu resources have
 * been allocated.  So don't allow them to be called by this

CPU0 attaching NULL sched-domain.
CPU1 attaching NULL sched-domain.
CPU0 attaching NULL sched-domain.
Booting processor 1/1 eip 3000
Initializing CPU#1
masked ExtINT on CPU#1
Unable to handle kernel paging request at virtual address f000acb2
 printing eip:
c014e4cc
*pde = 
Oops:  [#1]
PREEMPT SMP 
Modules linked in:
CPU:1
EIP:0060:[]Not tainted VLI
EFLAGS: 00010097   (2.6.12-rc2-mm3) 
EIP is at check_poison_obj+0x4c/0x1e0
eax: 006b   ebx: 005a   ecx: dff6e080   edx: dff6e480
esi:    edi: f000acb2   ebp: 0080   esp: c14fdcd4
ds: 007b   es: 007b   ss: 0068
Process swapper (pid: 0, threadinfo=c14fc000 task=dff42560)
Stack: dff6e480 5a5a5a5a 5a5a5a5a 007f 5a5a5a5a  005a f000acae 
   dff6e480 c021192e c0150031 dff6e480 f000acae 5a5a5a5a 5a5a5a5a dff6e480 
   0046 0020 0010 c015044b dff6e480 0020 f000acae c021192e 
Call Trace:
 [] soft_cursor+0x5e/0x260
 [] cache_alloc_debugcheck_after+0x181/0x1a0
 [] __kmalloc+0x9b/0xd0
 [] soft_cursor+0x5e/0x260
 [] soft_cursor+0x5e/0x260
 [] bit_cursor+0x339/0x540
 [] recalc_task_prio+0x88/0x150
 [] fbcon_cursor+0x1a2/0x270
 [] hide_cursor+0x25/0x40
 [] vt_console_print+0x2aa/0x2b0
 [] __call_console_drivers+0x62/0x70
 [] call_console_drivers+0x96/0x130
 [] release_console_sem+0x51/0xc0
 [] vprintk+0x19f/0x250
 [] __do_softirq+0xd6/0xf0
 [] preempt_schedule_irq+0x4b/0x80
 [] printk+0x17/0x20
 [] setup_local_APIC+0xe2/0x1d0
 [] smp_callin+0x7a/0x120
 [] start_secondary+0xe/0x190
Code: 24 30 89 14 24 01 c7 e8 13 f8 ff ff 39 44 24 14 89 c5 0f 8d b7 00 00 00 
8d 40 ff 89 44 24 0c 3b 74 24 0c b0 6b 0f 84 8c 01 00 00 <38> 04 3e 74 46 8b 44 
24 14 85 c0 0f 84 48 01 00 00 89 3c 24 83 
 <0>Kernel panic - not syncing: Attempted to kill the idle task!
 Stuck ??
Inquiring remote APIC #1...
... APIC #1 ID: failed
... APIC #1 VERSION: failed
... APIC #1 SPIV: failed


Re: [PATCH 6/6]suspend/resume SMP support

2005-04-13 Thread Pavel Machek
Hi!

> > > Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use
> > > disable/enable_nonboot_cpus API. The S4 part is based on Pavel's
> > > original S4 SMP patch.
> > 
> > I tested it on 2x PII(?) 550MHz system. Suspend went ok, resume loaded
> > image from disk, but then I got
> > 
> > Thawing cpus 
> > Booting processor 1/0 eip 3000
> > 
> > ...and very funny effect on keyboard leds. They started to blink
> > (panic-like), but with very wrong frequency. It looked like 2 cpus
> > doing panic blinks at once...
> Check if /sys/device/system/cpu/cpu1/online attribute works. If it
> works, then it's other issue. I only tested the patches in two HT based
> systems.

Ok, this is a PIII 550MHz system:

[EMAIL PROTECTED]:/home/pavel# cat /proc/cpuinfo
processor   : 0
vendor_id   : GenuineIntel
cpu family  : 6
model   : 7
model name  : Pentium III (Katmai)
stepping: 3
cpu MHz : 551.309
cache size  : 512 KB
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 mmx fxsr sse
bogomips: 1085.44

core id : 255
cpu cores   : 1
processor   : 1
vendor_id   : GenuineIntel
cpu family  : 6
model   : 7
model name  : Pentium III (Katmai)
stepping: 3
cpu MHz : 551.309
cache size  : 512 KB
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 mmx fxsr sse
bogomips: 1097.72

core id : 255
cpu cores   : 1
[EMAIL PROTECTED]:/home/pavel#

Offlining CPU seems to work ok:

[EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1# cat online
1
[EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1# echo 0 > online
[EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1# sync
[EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1# cat /proc/cpuinfo
processor   : 0
vendor_id   : GenuineIntel
cpu family  : 6
model   : 7
model name  : Pentium III (Katmai)
stepping: 3
cpu MHz : 551.309
cache size  : 512 KB
fdiv_bug: no
hlt_bug : no
f00f_bug: no
coma_bug: no
fpu : yes
fpu_exception   : yes
cpuid level : 2
wp  : yes
flags   : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 mmx fxsr sse
bogomips: 1085.44

core id : 255
cpu cores   : 1
[EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1#

Putting cpu back online, I get:

Booting processor 1/0 eip 3000

on console, and mess in syslog. Seems like my cpu #1 panicked but cpu
#0 just keeps going?!

[EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1# dmesg | tail -20
 [] vt_console_print+0x24f/0x260
 [] vt_console_print+0x0/0x260
 [] __call_console_drivers+0x57/0x60
 [] call_console_drivers+0x80/0x110
 [] release_console_sem+0x4e/0xc0
 [] vprintk+0x192/0x240
 [] preempt_schedule_irq+0x51/0x80
 [] acpi_processor_idle+0x0/0x265
 [] need_resched+0x1f/0x21
 [] acpi_processor_idle+0x0/0x265
 [] printk+0x17/0x20
 [] cpu_init+0x73/0x360
 [] start_secondary+0x6/0x170
Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e
00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 
a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89
 <0>Kernel panic - not syncing: Attempted to kill the idle task!
 Stuck ??
Inquiring remote APIC #0...
... APIC #0 ID: 
... APIC #0 VERSION: 00040011
... APIC #0 SPIV: 00ff
[EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1# dmesg | tail -25
 [] activate_task+0x1/0xa0
 [] resched_task+0x68/0x90
 [] try_to_wake_up+0x2aa/0x2f0
 [] fbcon_cursor+0x19a/0x270
 [] hide_cursor+0x18/0x30
 [] vt_console_print+0x24f/0x260
 [] vt_console_print+0x0/0x260
 [] __call_console_drivers+0x57/0x60
 [] call_console_drivers+0x80/0x110
 [] release_console_sem+0x4e/0xc0
 [] vprintk+0x192/0x240
 [] preempt_schedule_irq+0x51/0x80
 [] acpi_processor_idle+0x0/0x265
 [] need_resched+0x1f/0x21
 [] acpi_processor_idle+0x0/0x265
 [] printk+0x17/0x20
 [] cpu_init+0x73/0x360
 [] start_secondary+0x6/0x170
Code: d2 74 bd fc 8b 44 24 28 b9 0e 00 00 00 8b 74 24 14 01 c6 b8 0e
00 00 00 89 74 24 1c 8b 74 24 30 89 44 24 10 8b 7c 24 1c 83 c6 10 
a5 8b 74 24 24 8b 44 24 1c 89 4c 24 10 01 ee f7 d5 21 ee 89
 <0>Kernel panic - not syncing: Attempted to kill the idle task!
 Stuck ??
Inquiring remote APIC #0...
... APIC #0 ID: 
... APIC #0 VERSION: 00040011
... APIC #0 SPIV: 00ff
[EMAIL PROTECTED]:/sys/devices/system/cpu/cpu1#

Pavel
-- 
Boycott Kodak -- for their patent abuse against Java.
-
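To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/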

Re: [PATCH 6/6]suspend/resume SMP support

2005-04-12 Thread Li Shaohua
On Tue, 2005-04-12 at 18:51, Pavel Machek wrote:
> > Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use
> > disable/enable_nonboot_cpus API. The S4 part is based on Pavel's
> > original S4 SMP patch.
> 
> I tested it on 2x PII(?) 550MHz system. Suspend went ok, resume loaded
> image from disk, but then I got
> 
> Thawing cpus 
> Booting processor 1/0 eip 3000
> 
> ...and very funny effect on keyboard leds. They started to blink
> (panic-like), but with very wrong frequency. It looked like 2 cpus
> doing panic blinks at once...
Check if the /sys/devices/system/cpu/cpu1/online attribute works. If it
works, then it's a different issue. I only tested the patches on two HT-based
systems.

Thanks,
Shaohua

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6]suspend/resume SMP support

2005-04-12 Thread Pavel Machek
Hi!

> Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use
> disable/enable_nonboot_cpus API. The S4 part is based on Pavel's
> original S4 SMP patch.

I tested it on 2x PII(?) 550MHz system. Suspend went ok, resume loaded
image from disk, but then I got

Thawing cpus 
Booting processor 1/0 eip 3000

...and very funny effect on keyboard leds. They started to blink
(panic-like), but with very wrong frequency. It looked like 2 cpus
doing panic blinks at once...

Pavel

-- 
Boycott Kodak -- for their patent abuse against Java.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


Re: [PATCH 6/6]suspend/resume SMP support

2005-04-12 Thread Pavel Machek
Hi!

> Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use
> disable/enable_nonboot_cpus API. The S4 part is based on Pavel's
> original S4 SMP patch.

The series looks good to me, but I have not yet been able to actually try
it. I will try to do that in a few hours.
Pavel

-- 
Boycott Kodak -- for their patent abuse against Java.
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/


[PATCH 6/6]suspend/resume SMP support

2005-04-11 Thread Li Shaohua
Using CPU hotplug to support suspend/resume SMP. Both S3 and S4 use
disable/enable_nonboot_cpus API. The S4 part is based on Pavel's
original S4 SMP patch.

Signed-off-by: Li Shaohua<[EMAIL PROTECTED]>
---

 linux-2.6.11-root/drivers/acpi/Kconfig|2 
 linux-2.6.11-root/include/linux/suspend.h |2 
 linux-2.6.11-root/kernel/power/Kconfig|2 
 linux-2.6.11-root/kernel/power/disk.c |   36 ++-
 linux-2.6.11-root/kernel/power/main.c |   16 +++--
 linux-2.6.11-root/kernel/power/smp.c  |   91 +++---
 linux-2.6.11-root/kernel/power/swsusp.c   |2 
 7 files changed, 69 insertions(+), 82 deletions(-)

diff -puN drivers/acpi/Kconfig~smp_sleep drivers/acpi/Kconfig
--- linux-2.6.11/drivers/acpi/Kconfig~smp_sleep 2005-04-12 11:11:14.884685080 
+0800
+++ linux-2.6.11-root/drivers/acpi/Kconfig  2005-04-12 11:11:14.898682952 
+0800
@@ -57,7 +57,7 @@ if ACPI_INTERPRETER
 
 config ACPI_SLEEP
bool "Sleep States (EXPERIMENTAL)"
-   depends on X86
+   depends on X86 && (!SMP || HOTPLUG_CPU)
depends on EXPERIMENTAL
default y
---help---
diff -puN include/linux/suspend.h~smp_sleep include/linux/suspend.h
--- linux-2.6.11/include/linux/suspend.h~smp_sleep  2005-04-12 
11:11:14.885684928 +0800
+++ linux-2.6.11-root/include/linux/suspend.h   2005-04-12 11:11:14.898682952 
+0800
@@ -58,7 +58,7 @@ static inline int software_suspend(void)
 }
 #endif
 
-#ifdef CONFIG_SMP
+#ifdef CONFIG_HOTPLUG_CPU
 extern void disable_nonboot_cpus(void);
 extern void enable_nonboot_cpus(void);
 #else
diff -puN kernel/power/disk.c~smp_sleep kernel/power/disk.c
--- linux-2.6.11/kernel/power/disk.c~smp_sleep  2005-04-12 11:11:14.887684624 
+0800
+++ linux-2.6.11-root/kernel/power/disk.c   2005-04-12 11:11:14.899682800 
+0800
@@ -117,8 +117,8 @@ static void finish(void)
 {
device_resume();
platform_finish();
-   enable_nonboot_cpus();
thaw_processes();
+   enable_nonboot_cpus();
pm_restore_console();
 }
 
@@ -131,28 +131,36 @@ static int prepare_processes(void)
 
sys_sync();
 
+   disable_nonboot_cpus();
+
if (freeze_processes()) {
error = -EBUSY;
-   return error;
+   goto enable_cpu;
}
 
if (pm_disk_mode == PM_DISK_PLATFORM) {
if (pm_ops && pm_ops->prepare) {
if ((error = pm_ops->prepare(PM_SUSPEND_DISK)))
-   return error;
+   goto thaw;
}
}
 
/* Free memory before shutting down devices. */
free_some_memory();
-
return 0;
+thaw:
+   thaw_processes();
+enable_cpu:
+   enable_nonboot_cpus();
+   pm_restore_console();
+   return error;
 }
 
 static void unprepare_processes(void)
 {
-   enable_nonboot_cpus();
+   platform_finish();
thaw_processes();
+   enable_nonboot_cpus();
pm_restore_console();
 }
 
@@ -160,15 +168,9 @@ static int prepare_devices(void)
 {
int error;
 
-   disable_nonboot_cpus();
-   if ((error = device_suspend(PMSG_FREEZE))) {
+   if ((error = device_suspend(PMSG_FREEZE)))
printk("Some devices failed to suspend\n");
-   platform_finish();
-   enable_nonboot_cpus();
-   return error;
-   }
-
-   return 0;
+   return error;
 }
 
 /**
@@ -185,9 +187,9 @@ int pm_suspend_disk(void)
int error;
 
error = prepare_processes();
-   if (!error) {
-   error = prepare_devices();
-   }
+   if (error)
+   return error;
+   error = prepare_devices();
 
if (error) {
unprepare_processes();
@@ -250,7 +252,7 @@ static int software_resume(void)
 
if ((error = prepare_processes())) {
swsusp_close();
-   goto Cleanup;
+   goto Done;
}
 
pr_debug("PM: Reading swsusp image.\n");
diff -puN kernel/power/Kconfig~smp_sleep kernel/power/Kconfig
--- linux-2.6.11/kernel/power/Kconfig~smp_sleep 2005-04-12 11:11:14.888684472 
+0800
+++ linux-2.6.11-root/kernel/power/Kconfig  2005-04-12 11:11:14.899682800 
+0800
@@ -28,7 +28,7 @@ config PM_DEBUG
 
 config SOFTWARE_SUSPEND
bool "Software Suspend (EXPERIMENTAL)"
-   depends on EXPERIMENTAL && PM && SWAP
+   depends on EXPERIMENTAL && PM && SWAP && (HOTPLUG_CPU || !SMP)
---help---
  Enable the possibility of suspending the machine.
  It doesn't need APM.
diff -puN kernel/power/main.c~smp_sleep kernel/power/main.c
--- linux-2.6.11/kernel/power/main.c~smp_sleep  2005-04-12 11:11:14.890684168 
+0800
+++ linux-2.6.11-root/kernel/power/main.c   2005-04-12 11:11:14.899682800 
+0800
@@ -59,6 +59,13 @@ static int suspend_prepare(suspend_state
 
pm_prepare_console();
 
+   disable_nonboot_cpus();
+
+   if (num_online_cpus() != 1) {
+