Re: REGRESSION: boot stalls on several old dual core Intel CPUs

2018-09-03 Thread Kevin Shanahan
On Mon, Sep 03, 2018 at 11:33:05AM +0200, Peter Zijlstra wrote:
> On Mon, Sep 03, 2018 at 10:54:23AM +0200, Peter Zijlstra wrote:
> > On Mon, Sep 03, 2018 at 09:38:15AM +0200, Thomas Gleixner wrote:
> > > On Mon, 3 Sep 2018, Peter Zijlstra wrote:
> > > > On Sat, Sep 01, 2018 at 11:51:26AM +0930, Kevin Shanahan wrote:
> > > > > commit 01548f4d3e8e94caf323a4f664eb347fd34a34ab
> > > > > Author: Martin Schwidefsky 
> > > > > Date:   Tue Aug 18 17:09:42 2009 +0200
> > > > > 
> > > > > clocksource: Avoid clocksource watchdog circular locking 
> > > > > dependency
> > > > > 
> > > > > stop_machine from a multithreaded workqueue is not allowed because
> > > > > of a circular locking dependency between cpu_down and the 
> > > > > workqueue
> > > > > execution. Use a kernel thread to do the clocksource downgrade.
> > > > 
> > > > I cannot find stop_machine usage there; either it went away or I need to
> > > > like wake up.
> > > 
> > > timekeeping_notify() which is involved in switching clock source uses 
> > > stomp
> > > machine.
> > 
> > ARGH... OK, lemme see if I can come up with something other than
> > endlessly spawning that kthread.
> > 
> > A special purpose kthread_worker would make more sense than that.
> 
> Can someone test this?

Boots for me (applied on top of 4.18.5).

Tested-by: Kevin Shanahan 

> ---
>  kernel/time/clocksource.c | 28 ++--
>  1 file changed, 22 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
> index f74fb00d8064..898976d0082a 100644
> --- a/kernel/time/clocksource.c
> +++ b/kernel/time/clocksource.c
> @@ -112,13 +112,28 @@ static int finished_booting;
>  static u64 suspend_start;
>  
>  #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
> -static void clocksource_watchdog_work(struct work_struct *work);
> +static void clocksource_watchdog_work(struct kthread_work *work);
>  static void clocksource_select(void);
>  
>  static LIST_HEAD(watchdog_list);
>  static struct clocksource *watchdog;
>  static struct timer_list watchdog_timer;
> -static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
> +
> +/*
> + * We must use a kthread_worker here, because:
> + *
> + *   clocksource_watchdog_work()
> + * clocksource_select()
> + *   __clocksource_select()
> + * timekeeping_notify()
> + *   stop_machine()
> + *
> + * cannot be called from a regular workqueue, because of deadlocks between
> + * workqueue and stopmachine.
> + */
> +static struct kthread_worker *watchdog_worker;
> +static DEFINE_KTHREAD_WORK(watchdog_work, clocksource_watchdog_work);
> +
>  static DEFINE_SPINLOCK(watchdog_lock);
>  static int watchdog_running;
>  static atomic_t watchdog_reset_pending;
> @@ -158,7 +173,7 @@ static void __clocksource_unstable(struct clocksource *cs)
>  
>   /* kick clocksource_watchdog_work() */
>   if (finished_booting)
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, &watchdog_work);
>  }
>  
>  /**
> @@ -199,7 +214,7 @@ static void clocksource_watchdog(struct timer_list 
> *unused)
>   /* Clocksource already marked unstable? */
>   if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
>   if (finished_booting)
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, 
> &watchdog_work);
>   continue;
>   }
>  
> @@ -269,7 +284,7 @@ static void clocksource_watchdog(struct timer_list 
> *unused)
>*/
>   if (cs != curr_clocksource) {
>   cs->flags |= CLOCK_SOURCE_RESELECT;
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, 
> &watchdog_work);
>   } else {
>   tick_clock_notify();
>   }
> @@ -418,7 +433,7 @@ static int __clocksource_watchdog_work(void)
>   return select;
>  }
>  
> -static void clocksource_watchdog_work(struct work_struct *work)
> +static void clocksource_watchdog_work(struct kthread_work *work)
>  {
>   mutex_lock(&clocksource_mutex);
>   if (__clocksource_watchdog_work())
> @@ -806,6 +821,7 @@ static int __init clocksource_done_booting(void)
>  {
>   mutex_lock(&clocksource_mutex);
>   curr_clocksource = clocksource_default_clock();
> + watchdog_worker = kthread_create_worker(0, "cs-watchdog");
>   finished_booting = 1;
>   /*
>* Run the watchdog first to eliminate unstable clock sources


Re: REGRESSION: boot stalls on several old dual core Intel CPUs

2018-09-03 Thread Kevin Shanahan
On Mon, Sep 03, 2018 at 11:33:05AM +0200, Peter Zijlstra wrote:
> On Mon, Sep 03, 2018 at 10:54:23AM +0200, Peter Zijlstra wrote:
> > On Mon, Sep 03, 2018 at 09:38:15AM +0200, Thomas Gleixner wrote:
> > > On Mon, 3 Sep 2018, Peter Zijlstra wrote:
> > > > On Sat, Sep 01, 2018 at 11:51:26AM +0930, Kevin Shanahan wrote:
> > > > > commit 01548f4d3e8e94caf323a4f664eb347fd34a34ab
> > > > > Author: Martin Schwidefsky 
> > > > > Date:   Tue Aug 18 17:09:42 2009 +0200
> > > > > 
> > > > > clocksource: Avoid clocksource watchdog circular locking 
> > > > > dependency
> > > > > 
> > > > > stop_machine from a multithreaded workqueue is not allowed because
> > > > > of a circular locking dependency between cpu_down and the 
> > > > > workqueue
> > > > > execution. Use a kernel thread to do the clocksource downgrade.
> > > > 
> > > > I cannot find stop_machine usage there; either it went away or I need to
> > > > like wake up.
> > > 
> > > timekeeping_notify() which is involved in switching clock source uses 
> > > stomp
> > > machine.
> > 
> > ARGH... OK, lemme see if I can come up with something other than
> > endlessly spawning that kthread.
> > 
> > A special purpose kthread_worker would make more sense than that.
> 
> Can someone test this?

Boots for me (applied on top of 4.18.5).

Tested-by: Kevin Shanahan 

> ---
>  kernel/time/clocksource.c | 28 ++--
>  1 file changed, 22 insertions(+), 6 deletions(-)
> 
> diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
> index f74fb00d8064..898976d0082a 100644
> --- a/kernel/time/clocksource.c
> +++ b/kernel/time/clocksource.c
> @@ -112,13 +112,28 @@ static int finished_booting;
>  static u64 suspend_start;
>  
>  #ifdef CONFIG_CLOCKSOURCE_WATCHDOG
> -static void clocksource_watchdog_work(struct work_struct *work);
> +static void clocksource_watchdog_work(struct kthread_work *work);
>  static void clocksource_select(void);
>  
>  static LIST_HEAD(watchdog_list);
>  static struct clocksource *watchdog;
>  static struct timer_list watchdog_timer;
> -static DECLARE_WORK(watchdog_work, clocksource_watchdog_work);
> +
> +/*
> + * We must use a kthread_worker here, because:
> + *
> + *   clocksource_watchdog_work()
> + * clocksource_select()
> + *   __clocksource_select()
> + * timekeeping_notify()
> + *   stop_machine()
> + *
> + * cannot be called from a regular workqueue, because of deadlocks between
> + * workqueue and stopmachine.
> + */
> +static struct kthread_worker *watchdog_worker;
> +static DEFINE_KTHREAD_WORK(watchdog_work, clocksource_watchdog_work);
> +
>  static DEFINE_SPINLOCK(watchdog_lock);
>  static int watchdog_running;
>  static atomic_t watchdog_reset_pending;
> @@ -158,7 +173,7 @@ static void __clocksource_unstable(struct clocksource *cs)
>  
>   /* kick clocksource_watchdog_work() */
>   if (finished_booting)
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, &watchdog_work);
>  }
>  
>  /**
> @@ -199,7 +214,7 @@ static void clocksource_watchdog(struct timer_list 
> *unused)
>   /* Clocksource already marked unstable? */
>   if (cs->flags & CLOCK_SOURCE_UNSTABLE) {
>   if (finished_booting)
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, 
> &watchdog_work);
>   continue;
>   }
>  
> @@ -269,7 +284,7 @@ static void clocksource_watchdog(struct timer_list 
> *unused)
>*/
>   if (cs != curr_clocksource) {
>   cs->flags |= CLOCK_SOURCE_RESELECT;
> - schedule_work(&watchdog_work);
> + kthread_queue_work(watchdog_worker, 
> &watchdog_work);
>   } else {
>   tick_clock_notify();
>   }
> @@ -418,7 +433,7 @@ static int __clocksource_watchdog_work(void)
>   return select;
>  }
>  
> -static void clocksource_watchdog_work(struct work_struct *work)
> +static void clocksource_watchdog_work(struct kthread_work *work)
>  {
>   mutex_lock(&clocksource_mutex);
>   if (__clocksource_watchdog_work())
> @@ -806,6 +821,7 @@ static int __init clocksource_done_booting(void)
>  {
>   mutex_lock(&clocksource_mutex);
>   curr_clocksource = clocksource_default_clock();
> + watchdog_worker = kthread_create_worker(0, "cs-watchdog");
>   finished_booting = 1;
>   /*
>* Run the watchdog first to eliminate unstable clock sources


Re: REGRESSION: boot stalls on several old dual core Intel CPUs

2018-08-31 Thread Kevin Shanahan
On Thu, Aug 30, 2018 at 03:04:39PM +0200, Peter Zijlstra wrote:
> On Thu, Aug 30, 2018 at 12:55:30PM +0200, Siegfried Metz wrote:
> > Dear kernel developers,
> > 
> > since mainline kernel 4.18 (up to the latest mainline kernel 4.18.5)
> > Intel Core 2 Duo processors are affected by boot stalling early in the
> > boot process. As it is so early there is no dmesg output (or any log).
> > 
> > A few users in the Arch Linux community used git bisect and tracked the
> > issue down to this bad commit:
> > 7197e77abcb65a71d0b21d67beb24f153a96055e clocksource: Remove kthread
> 
> I just dug out my core2duo laptop (Lenovo T500) and build a tip/master
> kernel for it (x86_64 debian distro .config).
> 
> Seems to boot just fine.. 3/3 so far.
> 
> Any other clues?

One additional data point, my affected system is a Dell Latitude E6400
laptop which has a P8400 CPU:

  vendor_id : GenuineIntel
  cpu family: 6
  model : 23
  model name: Intel(R) Core(TM)2 Duo CPU P8400  @ 2.26GHz
  stepping  : 6
  microcode : 0x610

Judging from what is being discussed in the Arch forums, it does seem
to be related to the CPU having unstable TSC and transitioning to another
clock source.  Workarounds that seem to be reliable are either booting
with clocksource= or with nosmp.

One person did point out that the commit that introduced the kthread
did so to remove a deadlock - is the circular locking dependency
mentioned in that commit still relevant?

commit 01548f4d3e8e94caf323a4f664eb347fd34a34ab
Author: Martin Schwidefsky 
Date:   Tue Aug 18 17:09:42 2009 +0200

clocksource: Avoid clocksource watchdog circular locking dependency

stop_machine from a multithreaded workqueue is not allowed because
of a circular locking dependency between cpu_down and the workqueue
execution. Use a kernel thread to do the clocksource downgrade.

Signed-off-by: Martin Schwidefsky 
Cc: Peter Zijlstra 
Cc: john stultz 
LKML-Reference: <20090818170942.3ab80c91@skybase>
Signed-off-by: Thomas Gleixner 

Thanks,
Kevin.


Re: REGRESSION: boot stalls on several old dual core Intel CPUs

2018-08-31 Thread Kevin Shanahan
On Thu, Aug 30, 2018 at 03:04:39PM +0200, Peter Zijlstra wrote:
> On Thu, Aug 30, 2018 at 12:55:30PM +0200, Siegfried Metz wrote:
> > Dear kernel developers,
> > 
> > since mainline kernel 4.18 (up to the latest mainline kernel 4.18.5)
> > Intel Core 2 Duo processors are affected by boot stalling early in the
> > boot process. As it is so early there is no dmesg output (or any log).
> > 
> > A few users in the Arch Linux community used git bisect and tracked the
> > issue down to this bad commit:
> > 7197e77abcb65a71d0b21d67beb24f153a96055e clocksource: Remove kthread
> 
> I just dug out my core2duo laptop (Lenovo T500) and build a tip/master
> kernel for it (x86_64 debian distro .config).
> 
> Seems to boot just fine.. 3/3 so far.
> 
> Any other clues?

One additional data point, my affected system is a Dell Latitude E6400
laptop which has a P8400 CPU:

  vendor_id : GenuineIntel
  cpu family: 6
  model : 23
  model name: Intel(R) Core(TM)2 Duo CPU P8400  @ 2.26GHz
  stepping  : 6
  microcode : 0x610

Judging from what is being discussed in the Arch forums, it does seem
to be related to the CPU having unstable TSC and transitioning to another
clock source.  Workarounds that seem to be reliable are either booting
with clocksource= or with nosmp.

One person did point out that the commit that introduced the kthread
did so to remove a deadlock - is the circular locking dependency
mentioned in that commit still relevant?

commit 01548f4d3e8e94caf323a4f664eb347fd34a34ab
Author: Martin Schwidefsky 
Date:   Tue Aug 18 17:09:42 2009 +0200

clocksource: Avoid clocksource watchdog circular locking dependency

stop_machine from a multithreaded workqueue is not allowed because
of a circular locking dependency between cpu_down and the workqueue
execution. Use a kernel thread to do the clocksource downgrade.

Signed-off-by: Martin Schwidefsky 
Cc: Peter Zijlstra 
Cc: john stultz 
LKML-Reference: <20090818170942.3ab80c91@skybase>
Signed-off-by: Thomas Gleixner 

Thanks,
Kevin.


Linux 2.2.17 Oops

2000-11-14 Thread Kevin Shanahan

Hi,

Over the weekend sometime a Linux machine where I work oops'd. Nobody else
here really understands anything about this stuff so they rang me. I got
them to copy out what was on the screen and then reboot (this was
Monday). Today (Wednesday) I copied in the text and ran it through
ksymoops.

Note that the 6th number in the first row of the "Stack" output was copied
out as "aad". I just assumed that they missed a leading zero, which
is probably correct, but may not be.

The machine had been up for about 25 days, and has never crashed
before.

Umm, I'm not sure what other info you might need to pinpoint the
problem, but feel free to ask for whatever is necessary. I'm not
subscribed to the list, so please CC any replies to me.

Thanks,
Kevin Shanahan.


 ksymoops output 

ksymoops 0.7c on i686 2.2.17.  Options used
 -V (default)
 -k /proc/ksyms (default)
 -l /proc/modules (default)
 -o /lib/modules/2.2.17/ (default)
 -m /boot/System.map (specified)

Unable to handle kernel NULL pointer dereference at virtual address 0011
current->tss.cr3 = 00101000, %cr3 = 00101000
*pde = 
Oops: 
CPU: 0
EIP: 0010:[<c0161804>]
Using defaults from ksymoops -t elf32-i386 -a i386
EFLAGS: 00010202
eax: 59ad ebx: 0001 ecx: 02206fc0 edx: 19ad
esi: 0aad1400 edi: 4000 ebp: 5400a8c0 esp: c01d7e80
ds: 0018 es: 0018 ss: 0018
Stack:  c01d7edc c014f0e7 c16c0028 c178 0aad 0003 14000ea4
   02206fc0 c16ced54  c0155c52 c02b7000 c0155cb0 c02b7000 c02b7000
   c16ced40 c16ced40 c01d7f00 0800 c0154f50 c16ce820 5400a800 c01d7f00
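Call Trace: [<c014f0e7>] [<c0155c52>] [<c0155cb0>] [<c0154f50>] [<c0154f84>]
[<c0155255>] [<c01553d9>] [<c014aadd>] [<c01164d1>] [<c0108b6f>]
[<c010883c>] [<c0106249>] [<c0106000>] [<c010626c>] [<c01079bc>]
[<c0106000>] [<c010607b>] [<c0106000>] [<c0100175>]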
Code: 39 4b 10 75 17 39 6b 14 75 12 39 73 18 75 0d 8b 43 1c 85 c0

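>>EIP; c0161804 <tcp_chkaddr+140/1f0>   <=
Trace; c014f0e7 <__ip_masq_set_expire+1f/34>
Trace; c0155c52 <ip_chksock+3a/50>
Trace; c0155cb0 <ip_forward+48/4d8>
Trace; c0154f50 <ip_local_deliver+dc/2a0>
Trace; c0154f84 <ip_local_deliver+110/2a0>
Trace; c0155255 <ip_rcv+141/2f4>
Trace; c01553d9 <ip_rcv+2c5/2f4>
Trace; c014aadd <net_bh+179/1d4>
Trace; c01164d1 <do_bottom_half+45/64>
Trace; c0108b6f <do_IRQ+3b/40>
Trace; c010883c <common_interrupt+18/20>
Trace; c0106249 <cpu_idle+5d/6c>
Trace; c0106000 <get_options+0/74>
Trace; c010626c <sys_idle+14/24>
Trace; c01079bc <system_call+34/38>
Trace; c0106000 <get_options+0/74>
Trace; c010607b <cpu_idle+7/18>
Trace; c0106000 <get_options+0/74>
Trace; c0100175 <L6+0/2>
Code;  c0161804 <tcp_chkaddr+140/1f0>
 <_EIP>:
Code;  c0161804 <tcp_chkaddr+140/1f0>   <=
   0:   39 4b 10  cmp    %ecx,0x10(%ebx)   <=
Code;  c0161807 <tcp_chkaddr+143/1f0>
   3:   75 17 jne    1c <_EIP+0x1c> c0161820 <tcp_chkaddr+15c/1f0>
Code;  c0161809 <tcp_chkaddr+145/1f0>
   5:   39 6b 14  cmp    %ebp,0x14(%ebx)
Code;  c016180c <tcp_chkaddr+148/1f0>
   8:   75 12 jne    1c <_EIP+0x1c> c0161820 <tcp_chkaddr+15c/1f0>
Code;  c016180e <tcp_chkaddr+14a/1f0>
   a:   39 73 18  cmp    %esi,0x18(%ebx)
Code;  c0161811 <tcp_chkaddr+14d/1f0>
   d:   75 0d jne    1c <_EIP+0x1c> c0161820 <tcp_chkaddr+15c/1f0>
Code;  c0161813 <tcp_chkaddr+14f/1f0>
   f:   8b 43 1c  mov    0x1c(%ebx),%eax
Code;  c0161816 <tcp_chkaddr+152/1f0>
  12:   85 c0 test   %eax,%eax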

Aiee, killing interrupt handler
kernel panic: Attempted to kill the idle task!
In swapper task - not syncing.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
Please read the FAQ at http://www.tux.org/lkml/



Linux 2.2.17 Oops

2000-11-14 Thread Kevin Shanahan

Hi,

Over the weekend sometime a Linux machine where I work oops'd. Nobody else
here really understands anything about this stuff so they rang me. I got
them to copy out what was on the screen and then reboot (this was
Monday). Today (Wednesday) I copied in the text and ran it through
ksymoops.

Note that the 6th number in the first row of the "Stack" output was copied
out as "aad". I just assumed that they missed a leading zero, which
is probably correct, but may not be.

The machine had been up for about 25 days, and has never crashed
before.

Umm, I'm not sure what other info you might need to pinpoint the
problem, but feel free to ask for whatever is necessary. I'm not
subscribed to the list, so please CC any replies to me.

Thanks,
Kevin Shanahan.


 ksymoops output 

ksymoops 0.7c on i686 2.2.17.  Options used
 -V (default)
 -k /proc/ksyms (default)
 -l /proc/modules (default)
 -o /lib/modules/2.2.17/ (default)
 -m /boot/System.map (specified)

Unable to handle kernel NULL pointer dereference at virtual address 0011
current->tss.cr3 = 00101000, %cr3 = 00101000
*pde = 
Oops: 
CPU: 0
EIP: 0010:[c0161804]
Using defaults from ksymoops -t elf32-i386 -a i386
EFLAGS: 00010202
eax: 59ad ebx: 0001 ecx: 02206fc0 edx: 19ad
esi: 0aad1400 edi: 4000 ebp: 5400a8c0 esp: c01d7e80
ds: 0018 es: 0018 ss: 0018
Stack:  c01d7edc c014f0e7 c16c0028 c178 0aad 0003 14000ea4
   02206fc0 c16ced54  c0155c52 c02b7000 c0155cb0 c02b7000 c02b7000
   c16ced40 c16ced40 c01d7f00 0800 c0154f50 c16ce820 5400a800 c01d7f00
Call Trace: [c014f0e7] [c0155c52] [c0155cb0] [c0154f50] [c0154f84]
[c0155255] [c01553d9] [c014aadd] [c01164d1] [c0108b6f]
[c010883c] [c0106249] [c0106000] [c010626c] [c01079bc]
[c0106000] [c010607b] [c0106000] [c0100175]
Code: 39 4b 10 75 17 39 6b 14 75 12 39 73 18 75 0d 8b 43 1c 85 c0

EIP; c0161804 tcp_chkaddr+140/1f0   =
Trace; c014f0e7 __ip_masq_set_expire+1f/34
Trace; c0155c52 ip_chksock+3a/50
Trace; c0155cb0 ip_forward+48/4d8
Trace; c0154f50 ip_local_deliver+dc/2a0
Trace; c0154f84 ip_local_deliver+110/2a0
Trace; c0155255 ip_rcv+141/2f4
Trace; c01553d9 ip_rcv+2c5/2f4
Trace; c014aadd net_bh+179/1d4
Trace; c01164d1 do_bottom_half+45/64
Trace; c0108b6f do_IRQ+3b/40
Trace; c010883c common_interrupt+18/20
Trace; c0106249 cpu_idle+5d/6c
Trace; c0106000 get_options+0/74
Trace; c010626c sys_idle+14/24
Trace; c01079bc system_call+34/38
Trace; c0106000 get_options+0/74
Trace; c010607b cpu_idle+7/18
Trace; c0106000 get_options+0/74
Trace; c0100175 L6+0/2
Code;  c0161804 tcp_chkaddr+140/1f0
 _EIP:
Code;  c0161804 tcp_chkaddr+140/1f0   =
   0:   39 4b 10  cmp%ecx,0x10(%ebx)   =
Code;  c0161807 tcp_chkaddr+143/1f0
   3:   75 17 jne1c _EIP+0x1c c0161820 tcp_chkaddr+15c/1f0
Code;  c0161809 tcp_chkaddr+145/1f0
   5:   39 6b 14  cmp%ebp,0x14(%ebx)
Code;  c016180c tcp_chkaddr+148/1f0
   8:   75 12 jne1c _EIP+0x1c c0161820 tcp_chkaddr+15c/1f0
Code;  c016180e tcp_chkaddr+14a/1f0
   a:   39 73 18  cmp%esi,0x18(%ebx)
Code;  c0161811 tcp_chkaddr+14d/1f0
   d:   75 0d jne1c _EIP+0x1c c0161820 tcp_chkaddr+15c/1f0
Code;  c0161813 tcp_chkaddr+14f/1f0
   f:   8b 43 1c  mov0x1c(%ebx),%eax
Code;  c0161816 tcp_chkaddr+152/1f0
  12:   85 c0 test   %eax,%eax

Aiee, killing interrupt handler
kernel panic: Attempted to kill the idle task!
In swapper task - not syncing.

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [EMAIL PROTECTED]
Please read the FAQ at http://www.tux.org/lkml/