Cliff Wickman wrote:

Hi Cliff,

> Hi Keith,
> 
> On Wed, Oct 29, 2008 at 03:57:25PM +1100, Keith Owens wrote:
>> However there is a separate problem with your patch.  You now wait in
>> smp_kdb_stop() until all cpus are in KDB.  If any cpu is completely
>> hung so it cannot be interrupted then smp_kdb_stop() will never return
>> and KDB will now appear to hang.
>>
>> The existing code avoids this by
>>
>>   kdb() -> smp_kdb_stop() - issue KDB_VECTOR as normal interrupt but do not 
>> wait for cpus
>>   kdb() -> kdba_main_loop()
>>     kdba_main_loop() -> kdb_save_running()
>>       kdb_save_running() -> kdb_main_loop()
>>         kdb_main_loop() -> kdb_wait_for_cpus()
>>
>> kdb_wait_for_cpus() waits until the other cpus are in KDB.  If a cpu
>> does not respond to KDB_VECTOR after a few seconds then
>> kdb_wait_for_cpus() hits the missing cpus with NMI.
>>
>> This two step approach (send KDB_VECTOR as normal interrupt, wait then
>> send NMI) is used because NMI can be serviced at any time, even when
>> the target cpu is in the middle of servicing an interrupt.  This can
>> result in incomplete register state which leads to broken backtraces.
>> IOW, sending NMI first would actually make debugging harder.
>>
>> Given the above logic, if you are going to take over an existing
>> interrupt vector then the vector needs to be acquired near the start of
>> kdb() and released near the end of kdb(), and only on the master cpu.
>>
>> Note: there is no overwhelming need for KDB_VECTOR to have a high
>> priority.  As long as it is received within a few seconds then all is
>> well.
> 
> Thanks for the explanation.  I see your point.

I will let Keith to comment on the logic of your code, but this patch
will cause IA64 compilation to fail because kdb_giveback_vector()
is not defined for IA64.

Suggestions:
1) change kdb_takeover_vector and kdb_giveback_vector to arch-dependent
   version of kdba_takeover_vector and kdba_giveback_vector.
2) extern of kdba_giveback_vector should be moved to arch-dependent
   kdb.h (ie, arch/{ia64,x86}/include/asm/kdb.h.) and the ia64 version
   to be a dummy define.
3) kdbmain.c should change accordingly.

Thanks,
jay


> 
> How about if we keep the two step approach, but take over the vector
> when we need it, in step one.  Then give it back when the step two
>  wait is over.
> (assuming we don't take over a vector needed for the NMI)
> 
> Like this:
> 
> ---
>  arch/x86/kdb/kdbasupport_32.c |   22 ++++++++++++++++++----
>  arch/x86/kdb/kdbasupport_64.c |   23 +++++++++++++++++++----
>  include/asm-x86/irq_vectors.h |   11 ++++++-----
>  include/linux/kdb.h           |    1 +
>  kdb/kdbmain.c                 |    2 ++
>  5 files changed, 46 insertions(+), 13 deletions(-)
> 
> Index: 081002.linus/arch/x86/kdb/kdbasupport_32.c
> ===================================================================
> --- 081002.linus.orig/arch/x86/kdb/kdbasupport_32.c
> +++ 081002.linus/arch/x86/kdb/kdbasupport_32.c
> @@ -883,9 +883,6 @@ kdba_cpu_up(void)
>  static int __init
>  kdba_arch_init(void)
>  {
> -#ifdef       CONFIG_SMP
> -     set_intr_gate(KDB_VECTOR, kdb_interrupt);
> -#endif
>       set_intr_gate(KDBENTER_VECTOR, kdb_call);
>       return 0;
>  }
> @@ -1027,14 +1024,31 @@ kdba_verify_rw(unsigned long addr, size_
>  
>  #include <mach_ipi.h>
>  
> +gate_desc save_idt[NR_VECTORS];
> +
> +void kdb_takeover_vector(int vector)
> +{
> +     memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
> +     set_intr_gate(KDB_VECTOR, kdb_interrupt);
> +     return;
> +}
> +
> +void kdb_giveback_vector(int vector)
> +{
> +     native_write_idt_entry(idt_table, vector, &save_idt[vector]);
> +     return;
> +}
> +
>  /* When first entering KDB, try a normal IPI.  That reduces backtrace 
> problems
>   * on the other cpus.
>   */
>  void
>  smp_kdb_stop(void)
>  {
> -     if (!KDB_FLAG(NOIPI))
> +     if (!KDB_FLAG(NOIPI)) {
> +             kdb_takeover_vector(KDB_VECTOR);
>               send_IPI_allbutself(KDB_VECTOR);
> +     }
>  }
>  
>  /* The normal KDB IPI handler */
> Index: 081002.linus/arch/x86/kdb/kdbasupport_64.c
> ===================================================================
> --- 081002.linus.orig/arch/x86/kdb/kdbasupport_64.c
> +++ 081002.linus/arch/x86/kdb/kdbasupport_64.c
> @@ -21,6 +21,7 @@
>  #include <linux/interrupt.h>
>  #include <linux/module.h>
>  #include <linux/kdebug.h>
> +#include <linux/cpumask.h>
>  #include <asm/processor.h>
>  #include <asm/msr.h>
>  #include <asm/uaccess.h>
> @@ -900,9 +901,6 @@ kdba_cpu_up(void)
>  static int __init
>  kdba_arch_init(void)
>  {
> -#ifdef       CONFIG_SMP
> -     set_intr_gate(KDB_VECTOR, kdb_interrupt);
> -#endif
>       set_intr_gate(KDBENTER_VECTOR, kdb_call);
>       return 0;
>  }
> @@ -976,14 +974,31 @@ kdba_set_current_task(const struct task_
>  
>  #include <mach_ipi.h>
>  
> +gate_desc save_idt[NR_VECTORS];
> +
> +void kdb_takeover_vector(int vector)
> +{
> +     memcpy(&save_idt[vector], &idt_table[vector], sizeof(gate_desc));
> +     set_intr_gate(KDB_VECTOR, kdb_interrupt);
> +     return;
> +}
> +
> +void kdb_giveback_vector(int vector)
> +{
> +     native_write_idt_entry(idt_table, vector, &save_idt[vector]);
> +     return;
> +}
> +
>  /* When first entering KDB, try a normal IPI.  That reduces backtrace 
> problems
>   * on the other cpus.
>   */
>  void
>  smp_kdb_stop(void)
>  {
> -     if (!KDB_FLAG(NOIPI))
> +     if (!KDB_FLAG(NOIPI)) {
> +             kdb_takeover_vector(KDB_VECTOR);
>               send_IPI_allbutself(KDB_VECTOR);
> +     }
>  }
>  
>  /* The normal KDB IPI handler */
> Index: 081002.linus/include/asm-x86/irq_vectors.h
> ===================================================================
> --- 081002.linus.orig/include/asm-x86/irq_vectors.h
> +++ 081002.linus/include/asm-x86/irq_vectors.h
> @@ -66,7 +66,6 @@
>  # define RESCHEDULE_VECTOR           0xfc
>  # define CALL_FUNCTION_VECTOR                0xfb
>  # define CALL_FUNCTION_SINGLE_VECTOR 0xfa
> -#define      KDB_VECTOR                      0xf9
>  # define THERMAL_APIC_VECTOR         0xf0
>  
>  #else
> @@ -79,10 +78,6 @@
>  #define THERMAL_APIC_VECTOR          0xfa
>  #define THRESHOLD_APIC_VECTOR                0xf9
>  #define UV_BAU_MESSAGE                       0xf8
> -/* Overload KDB_VECTOR with UV_BAU_MESSAGE. By the time the UV hardware is
> - * ready, we should have moved to a dynamically allocated vector scheme.
> - */
> -#define KDB_VECTOR                   0xf8
>  #define INVALIDATE_TLB_VECTOR_END    0xf7
>  #define INVALIDATE_TLB_VECTOR_START  0xf0    /* f0-f7 used for TLB flush */
>  
> @@ -91,6 +86,12 @@
>  #endif
>  
>  /*
> + * KDB_VECTOR will take over vector 0xfe when it is needed, as in theory
> + * it should not be used anyway.
> + */
> +#define KDB_VECTOR                   0xfe
> +
> +/*
>   * Local APIC timer IRQ vector is on a different priority level,
>   * to work around the 'lost local interrupt if more than 2 IRQ
>   * sources per level' errata.
> Index: 081002.linus/include/linux/kdb.h
> ===================================================================
> --- 081002.linus.orig/include/linux/kdb.h
> +++ 081002.linus/include/linux/kdb.h
> @@ -89,6 +89,7 @@ extern volatile int kdb_flags;                      /* Glob
>  
>  extern void kdb_save_flags(void);
>  extern void kdb_restore_flags(void);
> +extern void kdb_giveback_vector(int);
>  
>  #define KDB_FLAG(flag)               (kdb_flags & KDB_FLAG_##flag)
>  #define KDB_FLAG_SET(flag)   ((void)(kdb_flags |= KDB_FLAG_##flag))
> Index: 081002.linus/kdb/kdbmain.c
> ===================================================================
> --- 081002.linus.orig/kdb/kdbmain.c
> +++ 081002.linus/kdb/kdbmain.c
> @@ -1673,6 +1673,8 @@ kdb_wait_for_cpus(void)
>                                       wait == 1 ? " is" : "s are",
>                                       wait == 1 ? "its" : "their");
>       }
> +     /* give back the vector we took over in smp_kdb_stop */
> +     kdb_giveback_vector(KDB_VECTOR);
>  #endif       /* CONFIG_SMP */
>  }
>  

---------------------------
Use http://oss.sgi.com/ecartis to modify your settings or to unsubscribe.

Reply via email to