Normally the x86-64 trap handlers for debug/int 3/stack fault run
on a special interrupt stack to make them more robust
when dealing with kernel code.

The PREEMPT_RT kernel can sleep in locks even while allocating
GFP_ATOMIC memory. When one of these trap handlers needs to send 
real time signals for ptrace it allocates memory and could then
try to schedule.  But it is not allowed to schedule on an
IST stack. This can cause warnings and hangs.

This patch disables the IST stacks for these handlers on PREEMPT_RT
kernels. Instead, it lets them run on the normal process stack.

The kernel only really needs the ISTs here to make kernel debuggers more
robust in case someone sets a breakpoint at a place where the stack is
invalid. But there are no kernel debuggers in the standard kernel
that do this.

It also means kprobes cannot be set in situations with invalid stack;
but that sounds like a reasonable restriction.

The stack fault change could minimally impact oops quality, but not very 
much because stack faults are fairly rare.

A better solution would be to use logic similar to the NMI "paranoid"
path: check if the signal is for user space and, if so, go back to entry.S,
switch stacks, call sync_regs, then do the signal sending etc.

But this patch is much simpler and should work too with minimal impact.

Signed-off-by: Andi Kleen <[EMAIL PROTECTED]>

Index: linux-2.6.23-rc4-rt1/arch/x86_64/kernel/setup64.c
===================================================================
--- linux-2.6.23-rc4-rt1.orig/arch/x86_64/kernel/setup64.c
+++ linux-2.6.23-rc4-rt1/arch/x86_64/kernel/setup64.c
@@ -242,7 +242,9 @@ void __cpuinit notrace cpu_init (void)
        for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                static const unsigned int order[N_EXCEPTION_STACKS] = {
                        [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
+#if DEBUG_STACK > 0
                        [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
+#endif
                };
                if (cpu) {
                        estacks = (char *)__get_free_pages(GFP_ATOMIC, 
order[v]);
Index: linux-2.6.23-rc4-rt1/include/asm-x86_64/page.h
===================================================================
--- linux-2.6.23-rc4-rt1.orig/include/asm-x86_64/page.h
+++ linux-2.6.23-rc4-rt1/include/asm-x86_64/page.h
@@ -22,12 +22,21 @@
 #define IRQSTACK_ORDER 2
 #define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER)
 
+#ifdef CONFIG_PREEMPT_RT
+#define STACKFAULT_STACK 0
+#define DOUBLEFAULT_STACK 1
+#define NMI_STACK 2
+#define DEBUG_STACK 0
+#define MCE_STACK 3
+#define N_EXCEPTION_STACKS 3  /* hw limit: 7 */
+#else
 #define STACKFAULT_STACK 1
 #define DOUBLEFAULT_STACK 2
 #define NMI_STACK 3
 #define DEBUG_STACK 4
 #define MCE_STACK 5
 #define N_EXCEPTION_STACKS 5  /* hw limit: 7 */
+#endif
 
 #define LARGE_PAGE_MASK (~(LARGE_PAGE_SIZE-1))
 #define LARGE_PAGE_SIZE (_AC(1,UL) << PMD_SHIFT)
Index: linux-2.6.23-rc4-rt1/arch/x86_64/kernel/traps.c
===================================================================
--- linux-2.6.23-rc4-rt1.orig/arch/x86_64/kernel/traps.c
+++ linux-2.6.23-rc4-rt1/arch/x86_64/kernel/traps.c
@@ -130,10 +130,14 @@ static unsigned long *in_exception_stack
                                        unsigned *usedp, char **idp)
 {
        static char ids[][8] = {
+#if DEBUG_STACK > 0
                [DEBUG_STACK - 1] = "#DB",
+#endif
                [NMI_STACK - 1] = "NMI",
                [DOUBLEFAULT_STACK - 1] = "#DF",
+#if STACKFAULT_STACK > 0
                [STACKFAULT_STACK - 1] = "#SS",
+#endif
                [MCE_STACK - 1] = "#MC",
 #if DEBUG_STKSZ > EXCEPTION_STKSZ
                [N_EXCEPTION_STACKS ... N_EXCEPTION_STACKS + DEBUG_STKSZ / 
EXCEPTION_STKSZ - 2] = "#DB[?]"
-
To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to