This patch fixes the issue that every 10th or so process becomes a zombie.

Currently, whenever a timer irq happens, tasks are rescheduled. There is one
problem with this approach though. When a task calls exit() it is marked as
EXIT_DEAD. The kernel will then remove such a task once all its references have
been dropped, but it will not consider it for rescheduling.

Now if the timer interrupt interrupts the exit() syscall before the reference
has been dropped but after the task is marked as dead, the task will never lose
all its references and thus becomes a zombie.

The solution to this problem is to never reschedule a task which is currently
running in kernel space. For this we introduce a new global variable
kernel_mode, which will be set to "1" whenever a process enters kernel mode, for
example through a syscall or when it gets interrupted by an interrupt.
Now, when the interrupt handler is entered we check kernel_mode to see if we are
already in kernel mode and store the result into the irq stack frame of the
current process. When the irq handler is left again we only reschedule tasks if
the current task was in user mode at the time of interruption.

This is the expected behaviour for a kernel build without CONFIG_PREEMPT.

Signed-off-by: Lars-Peter Clausen <[email protected]>
---
 arch/lm32/include/asm/ptrace.h    |    2 +-
 arch/lm32/include/asm/registers.h |    4 ++
 arch/lm32/include/asm/setup.h     |    1 +
 arch/lm32/kernel/entry.S          |   98 ++++++++++++++++++------------------
 arch/lm32/kernel/process.c        |   13 ++---
 arch/lm32/kernel/setup.c          |    2 +
 arch/lm32/kernel/signal.c         |   16 +++---
 7 files changed, 71 insertions(+), 65 deletions(-)

diff --git a/arch/lm32/include/asm/ptrace.h b/arch/lm32/include/asm/ptrace.h
index 8ca0ff6..031e119 100644
--- a/arch/lm32/include/asm/ptrace.h
+++ b/arch/lm32/include/asm/ptrace.h
@@ -43,7 +43,7 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
-#define user_mode(regs) (get_fs() == USER_DS)
+#define user_mode(regs) ((regs)->pt_mode == PT_MODE_USER)
 #define instruction_pointer(regs) ((regs)->ea)
 #define profile_pc(regs) instruction_pointer(regs)
 extern void show_regs(struct pt_regs *);
diff --git a/arch/lm32/include/asm/registers.h b/arch/lm32/include/asm/registers.h
index 67a468f..8f52e8c 100644
--- a/arch/lm32/include/asm/registers.h
+++ b/arch/lm32/include/asm/registers.h
@@ -13,6 +13,9 @@
 #ifndef _ASM_REGISTERS_H
 #define _ASM_REGISTERS_H
 
+#define PT_MODE_KERNEL 1
+#define PT_MODE_USER 0
+
 #ifndef __ASSEMBLY__
 
 /* this struct defines the way the registers are stored on the
@@ -50,6 +53,7 @@ struct pt_regs {
   long     ra;
   long     ea;
   long     ba;
+  unsigned int pt_mode;
 };
 
 /* this defines the registers stored during an interrupt */
diff --git a/arch/lm32/include/asm/setup.h b/arch/lm32/include/asm/setup.h
index cb8f618..6aba7e6 100644
--- a/arch/lm32/include/asm/setup.h
+++ b/arch/lm32/include/asm/setup.h
@@ -28,6 +28,7 @@
 
 #ifdef __KERNEL__
 
+extern unsigned int kernel_mode;
 extern unsigned int cpu_frequency;
 extern unsigned int sdram_start;
 extern unsigned int sdram_size;
diff --git a/arch/lm32/kernel/entry.S b/arch/lm32/kernel/entry.S
index 232d253..aec9e65 100644
--- a/arch/lm32/kernel/entry.S
+++ b/arch/lm32/kernel/entry.S
@@ -7,6 +7,7 @@
 #include <asm/setup.h>
 #include <asm/segment.h>
 #include <asm/asm-offsets.h>
+#include <asm/registers.h>
 
 /* 
  * Exception vector table (see "LatticeMico32 Processor Reference Manual")
@@ -29,7 +30,7 @@
 
 /* exception vector for os-aware gdb and kernel signals */
 #define KERNEL_EXCEPTION_VECTOR(offset) \
-       addi    sp,sp,-128; \
+       addi    sp,sp,-132; \
        sw              (sp+120), ra; \
        calli   _save_syscall_frame; \
        mvi             r1, offset; \
@@ -132,7 +133,7 @@ ENTRY(system_call)
 2:/* we now are on kernel stack and registers are untainted */
 
   /* save registers */
-       addi  sp, sp, -128
+       addi  sp, sp, -132
        sw    (sp + 120), ra
        calli _save_syscall_frame
 
@@ -224,14 +225,14 @@ ENTRY(system_call)
        bi 2f; \
 1:/* already on kernel stack */ \
        /* restore r9, r10 */ \
-       lw r9, (sp+0); \
-       lw r10, (sp+-4); \
+       lw r9, (sp+-4); \
+       lw r10, (sp+-8); \
        /* no need to restore r11 as we did not use it */ \
 2:/* now for sure on kernel stack */
 
 _long_breakpoint_handler:
        ENSURE_TO_BE_ON_KERNEL_STACK; \
-       addi    sp,sp,-128; \
+       addi    sp,sp,-132; \
        calli   _save_syscall_frame; \
        mvi             r1, 32; /* 32 = breakpoint magic offset */ \
        addi    r2, sp, 4; \
@@ -358,7 +359,7 @@ ENTRY(sys_clone_wrapper)
 /* we therefore only save and restore the caller saved registers */
 /* (r1-r10, ra, ea because an interrupt could interrupt another one) */
 _long_interrupt_handler:
-       addi    sp, sp, -128
+       addi    sp, sp, -132
        sw      (sp+120), ra
        calli   _save_irq_frame
 
@@ -393,10 +394,22 @@ _save_irq_frame:
        sw      (sp+44),  r10
        /* ra (sp + 120) has already been written */
        sw      (sp+124), ea
-       ret
+
+       mvhi r1, hi(kernel_mode)
+       ori r1, r1, lo(kernel_mode)
+       lw r2, (r1+0)
+       sw (sp+132), r2
+       mvi r2, PT_MODE_KERNEL
+       sw (r1+0), r2
+ret
 
 /* restore all caller saved registers saved in _save_irq_frame and return from 
exception */
 _restore_irq_frame_and_return:
+       lw r2, (sp+132)
+       mvhi r1, hi(kernel_mode)
+       ori r1, r1, lo(kernel_mode)
+       sw (r1+0), r2
+
        lw      r1,  (sp+8);
        lw      r2,  (sp+12);
        lw      r3,  (sp+16);
@@ -409,11 +422,10 @@ _restore_irq_frame_and_return:
        lw      r10, (sp+44);
        lw      ra,  (sp+120)
        lw      ea,  (sp+124)
-       addi    sp, sp, 128
+       addi    sp, sp, 132
        eret
 
 _save_syscall_frame:
-       sw      (sp+4),   r0
        sw      (sp+8),   r1
        sw      (sp+12),  r2
        sw      (sp+16),  r3
@@ -441,11 +453,18 @@ _save_syscall_frame:
        sw      (sp+104), r25
        sw      (sp+108), r26
        sw      (sp+112), r27
-       addi     r7, sp, 128 /* we could store usp here */
+       addi     r7, sp, 132 /* we could store usp here */
        sw      (sp+116), r7
        /* ra (sp + 120) has already been written */
        sw      (sp+124), ea
        sw      (sp+128), ba
+
+       mvhi r11, hi(kernel_mode)
+       ori r11, r11, lo(kernel_mode)
+       lw r12, (r11+0)
+       sw (sp+132), r12
+       mvi r12, PT_MODE_KERNEL
+       sw (r11+0), r12
        ret
 
 /************************/
@@ -469,6 +488,10 @@ _save_syscall_frame:
 
 #define RETURN_FROM_SYSCALL_OR_EXCEPTION(label, addr_register, return_instr) \
 label: \
+       lw r2, (sp+132); \
+       mvhi r1, hi(kernel_mode); \
+       ori r1, r1, lo(kernel_mode); \
+       sw (r1+0), r2; \
        /* prepare switch to user stack but keep kernel stack pointer in r11 */ 
\
        /* r9: scratch register */ \
        /* r10: current = current_thread_info()->task */ \
@@ -479,10 +502,10 @@ label: \
        lw r9, (r9+0); /* dereference lm32_current_thread */ \
        lw r10, (r9+TI_TASK); /* load pointer to task */ \
        /* set task->thread.which_stack to 1 (user stack) */ \
-       mvi r9, 1; \
+       mvi r9, TASK_USP - TASK_KSP; \
        sw (r10+TASK_WHICH_STACK), r9; \
        /* store ksp (after restore of frame) into task->thread.ksp */ \
-       addi r9, sp, 128; \
+       addi r9, sp, 132; \
        sw (r10+TASK_KSP), r9; \
        /* save sp into r11 */ \
        mv r11, sp; \
@@ -539,10 +562,7 @@ 
RETURN_FROM_SYSCALL_OR_EXCEPTION(_restore_and_return_debug_exception,ea,bret)
  */
 ENTRY(resume)
        /* store whole state to current stack (may be usp or ksp) */
-       addi sp, sp, -128
-       sw  (sp+4),   r0
-       sw  (sp+8),   r1
-       sw  (sp+12),  r2
+       addi sp, sp, -132
        sw  (sp+16),  r3
        sw  (sp+20),  r4
        sw  (sp+24),  r5
@@ -568,42 +588,26 @@ ENTRY(resume)
        sw  (sp+104), r25
        sw  (sp+108), r26
        sw  (sp+112), r27
-       addi r3, sp, 128 /* special case for stack pointer */
+       addi r3, sp, 132 /* special case for stack pointer */
        sw  (sp+116), r3 /* special case for stack pointer */
        sw      (sp+120), ra
-//     sw  (sp+124), ea
-//     sw  (sp+128), ba
+/*     sw  (sp+124), ea
+       sw  (sp+128), ba */
 
-       /* find out whether we are on kernel or user stack */
-       lw  r3, (r1 + TASK_WHICH_STACK)
-       be  r3, r0, 1f
-
-       /* we are on user stack */
-       sw  (r1 + TASK_USP), sp
-       bi 2f
 
-1:/* we are on kernel stack */
-       sw  (r1 + TASK_KSP), sp
+       /* TODO: Aren't we always on kernel stack at this point? */
 
-2:/* we have stored stack pointer of prev */
+       /* find out whether we are on kernel or user stack */
+       lw  r3, (r1 + TASK_WHICH_STACK)
+       add r3, r3, r1
+       sw  (r3 + TASK_KSP), sp
 
-  /* restore next */
+       /* restore next */
 
        /* find out whether we will be on kernel or user stack */
        lw  r3, (r2 + TASK_WHICH_STACK)
-       be  r3, r0, 3f
-
-       /* we need user stack */
-       lw  sp, (r2 + TASK_USP)
-       bi 4f
-
-3:/* we need kernel stack */
-       lw  sp, (r2 + TASK_KSP)
-
-4:/* we have restored sp of next */
-
-       /* setup return value */
-       mv      r1, r2
+       add r3, r3, r2
+       lw  sp, (r3 + TASK_KSP)
 
        lw  r2,  (sp+12)
        lw  r3,  (sp+16)
@@ -633,17 +637,13 @@ ENTRY(resume)
        lw  r27, (sp+112)
        /* skip sp for now */
        lw  ra,  (sp+120)
-//     lw  ea,  (sp+124)
-//     lw  ba,  (sp+128)
+/*     lw  ea,  (sp+124)
+       lw  ba,  (sp+128) */
        /* Stack pointer must be restored last --- it will be updated */
        lw  sp,  (sp+116)
 
        ret
 
-/* extern asmlinkage void break_label(void); */
-ENTRY(break_label)
-       break
-
 .align 4
 ENTRY(_sys_call_table)
        .long sys_ni_syscall    /* 0  -  old "setup()" system call*/
diff --git a/arch/lm32/kernel/process.c b/arch/lm32/kernel/process.c
index d1ca380..6ad94cc9 100644
--- a/arch/lm32/kernel/process.c
+++ b/arch/lm32/kernel/process.c
@@ -127,7 +127,7 @@ void show_regs(struct pt_regs * regs)
 }
 
 
-void kernel_thread_helper(int reserved, int (*fn)(void*), void* arg)
+static void kernel_thread_helper(int reserved, int (*fn)(void*), void* arg)
 {
   /* Note: read copy_thread, kernel_thread and ret_from_fork to fully 
appreciate why the first argument is "reserved" */
 
@@ -151,6 +151,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned 
long flags)
        regs.r2 = (unsigned long)fn;
        regs.r3 = (unsigned long)arg;
        regs.r5 = (unsigned long)kernel_thread_helper;
+       regs.pt_mode = PT_MODE_KERNEL;
        return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, 
NULL);
 }
 
@@ -193,8 +194,6 @@ unsigned long thread_saved_pc(struct task_struct *tsk)
        return 0;
 }
 
-
-
 int copy_thread(unsigned long clone_flags,
                unsigned long usp, unsigned long stk_size,
                struct task_struct * p, struct pt_regs * regs)
@@ -202,7 +201,7 @@ int copy_thread(unsigned long clone_flags,
        unsigned long child_tos = KSTK_TOS(p);
        struct pt_regs *childregs;
 
-       if (regs->r5 == (unsigned long)kernel_thread_helper) {
+       if (!user_mode(regs)) {
                /* kernel thread */
 
                if( usp != 0 )
@@ -210,11 +209,10 @@ int copy_thread(unsigned long clone_flags,
 
                /* childregs = full task switch frame on kernel stack of child 
*/
                childregs = (struct pt_regs *)(child_tos) - 1;
-
                *childregs = *regs;
 
                childregs->r4 = 0; /* child gets zero as return value */
-               regs->r4 = p->pid; /* parent gets child pid as return value */ 
+               regs->r4 = p->pid; /* parent gets child pid as return value */
 
                /* return via ret_from_fork */
                childregs->ra = (unsigned long)ret_from_fork;
@@ -272,8 +270,6 @@ int copy_thread(unsigned long clone_flags,
                 * syscall frame */
                childregs->sp = (unsigned long)childsyscallregs - 4;
 
-               put_task_struct(p);
-
                /*printk("copy_thread2: ->pid=%d p=%lx regs=%lx childregs=%lx 
r5=%lx ra=%lx "
                                "dsf=%lx p->thread.ksp=%lx p->thread.usp=%lx\n",
                                p->pid, p, regs, childregs, childregs->r5, 
childregs->ra,
@@ -302,6 +298,7 @@ void start_thread(struct pt_regs * regs, unsigned long pc, 
unsigned long usp)
        regs->sp = usp;
        current->thread.usp = usp;
        regs->fp = current->mm->start_data;
+       regs->pt_mode = PT_MODE_USER;
 
        //printk("start_thread: current=%lx usp=%lx\n", current, usp);
 }
diff --git a/arch/lm32/kernel/setup.c b/arch/lm32/kernel/setup.c
index 7f572c7..434817c 100644
--- a/arch/lm32/kernel/setup.c
+++ b/arch/lm32/kernel/setup.c
@@ -61,6 +61,8 @@
 #include <asm/hw/milkymist.h>
 #endif
 
+unsigned int kernel_mode = PT_MODE_KERNEL;
+
 /* this is set first thing as the kernel is started
  * from the arguments to the kernel. */
 unsigned long asmlinkage _kernel_arg_cmdline; /* address of the commandline 
parameters */
diff --git a/arch/lm32/kernel/signal.c b/arch/lm32/kernel/signal.c
index 2e1fb84..fd81c5a 100644
--- a/arch/lm32/kernel/signal.c
+++ b/arch/lm32/kernel/signal.c
@@ -428,9 +428,13 @@ int do_signal(int retval, struct pt_regs *regs, int* 
handled)
        return retval;
 }
 
-asmlinkage int manage_signals(int retval, struct pt_regs* regs) {
+asmlinkage int manage_signals(int retval, struct pt_regs* regs)
+{
        unsigned long flags;
 
+       if (regs->pt_mode == PT_MODE_KERNEL)
+               return 0;
+
        /* disable interrupts for sampling current_thread_info()->flags */
        local_irq_save(flags);
        while( current_thread_info()->flags & (_TIF_NEED_RESCHED | 
_TIF_SIGPENDING) ) {
@@ -472,10 +476,11 @@ asmlinkage int manage_signals(int retval, struct pt_regs* 
regs) {
        return retval;
 }
 
-asmlinkage void manage_signals_irq(struct pt_regs* regs) {
+asmlinkage void manage_signals_irq(struct pt_regs* regs)
+{
        unsigned long flags;
-       /* do not handle in atomic mode */
-       if (unlikely(in_atomic_preempt_off()) && unlikely(!current->exit_state))
+
+       if (regs->pt_mode == PT_MODE_KERNEL)
                return;
 
        /* disable interrupts for sampling current_thread_info()->flags */
@@ -484,9 +489,6 @@ asmlinkage void manage_signals_irq(struct pt_regs* regs) {
        if( current_thread_info()->flags & _TIF_NEED_RESCHED ) {
                /* schedule -> enables interrupts */
                schedule();
-               
-               /* disable interrupts for sampling current_thread_info()->flags 
*/
-               local_irq_disable();
        }
 
        local_irq_restore(flags);
-- 
1.7.2.3

_______________________________________________
http://lists.milkymist.org/listinfo.cgi/devel-milkymist.org
IRC: #milkymist@Freenode
Twitter: www.twitter.com/milkymistvj
Ideas? http://milkymist.uservoice.com

Reply via email to