Hi,

The Procedure Call Standard used in EABI (AAPCS) requires the stack pointer
to be 8-byte aligned by:

  * exception handlers, before calling AAPCS-conforming code.
  * the OS, before giving control to an application.

This diff makes sure our kernel interfaces adhere to that requirement.

Can someone make sure this doesn't break armish/zaurus?

Patrick

diff --git sys/arch/arm/arm/cpuswitch.S sys/arch/arm/arm/cpuswitch.S
index 98e2dbe..0c3d0af 100644
--- sys/arch/arm/arm/cpuswitch.S
+++ sys/arch/arm/arm/cpuswitch.S
@@ -171,6 +171,7 @@ ENTRY(cpu_idle_leave)
 
 ENTRY(cpu_switchto)
        stmfd   sp!, {r4-r7, lr}
+       sub     sp, sp, #4
 
 #ifdef MULTIPROCESSOR
        /* XXX use curcpu() */
@@ -451,6 +452,7 @@ ENTRY(cpu_switchto)
         * Pull the registers that got pushed when either savectx() or
         * cpu_switch() was called and return.
         */
+       add     sp, sp, #4
        ldmfd   sp!, {r4-r7, pc}
 
 /* LINTSTUB: Func: void savectx(struct pcb *pcb) */
@@ -461,6 +463,7 @@ ENTRY(savectx)
 
        /* Push registers.*/
        stmfd   sp!, {r4-r7, lr}
+       sub     sp, sp, #4
 
        /* Store all the registers in the process's pcb */
 #ifndef __XSCALE__
@@ -473,6 +476,7 @@ ENTRY(savectx)
 #endif
 
        /* Pull the regs of the stack */
+       add     sp, sp, #4
        ldmfd   sp!, {r4-r7, pc}
 
 ENTRY(proc_trampoline)
diff --git sys/arch/arm/arm/cpuswitch7.S sys/arch/arm/arm/cpuswitch7.S
index 126b41a..4db9a86 100644
--- sys/arch/arm/arm/cpuswitch7.S
+++ sys/arch/arm/arm/cpuswitch7.S
@@ -165,6 +165,7 @@ ENTRY(cpu_idle_leave)
 
 ENTRY(cpu_switchto)
        stmfd   sp!, {r4-r7, lr}
+       sub     sp, sp, #4
 
 #ifdef MULTIPROCESSOR
        /* XXX use curcpu() */
@@ -396,6 +397,7 @@ ENTRY(cpu_switchto)
         * Pull the registers that got pushed when either savectx() or
         * cpu_switch() was called and return.
         */
+       add     sp, sp, #4
        ldmfd   sp!, {r4-r7, pc}
 
 /* LINTSTUB: Func: void savectx(struct pcb *pcb) */
@@ -406,6 +408,7 @@ ENTRY(savectx)
 
        /* Push registers.*/
        stmfd   sp!, {r4-r7, lr}
+       sub     sp, sp, #4
 
        /* Store all the registers in the process's pcb */
 #ifndef __XSCALE__
@@ -418,6 +421,7 @@ ENTRY(savectx)
 #endif
 
        /* Pull the regs of the stack */
+       add     sp, sp, #4
        ldmfd   sp!, {r4-r7, pc}
 
 ENTRY(proc_trampoline)
diff --git sys/arch/arm/arm/vm_machdep.c sys/arch/arm/arm/vm_machdep.c
index 06f217b..84dfb68 100644
--- sys/arch/arm/arm/vm_machdep.c
+++ sys/arch/arm/arm/vm_machdep.c
@@ -140,10 +140,11 @@ cpu_fork(p1, p2, stack, stacksize, func, arg)
        *tf = *p1->p_addr->u_pcb.pcb_tf;
 
        /*
-        * If specified, give the child a different stack.
+        * If specified, give the child a different stack (make sure
+        * it's 8-byte aligned).
         */
        if (stack != NULL)
-               tf->tf_usr_sp = (u_int)stack + stacksize;
+               tf->tf_usr_sp = ((vaddr_t)(stack) + stacksize) & -8;
 
        sf = (struct switchframe *)tf - 1;
        sf->sf_r4 = (u_int)func;
diff --git sys/arch/arm/include/frame.h sys/arch/arm/include/frame.h
index 31b2936..56e1368 100644
--- sys/arch/arm/include/frame.h
+++ sys/arch/arm/include/frame.h
@@ -75,6 +75,7 @@ typedef struct trapframe {
        register_t tf_svc_sp;
        register_t tf_svc_lr;
        register_t tf_pc;
+       register_t tf_pad;
 } trapframe_t;
 
 /* Register numbers */
@@ -137,6 +138,7 @@ typedef struct irqframe {
        unsigned int if_svc_sp;
        unsigned int if_svc_lr;
        unsigned int if_pc;
+       unsigned int if_pad;
 } irqframe_t;
 
 #define clockframe irqframe
@@ -146,6 +148,7 @@ typedef struct irqframe {
  */
 
 struct switchframe {
+       u_int   sf_pad;
        u_int   sf_r4;
        u_int   sf_r5;
        u_int   sf_r6;
@@ -203,6 +206,7 @@ struct frame {
  */
 
 #define PUSHFRAME                                                         \
+       sub     sp, sp, #4;             /* Align the stack */              \
        str     lr, [sp, #-4]!;         /* Push the return address */      \
        sub     sp, sp, #(4*17);        /* Adjust the stack pointer */     \
        stmia   sp, {r0-r14}^;          /* Push the user mode registers */ \
@@ -221,7 +225,8 @@ struct frame {
         ldmia   sp, {r0-r14}^;         /* Restore registers (usr mode) */ \
         mov     r0, r0;                 /* NOP for previous instruction */ \
        add     sp, sp, #(4*17);        /* Adjust the stack pointer */     \
-       ldr     lr, [sp], #0x0004       /* Pull the return address */
+       ldr     lr, [sp], #0x0004;      /* Pull the return address */      \
+       add     sp, sp, #4              /* Align the stack */
 
 /*
  * PUSHFRAMEINSVC - macro to push a trap frame on the stack in SVC32 mode
@@ -241,6 +246,8 @@ struct frame {
        orr     r2, r2, #(PSR_SVC32_MODE);                                 \
        msr     cpsr_c, r2;             /* Punch into SVC mode */          \
        mov     r2, sp;                 /* Save SVC sp */                  \
+       bic     sp, sp, #7;             /* Align sp to an 8-byte address */   \
+       sub     sp, sp, #4;             /* Pad trapframe to keep alignment */ \
        str     r0, [sp, #-4]!;         /* Push return address */          \
        str     lr, [sp, #-4]!;         /* Push SVC lr */                  \
        str     r2, [sp, #-4]!;         /* Push SVC sp */                  \

Reply via email to