On Sat, Apr 02, 2016 at 02:19:17PM +0200, Patrick Wildt wrote: > Hi, > > the Procedure Call Standard used in EABI requires the stack pointer to > be 8-byte aligned by > > * exception handlers, before calling AAPCS-conforming code. > * the OS, before giving control to an application. > > This diff makes sure our kernel interfaces adhere to that requirement. > > Can someone make sure this doesn't break armish/zaurus? > > Patrick > > diff --git sys/arch/arm/arm/cpuswitch.S sys/arch/arm/arm/cpuswitch.S > index 98e2dbe..0c3d0af 100644 > --- sys/arch/arm/arm/cpuswitch.S > +++ sys/arch/arm/arm/cpuswitch.S > @@ -171,6 +171,7 @@ ENTRY(cpu_idle_leave) > > ENTRY(cpu_switchto) > stmfd sp!, {r4-r7, lr} > + sub sp, sp, #4 > > #ifdef MULTIPROCESSOR > /* XXX use curcpu() */ > @@ -451,6 +452,7 @@ ENTRY(cpu_switchto) > * Pull the registers that got pushed when either savectx() or > * cpu_switch() was called and return. > */ > + add sp, sp, #4 > ldmfd sp!, {r4-r7, pc} > > /* LINTSTUB: Func: void savectx(struct pcb *pcb) */ > @@ -461,6 +463,7 @@ ENTRY(savectx) > > /* Push registers.*/ > stmfd sp!, {r4-r7, lr} > + sub sp, sp, #4 > > /* Store all the registers in the process's pcb */ > #ifndef __XSCALE__ > @@ -473,6 +476,7 @@ ENTRY(savectx) > #endif > > /* Pull the regs of the stack */ > + add sp, sp, #4 > ldmfd sp!, {r4-r7, pc} > > ENTRY(proc_trampoline) > diff --git sys/arch/arm/arm/cpuswitch7.S sys/arch/arm/arm/cpuswitch7.S > index 126b41a..4db9a86 100644 > --- sys/arch/arm/arm/cpuswitch7.S > +++ sys/arch/arm/arm/cpuswitch7.S > @@ -165,6 +165,7 @@ ENTRY(cpu_idle_leave) > > ENTRY(cpu_switchto) > stmfd sp!, {r4-r7, lr} > + sub sp, sp, #4 > > #ifdef MULTIPROCESSOR > /* XXX use curcpu() */ > @@ -396,6 +397,7 @@ ENTRY(cpu_switchto) > * Pull the registers that got pushed when either savectx() or > * cpu_switch() was called and return. 
> */ > + add sp, sp, #4 > ldmfd sp!, {r4-r7, pc} > > /* LINTSTUB: Func: void savectx(struct pcb *pcb) */ > @@ -406,6 +408,7 @@ ENTRY(savectx) > > /* Push registers.*/ > stmfd sp!, {r4-r7, lr} > + sub sp, sp, #4 > > /* Store all the registers in the process's pcb */ > #ifndef __XSCALE__ > @@ -418,6 +421,7 @@ ENTRY(savectx) > #endif > > /* Pull the regs of the stack */ > + add sp, sp, #4 > ldmfd sp!, {r4-r7, pc} > > ENTRY(proc_trampoline) > diff --git sys/arch/arm/arm/vm_machdep.c sys/arch/arm/arm/vm_machdep.c > index 06f217b..84dfb68 100644 > --- sys/arch/arm/arm/vm_machdep.c > +++ sys/arch/arm/arm/vm_machdep.c > @@ -140,10 +140,11 @@ cpu_fork(p1, p2, stack, stacksize, func, arg) > *tf = *p1->p_addr->u_pcb.pcb_tf; > > /* > - * If specified, give the child a different stack. > + * If specified, give the child a different stack (make sure > + * it's 8-byte aligned). > */ > if (stack != NULL) > - tf->tf_usr_sp = (u_int)stack + stacksize; > + tf->tf_usr_sp = ((vaddr_t)(stack) + stacksize) & -8; > > sf = (struct switchframe *)tf - 1; > sf->sf_r4 = (u_int)func; > diff --git sys/arch/arm/include/frame.h sys/arch/arm/include/frame.h > index 31b2936..56e1368 100644 > --- sys/arch/arm/include/frame.h > +++ sys/arch/arm/include/frame.h > @@ -75,6 +75,7 @@ typedef struct trapframe { > register_t tf_svc_sp; > register_t tf_svc_lr; > register_t tf_pc; > + register_t tf_pad; > } trapframe_t; > > /* Register numbers */ > @@ -137,6 +138,7 @@ typedef struct irqframe { > unsigned int if_svc_sp; > unsigned int if_svc_lr; > unsigned int if_pc; > + unsigned int if_pad; > } irqframe_t; > > #define clockframe irqframe > @@ -146,6 +148,7 @@ typedef struct irqframe { > */ > > struct switchframe { > + u_int sf_pad; > u_int sf_r4; > u_int sf_r5; > u_int sf_r6; > @@ -203,6 +206,7 @@ struct frame { > */ > > #define PUSHFRAME \ > + sub sp, sp, #4; /* Align the stack */ \ > str lr, [sp, #-4]!; /* Push the return address */ \ > sub sp, sp, #(4*17); /* Adjust the stack pointer */ \ > stmia sp, 
{r0-r14}^; /* Push the user mode registers */ \ > @@ -221,7 +225,8 @@ struct frame { > ldmia sp, {r0-r14}^; /* Restore registers (usr mode) */ \ > mov r0, r0; /* NOP for previous instruction */ \ > add sp, sp, #(4*17); /* Adjust the stack pointer */ \ > - ldr lr, [sp], #0x0004 /* Pull the return address */ > + ldr lr, [sp], #0x0004; /* Pull the return address */ \ > + add sp, sp, #4 /* Align the stack */ > > /* > * PUSHFRAMEINSVC - macro to push a trap frame on the stack in SVC32 mode > @@ -241,6 +246,8 @@ struct frame { > orr r2, r2, #(PSR_SVC32_MODE); \ > msr cpsr_c, r2; /* Punch into SVC mode */ \ > mov r2, sp; /* Save SVC sp */ \ > + bic sp, sp, #7; /* Align sp to an 8-byte address */ \ > + sub sp, sp, #4; /* Pad trapframe to keep alignment */ \ > str r0, [sp, #-4]!; /* Push return address */ \ > str lr, [sp, #-4]!; /* Push SVC lr */ \ > str r2, [sp, #-4]!; /* Push SVC sp */ \ >
Any other concerns or questions regarding this diff? ok?