On Sat, Jul 02, 2011 at 04:27:15PM -0600, Philip Guenther wrote: > First, expand the INTR_RESTORE_GPRS macro and then rearrange the register > restores around the INTR_RESTORE_SELECTORS macro to minimize how long > interrupts are blocked. This also lets us eliminate all the adjusting of > the stack pointer except for the necessary one on the iretq path. > > Instead of having completely separate "entered via int$80" and syscall > paths, have the int$80 path set the MDP_IRET flag in md_proc and then jump > to the common "call syscall() and handle ASTs" code. Then, after that, > check the MDP_IRET flag and use the correct return path. This lets us set > MDP_IRET in the kernel to force return via iretq despite entering via > syscall. With *that* we can change sigcode to invoke sigreturn via > syscall instead of int$80, have sigreturn set MDP_IRET, and then return > via iretq, which is necessary for correct restoring of rcx and r11 when > interrupted. > > Been working for quite a while for me, even testing the "real interrupt > restoring r11 and rcx correctly part". > > Tests please from people running amd64... >
Two amd64 boxes work w/o complaints. .... Ken > > Philip > > > Index: locore.S > =================================================================== > RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v > retrieving revision 1.45 > diff -u -p -r1.45 locore.S > --- locore.S 5 Apr 2011 21:14:00 -0000 1.45 > +++ locore.S 5 Apr 2011 21:30:02 -0000 > @@ -650,7 +650,7 @@ NENTRY(sigcode) > movq %rsp,%rdi > pushq %rdi /* fake return address */ > movq $SYS_sigreturn,%rax > - int $0x80 > + syscall > movq $SYS_exit,%rax > syscall > .globl _C_LABEL(esigcode) > @@ -935,8 +935,9 @@ IDTVEC(syscall) > movq $T_ASTFLT, TF_TRAPNO(%rsp) > > movq CPUVAR(CURPROC),%r14 > - movq %rsp,P_MD_REGS(%r14) # save pointer to frame > andl $~MDP_IRET,P_MD_FLAGS(%r14) > +call_syscall: > + movq %rsp,P_MD_REGS(%r14) # save pointer to frame > movq %rsp,%rdi > call _C_LABEL(syscall) > 1: /* Check for ASTs on exit to user mode. */ > @@ -959,16 +960,27 @@ syscall_return: > cmpl $IPL_NONE,CPUVAR(ILEVEL) > jne 3f > #endif > - /* > - * XXX interrupts off longer than they should be here. > - */ > + > + movq TF_RDI(%rsp),%rdi > + movq TF_RSI(%rsp),%rsi > + movq TF_R8(%rsp),%r8 > + movq TF_R9(%rsp),%r9 > + movq TF_R10(%rsp),%r10 > + movq TF_R12(%rsp),%r12 > + movq TF_R13(%rsp),%r13 > + movq TF_R14(%rsp),%r14 > + movq TF_R15(%rsp),%r15 > + movq TF_RBP(%rsp),%rbp > + movq TF_RBX(%rsp),%rbx > + > INTR_RESTORE_SELECTORS > - INTR_RESTORE_GPRS > - addq $48,%rsp > - popq %rcx /* return rip */ > - addq $8,%rsp > - popq %r11 /* flags as set by sysret insn */ > - movq %ss:(%rsp),%rsp > + > + movq TF_RDX(%rsp),%rdx > + movq TF_RAX(%rsp),%rax > + > + movq TF_RIP(%rsp),%rcx > + movq TF_RFLAGS(%rsp),%r11 > + movq TF_RSP(%rsp),%rsp > sysretq > > #ifdef DIAGNOSTIC > @@ -1007,47 +1019,52 @@ NENTRY(child_trampoline) > call *%r12 > jmp syscall_return > > - .globl _C_LABEL(osyscall_return) > - > > /* > - * Trap gate entry for int $80 syscall, also used by sigreturn. 
> + * Trap gate entry for old int $80 syscall (used to be used by sigreturn) > */ > IDTVEC(osyscall) > pushq $2 # size of instruction for restart > pushq $T_ASTFLT # trap # for doing ASTs > INTRENTRY > sti > - movq CPUVAR(CURPROC),%rdx > - movq %rsp,P_MD_REGS(%rdx) # save pointer to frame > - movq %rsp,%rdi > - call _C_LABEL(syscall) > -_C_LABEL(osyscall_return): > -2: /* Check for ASTs on exit to user mode. */ > - cli > - CHECK_ASTPENDING(%r11) > - je 1f > - /* Always returning to user mode here. */ > - CLEAR_ASTPENDING(%r11) > - sti > - /* Pushed T_ASTFLT into tf_trapno on entry. */ > - movq %rsp,%rdi > - call _C_LABEL(trap) > - jmp 2b > + movq CPUVAR(CURPROC),%r14 > + orl $MDP_IRET,P_MD_FLAGS(%r14) > + jmp call_syscall > > +/* > + * Return via iretq, for real interrupts and signal returns > + */ > iret_return: > -1: > #ifdef DIAGNOSTIC > cmpl $IPL_NONE,CPUVAR(ILEVEL) > jne 3f > -#endif /* DIAGNOSTIC */ > +#endif > .globl intr_fast_exit > intr_fast_exit: > + movq TF_RDI(%rsp),%rdi > + movq TF_RSI(%rsp),%rsi > + movq TF_R8(%rsp),%r8 > + movq TF_R9(%rsp),%r9 > + movq TF_R10(%rsp),%r10 > + movq TF_R12(%rsp),%r12 > + movq TF_R13(%rsp),%r13 > + movq TF_R14(%rsp),%r14 > + movq TF_R15(%rsp),%r15 > + movq TF_RBP(%rsp),%rbp > + movq TF_RBX(%rsp),%rbx > + > testq $SEL_UPL,TF_CS(%rsp) > je 5f > + > INTR_RESTORE_SELECTORS > -5: INTR_RESTORE_GPRS > - addq $48,%rsp > + > +5: movq TF_RDX(%rsp),%rdx > + movq TF_RCX(%rsp),%rcx > + movq TF_R11(%rsp),%r11 > + movq TF_RAX(%rsp),%rax > + addq $TF_RIP,%rsp > + > .globl _C_LABEL(doreti_iret) > _C_LABEL(doreti_iret): > iretq > Index: machdep.c > =================================================================== > RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v > retrieving revision 1.135 > diff -u -p -r1.135 machdep.c > --- machdep.c 5 Apr 2011 21:14:00 -0000 1.135 > +++ machdep.c 5 Apr 2011 21:30:03 -0000 > @@ -674,6 +674,7 @@ sys_sigreturn(struct proc *p, void *v, r > else > p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; > 
p->p_sigmask = ksc.sc_mask & ~sigcantmask; > + p->p_md.md_flags |= MDP_IRET; > > return (EJUSTRETURN); > }
