First, expand the INTR_RESTORE_GPRS macro and then rearrange the register
restores around the INTR_RESTORE_SELECTORS macro to minimize how long
interrupts are blocked. This also lets us eliminate all the adjusting of
the stack pointer except for the necessary one on the iretq path.
Instead of having completely separate "entered via int$80" and syscall
paths, have the int$80 path set the MDP_IRET flag in md_proc and then jump
to the common "call syscall() and handle ASTs" code. Then, after that,
check the MDP_IRET flag and use the correct return path. This lets us set
MDP_IRET in the kernel to force return via iretq despite entering via
syscall. With *that* we can change sigcode to invoke sigreturn via
syscall instead of int$80, have sigreturn set MDP_IRET, and then return
via iretq, which is necessary for correct restoring of rcx and r11 when
interrupted.
Been working for quite a while for me, even testing the "real interrupt
restoring r11 and rcx correctly" part.
Tests please from people running amd64...
Philip
Index: locore.S
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
retrieving revision 1.45
diff -u -p -r1.45 locore.S
--- locore.S 5 Apr 2011 21:14:00 -0000 1.45
+++ locore.S 5 Apr 2011 21:30:02 -0000
@@ -650,7 +650,7 @@ NENTRY(sigcode)
movq %rsp,%rdi
pushq %rdi /* fake return address */
movq $SYS_sigreturn,%rax
- int $0x80
+ syscall
movq $SYS_exit,%rax
syscall
.globl _C_LABEL(esigcode)
@@ -935,8 +935,9 @@ IDTVEC(syscall)
movq $T_ASTFLT, TF_TRAPNO(%rsp)
movq CPUVAR(CURPROC),%r14
- movq %rsp,P_MD_REGS(%r14) # save pointer to frame
andl $~MDP_IRET,P_MD_FLAGS(%r14)
+call_syscall:
+ movq %rsp,P_MD_REGS(%r14) # save pointer to frame
movq %rsp,%rdi
call _C_LABEL(syscall)
1: /* Check for ASTs on exit to user mode. */
@@ -959,16 +960,27 @@ syscall_return:
cmpl $IPL_NONE,CPUVAR(ILEVEL)
jne 3f
#endif
- /*
- * XXX interrupts off longer than they should be here.
- */
+
+ movq TF_RDI(%rsp),%rdi
+ movq TF_RSI(%rsp),%rsi
+ movq TF_R8(%rsp),%r8
+ movq TF_R9(%rsp),%r9
+ movq TF_R10(%rsp),%r10
+ movq TF_R12(%rsp),%r12
+ movq TF_R13(%rsp),%r13
+ movq TF_R14(%rsp),%r14
+ movq TF_R15(%rsp),%r15
+ movq TF_RBP(%rsp),%rbp
+ movq TF_RBX(%rsp),%rbx
+
INTR_RESTORE_SELECTORS
- INTR_RESTORE_GPRS
- addq $48,%rsp
- popq %rcx /* return rip */
- addq $8,%rsp
- popq %r11 /* flags as set by sysret insn */
- movq %ss:(%rsp),%rsp
+
+ movq TF_RDX(%rsp),%rdx
+ movq TF_RAX(%rsp),%rax
+
+ movq TF_RIP(%rsp),%rcx
+ movq TF_RFLAGS(%rsp),%r11
+ movq TF_RSP(%rsp),%rsp
sysretq
#ifdef DIAGNOSTIC
@@ -1007,47 +1019,52 @@ NENTRY(child_trampoline)
call *%r12
jmp syscall_return
- .globl _C_LABEL(osyscall_return)
-
/*
- * Trap gate entry for int $80 syscall, also used by sigreturn.
+ * Trap gate entry for old int $80 syscall (used to be used by sigreturn)
*/
IDTVEC(osyscall)
pushq $2 # size of instruction for restart
pushq $T_ASTFLT # trap # for doing ASTs
INTRENTRY
sti
- movq CPUVAR(CURPROC),%rdx
- movq %rsp,P_MD_REGS(%rdx) # save pointer to frame
- movq %rsp,%rdi
- call _C_LABEL(syscall)
-_C_LABEL(osyscall_return):
-2: /* Check for ASTs on exit to user mode. */
- cli
- CHECK_ASTPENDING(%r11)
- je 1f
- /* Always returning to user mode here. */
- CLEAR_ASTPENDING(%r11)
- sti
- /* Pushed T_ASTFLT into tf_trapno on entry. */
- movq %rsp,%rdi
- call _C_LABEL(trap)
- jmp 2b
+ movq CPUVAR(CURPROC),%r14
+ orl $MDP_IRET,P_MD_FLAGS(%r14)
+ jmp call_syscall
+/*
+ * Return via iretq, for real interrupts and signal returns
+ */
iret_return:
-1:
#ifdef DIAGNOSTIC
cmpl $IPL_NONE,CPUVAR(ILEVEL)
jne 3f
-#endif /* DIAGNOSTIC */
+#endif
.globl intr_fast_exit
intr_fast_exit:
+ movq TF_RDI(%rsp),%rdi
+ movq TF_RSI(%rsp),%rsi
+ movq TF_R8(%rsp),%r8
+ movq TF_R9(%rsp),%r9
+ movq TF_R10(%rsp),%r10
+ movq TF_R12(%rsp),%r12
+ movq TF_R13(%rsp),%r13
+ movq TF_R14(%rsp),%r14
+ movq TF_R15(%rsp),%r15
+ movq TF_RBP(%rsp),%rbp
+ movq TF_RBX(%rsp),%rbx
+
testq $SEL_UPL,TF_CS(%rsp)
je 5f
+
INTR_RESTORE_SELECTORS
-5: INTR_RESTORE_GPRS
- addq $48,%rsp
+
+5: movq TF_RDX(%rsp),%rdx
+ movq TF_RCX(%rsp),%rcx
+ movq TF_R11(%rsp),%r11
+ movq TF_RAX(%rsp),%rax
+ addq $TF_RIP,%rsp
+
.globl _C_LABEL(doreti_iret)
_C_LABEL(doreti_iret):
iretq
Index: machdep.c
===================================================================
RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
retrieving revision 1.135
diff -u -p -r1.135 machdep.c
--- machdep.c 5 Apr 2011 21:14:00 -0000 1.135
+++ machdep.c 5 Apr 2011 21:30:03 -0000
@@ -674,6 +674,7 @@ sys_sigreturn(struct proc *p, void *v, r
else
p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
p->p_sigmask = ksc.sc_mask & ~sigcantmask;
+ p->p_md.md_flags |= MDP_IRET;
return (EJUSTRETURN);
}