On Sat, Jul 02, 2011 at 04:27:15PM -0600, Philip Guenther wrote:
> First, expand the INTR_RESTORE_GPRS macro and then rearrange the register 
> restores around the INTR_RESTORE_SELECTORS macro to minimize how long 
> interrupts are blocked.  This also lets us eliminate all the adjusting of 
> the stack pointer except for the necessary one on the iretq path.
> 
> Instead of having completely separate "entered via int$80" and syscall 
> paths, have the int$80 path set the MDP_IRET flag in md_proc and then jump 
> to the common "call syscall() and handle ASTs" code.  Then, after that, 
> check the MDP_IRET flag and use the correct return path.  This lets us set 
> MDP_IRET in the kernel to force return via iretq despite entering via 
> syscall.  With *that* we can change sigcode to invoke sigreturn via 
> syscall instead of int$80, have sigreturn set MDP_IRET, and then return 
> via iretq, which is necessary for correct restoring of rcx and r11 when 
> interrupted.
> 
> Been working for quite a while for me, even testing the "real interrupt 
> restoring r11 and rcx correctly" part.
> 
> Tests please from people running amd64...
> 

Two amd64 boxes work w/o complaints.

.... Ken

> 
> Philip
> 
> 
> Index: locore.S
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/locore.S,v
> retrieving revision 1.45
> diff -u -p -r1.45 locore.S
> --- locore.S  5 Apr 2011 21:14:00 -0000       1.45
> +++ locore.S  5 Apr 2011 21:30:02 -0000
> @@ -650,7 +650,7 @@ NENTRY(sigcode)
>       movq    %rsp,%rdi
>       pushq   %rdi                    /* fake return address */
>       movq    $SYS_sigreturn,%rax
> -     int     $0x80
> +     syscall
>       movq    $SYS_exit,%rax
>       syscall
>       .globl  _C_LABEL(esigcode)
> @@ -935,8 +935,9 @@ IDTVEC(syscall)
>       movq    $T_ASTFLT, TF_TRAPNO(%rsp)
>  
>       movq    CPUVAR(CURPROC),%r14
> -     movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
>       andl    $~MDP_IRET,P_MD_FLAGS(%r14)
> +call_syscall:
> +     movq    %rsp,P_MD_REGS(%r14)    # save pointer to frame
>       movq    %rsp,%rdi
>       call    _C_LABEL(syscall)
>  1:   /* Check for ASTs on exit to user mode. */
> @@ -959,16 +960,27 @@ syscall_return:
>       cmpl    $IPL_NONE,CPUVAR(ILEVEL)
>       jne     3f
>  #endif
> -     /*
> -      * XXX interrupts off longer than they should be here.
> -      */
> +
> +     movq    TF_RDI(%rsp),%rdi
> +     movq    TF_RSI(%rsp),%rsi
> +     movq    TF_R8(%rsp),%r8
> +     movq    TF_R9(%rsp),%r9
> +     movq    TF_R10(%rsp),%r10
> +     movq    TF_R12(%rsp),%r12
> +     movq    TF_R13(%rsp),%r13
> +     movq    TF_R14(%rsp),%r14
> +     movq    TF_R15(%rsp),%r15
> +     movq    TF_RBP(%rsp),%rbp
> +     movq    TF_RBX(%rsp),%rbx
> +
>       INTR_RESTORE_SELECTORS
> -     INTR_RESTORE_GPRS
> -     addq    $48,%rsp
> -     popq    %rcx    /* return rip */
> -     addq    $8,%rsp
> -     popq    %r11    /* flags as set by sysret insn */
> -     movq    %ss:(%rsp),%rsp
> +
> +     movq    TF_RDX(%rsp),%rdx
> +     movq    TF_RAX(%rsp),%rax
> +
> +     movq    TF_RIP(%rsp),%rcx
> +     movq    TF_RFLAGS(%rsp),%r11
> +     movq    TF_RSP(%rsp),%rsp
>       sysretq
>  
>  #ifdef DIAGNOSTIC
> @@ -1007,47 +1019,52 @@ NENTRY(child_trampoline)
>       call    *%r12
>       jmp     syscall_return
>  
> -     .globl  _C_LABEL(osyscall_return)
> -
>  
>  /*
> - * Trap gate entry for int $80 syscall, also used by sigreturn.
> + * Trap gate entry for old int $80 syscall (used to be used by sigreturn)
>   */
>  IDTVEC(osyscall)
>       pushq   $2              # size of instruction for restart
>       pushq   $T_ASTFLT       # trap # for doing ASTs
>       INTRENTRY
>       sti
> -     movq    CPUVAR(CURPROC),%rdx
> -     movq    %rsp,P_MD_REGS(%rdx)    # save pointer to frame
> -     movq    %rsp,%rdi
> -     call    _C_LABEL(syscall)
> -_C_LABEL(osyscall_return):
> -2:   /* Check for ASTs on exit to user mode. */
> -     cli
> -     CHECK_ASTPENDING(%r11)
> -     je      1f
> -     /* Always returning to user mode here. */
> -     CLEAR_ASTPENDING(%r11)
> -     sti
> -     /* Pushed T_ASTFLT into tf_trapno on entry. */
> -     movq    %rsp,%rdi
> -     call    _C_LABEL(trap)
> -     jmp     2b
> +     movq    CPUVAR(CURPROC),%r14
> +     orl     $MDP_IRET,P_MD_FLAGS(%r14)
> +     jmp     call_syscall
>  
> +/*
> + * Return via iretq, for real interrupts and signal returns
> + */
>  iret_return:
> -1:
>  #ifdef DIAGNOSTIC
>       cmpl    $IPL_NONE,CPUVAR(ILEVEL)
>       jne     3f
> -#endif /* DIAGNOSTIC */
> +#endif
>       .globl  intr_fast_exit
>  intr_fast_exit:
> +     movq    TF_RDI(%rsp),%rdi
> +     movq    TF_RSI(%rsp),%rsi
> +     movq    TF_R8(%rsp),%r8
> +     movq    TF_R9(%rsp),%r9
> +     movq    TF_R10(%rsp),%r10
> +     movq    TF_R12(%rsp),%r12
> +     movq    TF_R13(%rsp),%r13
> +     movq    TF_R14(%rsp),%r14
> +     movq    TF_R15(%rsp),%r15
> +     movq    TF_RBP(%rsp),%rbp
> +     movq    TF_RBX(%rsp),%rbx
> +
>       testq   $SEL_UPL,TF_CS(%rsp)
>       je      5f
> +
>       INTR_RESTORE_SELECTORS
> -5:   INTR_RESTORE_GPRS
> -     addq    $48,%rsp
> +
> +5:   movq    TF_RDX(%rsp),%rdx
> +     movq    TF_RCX(%rsp),%rcx
> +     movq    TF_R11(%rsp),%r11
> +     movq    TF_RAX(%rsp),%rax
> +     addq    $TF_RIP,%rsp
> +
>       .globl  _C_LABEL(doreti_iret)
>  _C_LABEL(doreti_iret):
>       iretq
> Index: machdep.c
> ===================================================================
> RCS file: /cvs/src/sys/arch/amd64/amd64/machdep.c,v
> retrieving revision 1.135
> diff -u -p -r1.135 machdep.c
> --- machdep.c 5 Apr 2011 21:14:00 -0000       1.135
> +++ machdep.c 5 Apr 2011 21:30:03 -0000
> @@ -674,6 +674,7 @@ sys_sigreturn(struct proc *p, void *v, r
>       else
>               p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
>       p->p_sigmask = ksc.sc_mask & ~sigcantmask;
> +     p->p_md.md_flags |= MDP_IRET;
>  
>       return (EJUSTRETURN);
>  }

Reply via email to