On 02/09/2018 04:25 AM, Joerg Roedel wrote:
> From: Joerg Roedel <[email protected]>
>
> Add unconditional cr3 switches between user and kernel cr3
> to all non-NMI entry and exit points.
>
> Signed-off-by: Joerg Roedel <[email protected]>
> ---
>  arch/x86/entry/entry_32.S | 59 ++++++++++++++++++++++++++++++++++++++++++++++-
>  1 file changed, 58 insertions(+), 1 deletion(-)
>
> diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
> index 9693485..b5ef003 100644
> --- a/arch/x86/entry/entry_32.S
> +++ b/arch/x86/entry/entry_32.S
> @@ -328,6 +328,25 @@
>  #endif /* CONFIG_X86_ESPFIX32 */
>  .endm
>  
> +/* Unconditionally switch to user cr3 */
> +.macro SWITCH_TO_USER_CR3 scratch_reg:req
> +     ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
> +
> +     movl    %cr3, \scratch_reg
> +     orl     $PTI_SWITCH_MASK, \scratch_reg
> +     movl    \scratch_reg, %cr3
> +.Lend_\@:
> +.endm
> +
> +/* Unconditionally switch to kernel cr3 */
> +.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
> +     ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
> +     movl    %cr3, \scratch_reg
> +     andl    $(~PTI_SWITCH_MASK), \scratch_reg
> +     movl    \scratch_reg, %cr3
> +.Lend_\@:
> +.endm
> +
>  
>  /*
>   * Called with pt_regs fully populated and kernel segments loaded,
> @@ -343,6 +362,8 @@
>  
>       ALTERNATIVE     "", "jmp .Lend_\@", X86_FEATURE_XENPV
>  
> +     SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
> +
>       /* Are we on the entry stack? Bail out if not! */
>       movl    PER_CPU_VAR(cpu_entry_area), %edi
>       addl    $CPU_ENTRY_AREA_entry_stack, %edi
> @@ -637,6 +658,18 @@ ENTRY(xen_sysenter_target)
>   * 0(%ebp) arg6
>   */
>  ENTRY(entry_SYSENTER_32)
> +     /*
> +      * On entry-stack with all userspace-regs live - save and
> +      * restore eflags and %eax to use it as scratch-reg for the cr3
> +      * switch.
> +      */
> +     pushfl
> +     pushl   %eax
> +     SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
> +     popl    %eax
> +     popfl
> +
> +     /* Stack empty again, switch to task stack */
>       movl    TSS_entry_stack(%esp), %esp
>  
>  .Lsysenter_past_esp:
> @@ -691,6 +724,10 @@ ENTRY(entry_SYSENTER_32)
>       movl    PT_OLDESP(%esp), %ecx   /* pt_regs->sp */
>  1:   mov     PT_FS(%esp), %fs
>       PTGS_TO_GS
> +
> +     /* Segments are restored - switch to user cr3 */
> +     SWITCH_TO_USER_CR3 scratch_reg=%eax
> +
>       popl    %ebx                    /* pt_regs->bx */
>       addl    $2*4, %esp              /* skip pt_regs->cx and pt_regs->dx */
>       popl    %esi                    /* pt_regs->si */
> @@ -778,7 +815,23 @@ restore_all:
>  .Lrestore_all_notrace:
>       CHECK_AND_APPLY_ESPFIX
>  .Lrestore_nocheck:
> -     RESTORE_REGS 4                          # skip orig_eax/error_code
> +     /*
> +      * First restore user segments. This can cause exceptions, so we
> +      * run it with kernel cr3.
> +      */
> +     RESTORE_SEGMENTS
> +
> +     /*
> +      * Segments are restored - no more exceptions from here on except on
> +      * iret, but that handled safely.
> +      */
> +     SWITCH_TO_USER_CR3 scratch_reg=%eax
> +
> +     /* Restore rest */
> +     RESTORE_INT_REGS
> +
> +     /* Unwind stack to the iret frame */
> +     RESTORE_SKIP_SEGMENTS 4                 # skip orig_eax/error_code
>  .Lirq_return:
>       INTERRUPT_RETURN
>  
> @@ -1139,6 +1192,10 @@ ENTRY(debug)
>  
>       SAVE_ALL
>       ENCODE_FRAME_POINTER
> +
> +     /* Make sure we are running on kernel cr3 */
> +     SWITCH_TO_KERNEL_CR3 scratch_reg=%eax
> +
>       xorl    %edx, %edx                      # error code 0
>       movl    %esp, %eax                      # pt_regs pointer
>  

The debug exception goes through ret_from_exception on exit. If it came
from userspace, the C function prepare_exit_to_usermode() will be called.
With the PTI-32 code, this means that the function will run on the
entry stack instead of the task stack. This can be problematic, as macros
like current won't work anymore.

-Longman

Reply via email to