[PATCH v2 03/10] x86/entry/32: Filter NT and speed up AC filtering in SYSENTER
This makes the 32-bit code work just like the 64-bit code. It should speed up syscalls on 32-bit kernels on Skylake by something like 20 cycles (by analogy to the 64-bit compat case). It also cleans up NT just like we do for the 64-bit case. Signed-off-by: Andy Lutomirski --- arch/x86/entry/entry_32.S | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ab710eee4308..289a17bf0c71 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -294,7 +294,6 @@ sysenter_past_esp: pushl $__USER_DS /* pt_regs->ss */ pushl %ebp /* pt_regs->sp (stashed in bp) */ pushfl /* pt_regs->flags (except IF = 0) */ - ASM_CLAC /* Clear AC after saving FLAGS */ orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ pushl $__USER_CS /* pt_regs->cs */ pushl $0 /* pt_regs->ip = 0 (placeholder) */ @@ -302,6 +301,23 @@ sysenter_past_esp: SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ /* +* SYSENTER doesn't filter flags, so we need to clear NT and AC +* ourselves. To save a few cycles, we can check whether +* either was set instead of doing an unconditional popfl. +* This needs to happen before enabling interrupts so that +* we don't get preempted with NT set. +* +* NB.: .Lsysenter_fix_flags is a label with the code under it moved +* out-of-line as an optimization: NT is unlikely to be set in the +* majority of the cases and instead of polluting the I$ unnecessarily, +* we're keeping that code behind a branch which will predict as +* not-taken and therefore its instructions won't be fetched. +*/ + testl $X86_EFLAGS_NT|X86_EFLAGS_AC, PT_EFLAGS(%esp) + jnz .Lsysenter_fix_flags +.Lsysenter_flags_fixed: + + /* * User mode is traced as though IRQs are on, and SYSENTER * turned them off. */ @@ -339,6 +355,11 @@ sysenter_past_esp: .popsection _ASM_EXTABLE(1b, 2b) PTGS_TO_GS_EX + +.Lsysenter_fix_flags: + pushl $X86_EFLAGS_FIXED + popfl + jmp .Lsysenter_flags_fixed ENDPROC(entry_SYSENTER_32) # system call handler stub -- 2.5.0
[PATCH v2 03/10] x86/entry/32: Filter NT and speed up AC filtering in SYSENTER
This makes the 32-bit code work just like the 64-bit code. It should speed up syscalls on 32-bit kernels on Skylake by something like 20 cycles (by analogy to the 64-bit compat case). It also cleans up NT just like we do for the 64-bit case. Signed-off-by: Andy Lutomirski --- arch/x86/entry/entry_32.S | 23 ++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index ab710eee4308..289a17bf0c71 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -294,7 +294,6 @@ sysenter_past_esp: pushl $__USER_DS /* pt_regs->ss */ pushl %ebp /* pt_regs->sp (stashed in bp) */ pushfl /* pt_regs->flags (except IF = 0) */ - ASM_CLAC /* Clear AC after saving FLAGS */ orl $X86_EFLAGS_IF, (%esp) /* Fix IF */ pushl $__USER_CS /* pt_regs->cs */ pushl $0 /* pt_regs->ip = 0 (placeholder) */ @@ -302,6 +301,23 @@ sysenter_past_esp: SAVE_ALL pt_regs_ax=$-ENOSYS /* save rest */ /* +* SYSENTER doesn't filter flags, so we need to clear NT and AC +* ourselves. To save a few cycles, we can check whether +* either was set instead of doing an unconditional popfl. +* This needs to happen before enabling interrupts so that +* we don't get preempted with NT set. +* +* NB.: .Lsysenter_fix_flags is a label with the code under it moved +* out-of-line as an optimization: NT is unlikely to be set in the +* majority of the cases and instead of polluting the I$ unnecessarily, +* we're keeping that code behind a branch which will predict as +* not-taken and therefore its instructions won't be fetched. +*/ + testl $X86_EFLAGS_NT|X86_EFLAGS_AC, PT_EFLAGS(%esp) + jnz .Lsysenter_fix_flags +.Lsysenter_flags_fixed: + + /* * User mode is traced as though IRQs are on, and SYSENTER * turned them off. */ @@ -339,6 +355,11 @@ sysenter_past_esp: .popsection _ASM_EXTABLE(1b, 2b) PTGS_TO_GS_EX + +.Lsysenter_fix_flags: + pushl $X86_EFLAGS_FIXED + popfl + jmp .Lsysenter_flags_fixed ENDPROC(entry_SYSENTER_32) # system call handler stub -- 2.5.0