[PATCH v2 03/10] x86/entry/32: Filter NT and speed up AC filtering in SYSENTER

2016-03-05 Thread Andy Lutomirski
This makes the 32-bit code work just like the 64-bit code.  It should
speed up syscalls on 32-bit kernels on Skylake by something like 20
cycles (by analogy to the 64-bit compat case).

It also cleans up NT just like we do for the 64-bit case.

Signed-off-by: Andy Lutomirski 
---
 arch/x86/entry/entry_32.S | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index ab710eee4308..289a17bf0c71 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,7 +294,6 @@ sysenter_past_esp:
pushl   $__USER_DS  /* pt_regs->ss */
pushl   %ebp/* pt_regs->sp (stashed in bp) */
pushfl  /* pt_regs->flags (except IF = 0) */
-   ASM_CLAC/* Clear AC after saving FLAGS */
orl $X86_EFLAGS_IF, (%esp)  /* Fix IF */
pushl   $__USER_CS  /* pt_regs->cs */
pushl   $0  /* pt_regs->ip = 0 (placeholder) */
@@ -302,6 +301,23 @@ sysenter_past_esp:
SAVE_ALL pt_regs_ax=$-ENOSYS/* save rest */
 
/*
+* SYSENTER doesn't filter flags, so we need to clear NT and AC
+* ourselves.  To save a few cycles, we can check whether
+* either was set instead of doing an unconditional popfl.
+* This needs to happen before enabling interrupts so that
+* we don't get preempted with NT set.
+*
+* NB.: .Lsysenter_fix_flags is a label with the code under it moved
+* out-of-line as an optimization: NT and AC are unlikely to be set in
+* the majority of cases and instead of polluting the I$ unnecessarily,
+* we're keeping that code behind a branch which will predict as
+* not-taken and therefore its instructions won't be fetched.
+*/
+   testl   $X86_EFLAGS_NT|X86_EFLAGS_AC, PT_EFLAGS(%esp)
+   jnz .Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
+
+   /*
 * User mode is traced as though IRQs are on, and SYSENTER
 * turned them off.
 */
@@ -339,6 +355,11 @@ sysenter_past_esp:
 .popsection
_ASM_EXTABLE(1b, 2b)
PTGS_TO_GS_EX
+
+.Lsysenter_fix_flags:
+   pushl   $X86_EFLAGS_FIXED
+   popfl
+   jmp .Lsysenter_flags_fixed
 ENDPROC(entry_SYSENTER_32)
 
# system call handler stub
-- 
2.5.0



[PATCH v2 03/10] x86/entry/32: Filter NT and speed up AC filtering in SYSENTER

2016-03-05 Thread Andy Lutomirski
This makes the 32-bit code work just like the 64-bit code.  It should
speed up syscalls on 32-bit kernels on Skylake by something like 20
cycles (by analogy to the 64-bit compat case).

It also cleans up NT just like we do for the 64-bit case.

Signed-off-by: Andy Lutomirski 
---
 arch/x86/entry/entry_32.S | 23 ++++++++++++++++++++++-
 1 file changed, 22 insertions(+), 1 deletion(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index ab710eee4308..289a17bf0c71 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -294,7 +294,6 @@ sysenter_past_esp:
pushl   $__USER_DS  /* pt_regs->ss */
pushl   %ebp/* pt_regs->sp (stashed in bp) */
pushfl  /* pt_regs->flags (except IF = 0) */
-   ASM_CLAC/* Clear AC after saving FLAGS */
orl $X86_EFLAGS_IF, (%esp)  /* Fix IF */
pushl   $__USER_CS  /* pt_regs->cs */
pushl   $0  /* pt_regs->ip = 0 (placeholder) */
@@ -302,6 +301,23 @@ sysenter_past_esp:
SAVE_ALL pt_regs_ax=$-ENOSYS/* save rest */
 
/*
+* SYSENTER doesn't filter flags, so we need to clear NT and AC
+* ourselves.  To save a few cycles, we can check whether
+* either was set instead of doing an unconditional popfl.
+* This needs to happen before enabling interrupts so that
+* we don't get preempted with NT set.
+*
+* NB.: .Lsysenter_fix_flags is a label with the code under it moved
+* out-of-line as an optimization: NT and AC are unlikely to be set in
+* the majority of cases and instead of polluting the I$ unnecessarily,
+* we're keeping that code behind a branch which will predict as
+* not-taken and therefore its instructions won't be fetched.
+*/
+   testl   $X86_EFLAGS_NT|X86_EFLAGS_AC, PT_EFLAGS(%esp)
+   jnz .Lsysenter_fix_flags
+.Lsysenter_flags_fixed:
+
+   /*
 * User mode is traced as though IRQs are on, and SYSENTER
 * turned them off.
 */
@@ -339,6 +355,11 @@ sysenter_past_esp:
 .popsection
_ASM_EXTABLE(1b, 2b)
PTGS_TO_GS_EX
+
+.Lsysenter_fix_flags:
+   pushl   $X86_EFLAGS_FIXED
+   popfl
+   jmp .Lsysenter_flags_fixed
 ENDPROC(entry_SYSENTER_32)
 
# system call handler stub
-- 
2.5.0