[PATCH 4.15 089/146] Revert "x86/retpoline: Simplify vmexit_fill_RSB()"

2018-03-13 Thread Greg Kroah-Hartman

4.15-stable review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit d1c99108af3c5992640aa2afa7d2e88c3775c06e upstream.

This reverts commit 1dde7415e99933bb7293d6b2843752cbdb43ec11. By putting
the RSB filling out of line and calling it, we waste one RSB slot for
returning from the function itself, which means one fewer actual function
call we can make if we're doing the Skylake abomination of call-depth
counting.

It also changed the number of RSB stuffings we do on vmexit from 32,
which was correct, to 16. Let's just stop with the bikeshedding; it
didn't actually *fix* anything anyway.
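
The FILL_RETURN_BUFFER macro restored below expands to a two-call unrolled loop: it pushes return addresses that point at 'pause; lfence; jmp' capture loops, then discards them all by adjusting the stack pointer. Purely as an illustration of that pattern (not part of this patch), the same idea can be sketched as a standalone user-space GAS routine; the name rsb_fill_demo and the hard-coded count of 32 entries are assumptions made for the sketch:

	.text
	.globl	rsb_fill_demo
	.type	rsb_fill_demo, @function
rsb_fill_demo:
	mov	$16, %eax		/* 32 entries, two pushed per iteration (assumed count) */
1:
	call	2f			/* pushes the address of label 3 */
3:	pause				/* speculation trap: a speculative 'ret' lands here */
	lfence
	jmp	3b
2:
	call	4f			/* pushes the address of label 5 */
5:	pause				/* speculation trap */
	lfence
	jmp	5b
4:
	dec	%eax
	jnz	1b
	add	$(8 * 32), %rsp		/* drop the 32 pushed return addresses */
	ret
	.size	rsb_fill_demo, . - rsb_fill_demo

Called from C as 'void rsb_fill_demo(void);', it leaves the stack pointer where it started. The kernel macro does the same job inline, parameterized by scratch register, entry count and stack-pointer name so it can be used from both .S files and inline asm.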

Signed-off-by: David Woodhouse 
Acked-by: Thomas Gleixner 
Cc: Linus Torvalds 
Cc: Peter Zijlstra 
Cc: arjan.van.de@intel.com
Cc: b...@alien8.de
Cc: dave.han...@intel.com
Cc: jmatt...@google.com
Cc: karah...@amazon.de
Cc: k...@vger.kernel.org
Cc: pbonz...@redhat.com
Cc: rkrc...@redhat.com
Link: http://lkml.kernel.org/r/1519037457-7643-4-git-send-email-d...@amazon.co.uk
Signed-off-by: Ingo Molnar 
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/entry/entry_32.S             |    3 -
 arch/x86/entry/entry_64.S             |    3 -
 arch/x86/include/asm/asm-prototypes.h |    3 -
 arch/x86/include/asm/nospec-branch.h  |   70 ++
 arch/x86/lib/Makefile                 |    1 
 arch/x86/lib/retpoline.S              |   56 ---
 6 files changed, 65 insertions(+), 71 deletions(-)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,8 +252,7 @@ ENTRY(__switch_to_asm)
 * exist, overwrite the RSB with entries which capture
 * speculative execution to prevent attack.
 */
-   /* Clobbers %ebx */
-   FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
/* restore callee-saved registers */
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -364,8 +364,7 @@ ENTRY(__switch_to_asm)
 * exist, overwrite the RSB with entries which capture
 * speculative execution to prevent attack.
 */
-   /* Clobbers %rbx */
-   FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
/* restore callee-saved registers */
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,7 +38,4 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
-asmlinkage void __fill_rsb(void);
-asmlinkage void __clear_rsb(void);
-
 #endif /* CONFIG_RETPOLINE */
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -8,6 +8,50 @@
 #include <asm/cpufeatures.h>
 #include <asm/msr-index.h>
 
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; lfence; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include <asm/nospec-branch.h>") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS		16	/* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+	mov	$(nr/2), reg;			\
+771:						\
+	call	772f;				\
+773:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	773b;				\
+772:						\
+	call	774f;				\
+775:	/* speculation trap */			\
+	pause;					\
+	lfence;					\
+	jmp	775b;				\
+774:						\
+	dec	reg;				\
+	jnz	771b;				\
+	add	$(BITS_PER_LONG/8) * nr, sp;
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -78,10 +122,17 @@
 #endif
 .endm
 
-/* This clobbers the BX register */
-.macro FILL_RETURN_BUFFER nr:req ftr:req
+ /*
+