[PATCH 3.16 32/76] x86/retpoline: Fill return stack buffer on vmexit

2018-03-11 Thread Ben Hutchings
3.16.56-rc1 review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit 117cc7a908c83697b0b737d15ae1eb5943afe35b upstream.

In accordance with the Intel and AMD documentation, we need to overwrite
all entries in the RSB on exiting a guest, to prevent malicious branch
target predictions from affecting the host kernel. This is needed both
for retpoline and for IBRS.
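
As an aside for review, here is a minimal user-space sketch of the
stuffing sequence this patch adds (illustrative only, not part of the
patch; the function name and label numbers are made up, though the
two-call loop mirrors the __FILL_RETURN_BUFFER macro below). Sixteen
iterations of two CALLs leave all 32 RSB entries pointing at a benign
pause/jmp capture loop that is never executed architecturally, then
the stack space the CALLs consumed is popped again:

	/* rsb-sketch.c: gcc -O2 rsb-sketch.c (x86-64 only) */
	static void fill_rsb_sketch(void)
	{
		unsigned long loops;

		/*
		 * Note: the kernel is built with -mno-red-zone; in user
		 * space the CALLs briefly overwrite the red zone, which
		 * is harmless here since nothing lives there.
		 */
		asm volatile ("mov	$16, %0\n\t"
			      "1:\n\t"
			      "call	2f\n\t"
			      "3:	pause\n\t"	/* speculation trap */
			      "jmp	3b\n\t"
			      "2:\n\t"
			      "call	4f\n\t"
			      "5:	pause\n\t"	/* speculation trap */
			      "jmp	5b\n\t"
			      "4:\n\t"
			      "dec	%0\n\t"
			      "jnz	1b\n\t"
			      "add	$256, %%rsp\n\t"	/* 32 entries * 8 bytes */
			      : "=r" (loops) : : "memory", "cc");
	}

	int main(void)
	{
		fill_rsb_sketch();
		return 0;
	}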

[ak: numbers again for the RSB stuffing labels]
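
For context, the new helper is called from both KVM guest-exit paths,
immediately after the asm blocks that execute VMRUN (svm.c, hunk at
the end of this mail) and VMLAUNCH/VMRESUME (vmx.c, truncated here;
the upstream commit does the same on that side). In effect:

	/* Eliminate branch target predictions from guest mode */
	vmexit_fill_RSB();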

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Tested-by: Peter Zijlstra (Intel) 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515755487-8524-1-git-send-email-d...@amazon.co.uk
[bwh: Backported to 3.16:
 - Drop the ANNOTATE_NOSPEC_ALTERNATIVEs
 - Adjust context]
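
(For reference, the dropped annotation: upstream prefixes each
ALTERNATIVE used for RSB stuffing with ANNOTATE_NOSPEC_ALTERNATIVE,
which emits a .discard.nospec section entry so that objtool skips the
speculation-trap instructions; 3.16 predates objtool, so the
annotation has no equivalent here. Upstream the inline asm opens
roughly as:

	asm volatile (ANNOTATE_NOSPEC_ALTERNATIVE
		      ALTERNATIVE("jmp 910f", ...

with everything after that point as in this backport.)
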
Signed-off-by: Ben Hutchings 
---
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -7,6 +7,48 @@
 #include <asm/alternative-asm.h>
 #include <asm/cpufeature.h>
 
+/*
+ * Fill the CPU return stack buffer.
+ *
+ * Each entry in the RSB, if used for a speculative 'ret', contains an
+ * infinite 'pause; jmp' loop to capture speculative execution.
+ *
+ * This is required in various cases for retpoline and IBRS-based
+ * mitigations for the Spectre variant 2 vulnerability. Sometimes to
+ * eliminate potentially bogus entries from the RSB, and sometimes
+ * purely to ensure that it doesn't get empty, which on some CPUs would
+ * allow predictions from other (unwanted!) sources to be used.
+ *
+ * We define a CPP macro such that it can be used from both .S files and
+ * inline assembly. It's possible to do a .macro and then include that
+ * from C via asm(".include ") but let's not go there.
+ */
+
+#define RSB_CLEAR_LOOPS		32	/* To forcibly overwrite all entries */
+#define RSB_FILL_LOOPS		16	/* To avoid underflow */
+
+/*
+ * Google experimented with loop-unrolling and this turned out to be
+ * the optimal version — two calls, each with their own speculation
+ * trap should their return address end up getting used, in a loop.
+ */
+#define __FILL_RETURN_BUFFER(reg, nr, sp)	\
+	mov	$(nr/2), reg;			\
+771:						\
+	call	772f;				\
+773:	/* speculation trap */			\
+	pause;					\
+	jmp	773b;				\
+772:						\
+	call	774f;				\
+775:	/* speculation trap */			\
+	pause;					\
+	jmp	775b;				\
+774:						\
+	dec	reg;				\
+	jnz	771b;				\
+	add	$(BITS_PER_LONG/8) * nr, sp;
+
 #ifdef __ASSEMBLY__
 
 /*
@@ -61,6 +103,19 @@
 #endif
 .endm
 
+ /*
+  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
+  * monstrosity above, manually.
+  */
+.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+#ifdef CONFIG_RETPOLINE
+   ALTERNATIVE "jmp .Lskip_rsb_\@",\
+   __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))\
+   \ftr
+.Lskip_rsb_\@:
+#endif
+.endm
+
 #else /* __ASSEMBLY__ */
 
 #if defined(CONFIG_X86_64) && defined(RETPOLINE)
@@ -97,7 +152,7 @@
X86_FEATURE_RETPOLINE)
 
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
-#else /* No retpoline */
+#else /* No retpoline for C / inline asm */
 # define CALL_NOSPEC "call *%[thunk_target]\n"
 # define THUNK_TARGET(addr) [thunk_target] "rm" (addr)
 #endif
@@ -112,5 +167,24 @@ enum spectre_v2_mitigation {
SPECTRE_V2_IBRS,
 };
 
+/*
+ * On VMEXIT we must ensure that no RSB predictions learned in the guest
+ * can be followed in the host, by overwriting the RSB completely. Both
+ * retpoline and IBRS mitigations for Spectre v2 need this; only on future
+ * CPUs with IBRS_ATT *might* it be avoided.
+ */
+static inline void vmexit_fill_RSB(void)
+{
+#ifdef CONFIG_RETPOLINE
+   unsigned long loops = RSB_CLEAR_LOOPS / 2;
+
+	asm volatile (ALTERNATIVE("jmp 910f",
+				  __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),
+				  X86_FEATURE_RETPOLINE)
+		      "910:"
+		      : "=r" (loops), ASM_CALL_CONSTRAINT
+		      : "r" (loops) : "memory" );
+#endif
+}
 #endif /* __ASSEMBLY__ */
 #endif /* __NOSPEC_BRANCH_H__ */
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -36,6 +36,7 @@
 #include <asm/desc.h>
 #include <asm/debugreg.h>
 #include <asm/kvm_para.h>
+#include <asm/nospec-branch.h>
 
 #include <asm/virtext.h>
 #include "trace.h"
@@ -3963,6 +3964,9 @@ static void svm_vcpu_run(struct kvm_vcpu