[PATCH 3.16 41/76] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-11 Thread Ben Hutchings
3.16.56-rc1 review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

[js] backport to 4.4 -- __switch_to_asm does not exist there, we
 have to patch the switch_to macros for both x86_32 and x86_64.

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Jiri Slaby 
[bwh: Backported to 3.16: use the first available feature number]
Signed-off-by: Ben Hutchings 
---
 arch/x86/include/asm/cpufeature.h |1 +
 arch/x86/include/asm/switch_to.h  |   38 ++
 arch/x86/kernel/cpu/bugs.c|   36 
 3 files changed, 75 insertions(+)

--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -187,6 +187,7 @@
 #define X86_FEATURE_HW_PSTATE  (7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_INVPCID_SINGLE (7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */
+#define X86_FEATURE_RSB_CTXSW  (7*32+11) /* Fill RSB on context switches */
 
 #define X86_FEATURE_RETPOLINE  (7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_SWITCH_TO_H
 #define _ASM_X86_SWITCH_TO_H
 
+#include <asm/nospec-branch.h>
+
 struct task_struct; /* one of the stranger aspects of C forward declarations */
 __visible struct task_struct *__switch_to(struct task_struct *prev,
   struct task_struct *next);
@@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct
 #define __switch_canary_iparam
 #endif /* CC_STACKPROTECTOR */
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+#define __retpoline_fill_return_buffer \
+   ALTERNATIVE("jmp 910f", \
+   __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\
+   X86_FEATURE_RSB_CTXSW)  \
+   "910:\n\t"
+#else
+#define __retpoline_fill_return_buffer
+#endif
+
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
  * it also protects other tasks from NT leaking through sysenter etc.
@@ -46,6 +65,7 @@ do {  
\
 "movl $1f,%[prev_ip]\n\t"  /* saveEIP   */ \
 "pushl 

[PATCH 3.16 41/76] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-11 Thread Ben Hutchings
3.16.56-rc1 review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

[js] backport to 4.4 -- __switch_to_asm does not exist there, we
 have to patch the switch_to macros for both x86_32 and x86_64.

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Jiri Slaby 
[bwh: Backported to 3.16: use the first available feature number]
Signed-off-by: Ben Hutchings 
---
 arch/x86/include/asm/cpufeature.h |1 +
 arch/x86/include/asm/switch_to.h  |   38 ++
 arch/x86/kernel/cpu/bugs.c|   36 
 3 files changed, 75 insertions(+)

--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -187,6 +187,7 @@
 #define X86_FEATURE_HW_PSTATE  (7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
 #define X86_FEATURE_INVPCID_SINGLE (7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */
+#define X86_FEATURE_RSB_CTXSW  (7*32+11) /* Fill RSB on context switches */
 
 #define X86_FEATURE_RETPOLINE  (7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_SWITCH_TO_H
 #define _ASM_X86_SWITCH_TO_H
 
+#include <asm/nospec-branch.h>
+
 struct task_struct; /* one of the stranger aspects of C forward declarations */
 __visible struct task_struct *__switch_to(struct task_struct *prev,
   struct task_struct *next);
@@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct
 #define __switch_canary_iparam
 #endif /* CC_STACKPROTECTOR */
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+#define __retpoline_fill_return_buffer \
+   ALTERNATIVE("jmp 910f", \
+   __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\
+   X86_FEATURE_RSB_CTXSW)  \
+   "910:\n\t"
+#else
+#define __retpoline_fill_return_buffer
+#endif
+
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
  * it also protects other tasks from NT leaking through sysenter etc.
@@ -46,6 +65,7 @@ do {  
\
 "movl $1f,%[prev_ip]\n\t"  /* saveEIP   */ \
 "pushl %[next_ip]\n\t" /* restore EIP   */ \
 __switch_canary\
+__retpoline_fill_return_buffer \
 "jmp __switch_to\n"/* regparm call  */ \
 "1:\t" \
 "popl %%ebp\n\t"   /* restore 

[PATCH 3.2 075/104] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-11 Thread Ben Hutchings
3.2.101-rc1 review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

[js] backport to 4.4 -- __switch_to_asm does not exist there, we
 have to patch the switch_to macros for both x86_32 and x86_64.

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Jiri Slaby 
[bwh: Backported to 3.2:
 - Use the first available feature number
 - Adjust filename]
Signed-off-by: Ben Hutchings 
---
 arch/x86/include/asm/cpufeature.h |1 +
 arch/x86/include/asm/system.h |   38 ++
 arch/x86/kernel/cpu/bugs.c|   36 
 3 files changed, 75 insertions(+)

--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -178,6 +178,7 @@
 #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
 #define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
 #define X86_FEATURE_INVPCID_SINGLE (7*32+ 8) /* Effectively INVPCID && CR4.PCIDE=1 */
+#define X86_FEATURE_RSB_CTXSW  (7*32+9) /* Fill RSB on context switches */
 
 #define X86_FEATURE_RETPOLINE  (7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include <asm/nospec-branch.h>
 
 #include 
 #include 
@@ -41,6 +42,23 @@ extern void show_regs_common(void);
 #define __switch_canary_iparam
 #endif /* CC_STACKPROTECTOR */
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+#define __retpoline_fill_return_buffer \
+   ALTERNATIVE("jmp 910f", \
+   __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\
+   X86_FEATURE_RSB_CTXSW)  \
+   "910:\n\t"
+#else
+#define __retpoline_fill_return_buffer
+#endif
+
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
  * it also protects other tasks from NT leaking through sysenter etc.
@@ -63,6 +81,7 @@ do {  
\
 "movl $1f,%[prev_ip]\n\t"  /* saveEIP   */ \
 "pushl %[next_ip]\n\t" /* restore EIP   */ \
 __switch_canary\
+__retpoline_fill_return_buffer \
 

[PATCH 3.2 075/104] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-11 Thread Ben Hutchings
3.2.101-rc1 review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

[js] backport to 4.4 -- __switch_to_asm does not exist there, we
 have to patch the switch_to macros for both x86_32 and x86_64.

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Jiri Slaby 
[bwh: Backported to 3.2:
 - Use the first available feature number
 - Adjust filename]
Signed-off-by: Ben Hutchings 
---
 arch/x86/include/asm/cpufeature.h |1 +
 arch/x86/include/asm/system.h |   38 ++
 arch/x86/kernel/cpu/bugs.c|   36 
 3 files changed, 75 insertions(+)

--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -178,6 +178,7 @@
 #define X86_FEATURE_PTS (7*32+ 6) /* Intel Package Thermal Status */
 #define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
 #define X86_FEATURE_INVPCID_SINGLE (7*32+ 8) /* Effectively INVPCID && CR4.PCIDE=1 */
+#define X86_FEATURE_RSB_CTXSW  (7*32+9) /* Fill RSB on context switches */
 
 #define X86_FEATURE_RETPOLINE  (7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -6,6 +6,7 @@
 #include 
 #include 
 #include 
+#include <asm/nospec-branch.h>
 
 #include 
 #include 
@@ -41,6 +42,23 @@ extern void show_regs_common(void);
 #define __switch_canary_iparam
 #endif /* CC_STACKPROTECTOR */
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+#define __retpoline_fill_return_buffer \
+   ALTERNATIVE("jmp 910f", \
+   __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\
+   X86_FEATURE_RSB_CTXSW)  \
+   "910:\n\t"
+#else
+#define __retpoline_fill_return_buffer
+#endif
+
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
  * it also protects other tasks from NT leaking through sysenter etc.
@@ -63,6 +81,7 @@ do {  
\
 "movl $1f,%[prev_ip]\n\t"  /* saveEIP   */ \
 "pushl %[next_ip]\n\t" /* restore EIP   */ \
 __switch_canary\
+__retpoline_fill_return_buffer \
 "jmp __switch_to\n"/* regparm call  */ \
 "1:\t" \
 "popl %%ebp\n\t"   /* restore EBP   */ \
@@ -117,6 +136,23 @@ do {   
\
 #define __switch_canary_iparam
 #endif /* CC_STACKPROTECTOR */
 
+#ifdef 

Re: x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-09 Thread Maciej S. Szmigiero
On 09.03.2018 16:14, Andi Kleen wrote:
>> Shouldn't the RSB filling on context switch also be done on non-IBPB
>> CPUs to protect (retpolined) user space tasks from other user space
>> tasks?
> 
> The comment is actually incorrect. There's no risk to hit user space
> addresses if we have KPTI and NX (which is fairly universal).
> 
> It's mainly needed on Skylake era CPUs.
> 
> Should fix the comment. I'll send a patch.

But what about userspace-to-userspace attacks? - the ones that IBPB on 
context switches currently protects against (at least for high-value, or
as implemented currently, non-dumpable, processes)?

If I understand the issue correctly, high-value user space processes can
be protected from other user space processes even on CPUs that lack
IBPB as long as they are recompiled with retpolines and there is no
danger of RSB entries from one process being used by another one after
a context switch.
For Skylake this would not be enough, but there we'll (hopefully) have
the IBPB instead.

> -Andi
> 

Maciej


Re: x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-09 Thread Maciej S. Szmigiero
On 09.03.2018 16:14, Andi Kleen wrote:
>> Shouldn't the RSB filling on context switch also be done on non-IBPB
>> CPUs to protect (retpolined) user space tasks from other user space
>> tasks?
> 
> The comment is actually incorrect. There's no risk to hit user space
> addresses if we have KPTI and NX (which is fairly universal).
> 
> It's mainly needed on Skylake era CPUs.
> 
> Should fix the comment. I'll send a patch.

But what about userspace-to-userspace attacks? - the ones that IBPB on 
context switches currently protects against (at least for high-value, or
as implemented currently, non-dumpable, processes)?

If I understand the issue correctly, high-value user space processes can
be protected from other user space processes even on CPUs that lack
IBPB as long as they are recompiled with retpolines and there is no
danger of RSB entries from one process being used by another one after
a context switch.
For Skylake this would not be enough, but there we'll (hopefully) have
the IBPB instead.

> -Andi
> 

Maciej


Re: x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-09 Thread Andi Kleen
> Shouldn't the RSB filling on context switch also be done on non-IBPB
> CPUs to protect (retpolined) user space tasks from other user space
> tasks?

The comment is actually incorrect. There's no risk to hit user space
addresses if we have KPTI and NX (which is fairly universal).

It's mainly needed on Skylake era CPUs.

Should fix the comment. I'll send a patch.

-Andi


Re: x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-09 Thread Andi Kleen
> Shouldn't the RSB filling on context switch also be done on non-IBPB
> CPUs to protect (retpolined) user space tasks from other user space
> tasks?

The comment is actually incorrect. There's no risk to hit user space
addresses if we have KPTI and NX (which is fairly universal).

It's mainly needed on Skylake era CPUs.

Should fix the comment. I'll send a patch.

-Andi


Re: x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-09 Thread Maciej S. Szmigiero
On 12.01.2018 18:49, Woodhouse, David wrote:
> When we context switch from a shallow call stack to a deeper one, as we
> 'ret' up the deeper side we may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace. This is
> problematic if we have neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel), as malicious code in userspace
> may then be executed speculatively. So overwrite the CPU's return
> prediction stack with calls which are predicted to return to an infinite
> loop, to "capture" speculation if this happens. This is required both
> for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.
> 
> On Skylake+ the problem is slightly different, and an *underflow* of the
> RSB may cause errant branch predictions to occur. So there it's not so
> much overwrite, as *filling* the RSB to attempt to prevent it getting
> empty. This is only a partial solution for Skylake+ since there are many
> other conditions which may result in the RSB becoming empty. The full
> solution on Skylake+ is to use IBRS, which will prevent the problem even
> when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
> required on context switch.
> 
> Signed-off-by: David Woodhouse 
> Acked-by: Arjan van de Ven 
> ---
(..)
> @@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void)
>  
>   spectre_v2_enabled = mode;
>   pr_info("%s\n", spectre_v2_strings[mode]);
> +
> + /*
> +  * If we don't have SMEP or KPTI, then we run the risk of hitting
> +  * userspace addresses in the RSB after a context switch from a
> +  * shallow call stack to a deeper one. We must fill the entire
> +  * RSB to avoid that, even when using IBRS.
> +  *
> +  * Skylake era CPUs have a separate issue with *underflow* of the
> +  * RSB, when they will predict 'ret' targets from the generic BTB.
> +  * IBRS makes that safe, but we need to fill the RSB on context
> +  * switch if we're using retpoline.
> +  */
> + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> + pr_info("Filling RSB on context switch\n");
> + }

Shouldn't the RSB filling on context switch also be done on non-IBPB
CPUs to protect (retpolined) user space tasks from other user space
tasks?

We already issue an IBPB when switching to high-value user space tasks
to protect them from other user space tasks.

Thanks,
Maciej


Re: x86/retpoline: Fill RSB on context switch for affected CPUs

2018-03-09 Thread Maciej S. Szmigiero
On 12.01.2018 18:49, Woodhouse, David wrote:
> When we context switch from a shallow call stack to a deeper one, as we
> 'ret' up the deeper side we may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace. This is
> problematic if we have neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel), as malicious code in userspace
> may then be executed speculatively. So overwrite the CPU's return
> prediction stack with calls which are predicted to return to an infinite
> loop, to "capture" speculation if this happens. This is required both
> for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.
> 
> On Skylake+ the problem is slightly different, and an *underflow* of the
> RSB may cause errant branch predictions to occur. So there it's not so
> much overwrite, as *filling* the RSB to attempt to prevent it getting
> empty. This is only a partial solution for Skylake+ since there are many
> other conditions which may result in the RSB becoming empty. The full
> solution on Skylake+ is to use IBRS, which will prevent the problem even
> when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
> required on context switch.
> 
> Signed-off-by: David Woodhouse 
> Acked-by: Arjan van de Ven 
> ---
(..)
> @@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void)
>  
>   spectre_v2_enabled = mode;
>   pr_info("%s\n", spectre_v2_strings[mode]);
> +
> + /*
> +  * If we don't have SMEP or KPTI, then we run the risk of hitting
> +  * userspace addresses in the RSB after a context switch from a
> +  * shallow call stack to a deeper one. We must fill the entire
> +  * RSB to avoid that, even when using IBRS.
> +  *
> +  * Skylake era CPUs have a separate issue with *underflow* of the
> +  * RSB, when they will predict 'ret' targets from the generic BTB.
> +  * IBRS makes that safe, but we need to fill the RSB on context
> +  * switch if we're using retpoline.
> +  */
> + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> + pr_info("Filling RSB on context switch\n");
> + }

Shouldn't the RSB filling on context switch also be done on non-IBPB
CPUs to protect (retpolined) user space tasks from other user space
tasks?

We already issue an IBPB when switching to high-value user space tasks
to protect them from other user space tasks.

Thanks,
Maciej


[PATCH 4.4 07/74] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-29 Thread Greg Kroah-Hartman
4.4-stable review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

[js] backport to 4.4 -- __switch_to_asm does not exist there, we
 have to patch the switch_to macros for both x86_32 and x86_64.

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Jiri Slaby 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/include/asm/cpufeature.h |1 +
 arch/x86/include/asm/switch_to.h  |   38 ++
 arch/x86/kernel/cpu/bugs.c|   36 
 3 files changed, 75 insertions(+)

--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -199,6 +199,7 @@
 #define X86_FEATURE_HWP_EPP ( 7*32+13) /* Intel HWP_EPP */
 #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context switches */
 
 #define X86_FEATURE_RETPOLINE  ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_SWITCH_TO_H
 #define _ASM_X86_SWITCH_TO_H
 
+#include <asm/nospec-branch.h>
+
 struct task_struct; /* one of the stranger aspects of C forward declarations */
 __visible struct task_struct *__switch_to(struct task_struct *prev,
   struct task_struct *next);
@@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct
 #define __switch_canary_iparam
 #endif /* CC_STACKPROTECTOR */
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+#define __retpoline_fill_return_buffer \
+   ALTERNATIVE("jmp 910f", \
+   __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\
+   X86_FEATURE_RSB_CTXSW)  \
+   "910:\n\t"
+#else
+#define __retpoline_fill_return_buffer
+#endif
+
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
  * it also protects other tasks from NT leaking through sysenter etc.
@@ -46,6 +65,7 @@ do {  
\
 "movl $1f,%[prev_ip]\n\t"  /* saveEIP   */ \
 "pushl %[next_ip]\n\t" /* restore EIP   */ \
 __switch_canary   

[PATCH 4.4 07/74] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-29 Thread Greg Kroah-Hartman
4.4-stable review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

[js] backport to 4.4 -- __switch_to_asm does not exist there, we
 have to patch the switch_to macros for both x86_32 and x86_64.

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Jiri Slaby 
Signed-off-by: Greg Kroah-Hartman 
---
 arch/x86/include/asm/cpufeature.h |1 +
 arch/x86/include/asm/switch_to.h  |   38 ++
 arch/x86/kernel/cpu/bugs.c|   36 
 3 files changed, 75 insertions(+)

--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -199,6 +199,7 @@
 #define X86_FEATURE_HWP_EPP( 7*32+13) /* Intel HWP_EPP */
 #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context switches */
 
 #define X86_FEATURE_RETPOLINE  ( 7*32+29) /* Generic Retpoline mitigation for 
Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for 
Spectre variant 2 */
--- a/arch/x86/include/asm/switch_to.h
+++ b/arch/x86/include/asm/switch_to.h
@@ -1,6 +1,8 @@
 #ifndef _ASM_X86_SWITCH_TO_H
 #define _ASM_X86_SWITCH_TO_H
 
+#include <asm/nospec-branch.h>
+
 struct task_struct; /* one of the stranger aspects of C forward declarations */
 __visible struct task_struct *__switch_to(struct task_struct *prev,
   struct task_struct *next);
@@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct
 #define __switch_canary_iparam
 #endif /* CC_STACKPROTECTOR */
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+#define __retpoline_fill_return_buffer \
+   ALTERNATIVE("jmp 910f", \
+   __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, 
%%esp)),\
+   X86_FEATURE_RSB_CTXSW)  \
+   "910:\n\t"
+#else
+#define __retpoline_fill_return_buffer
+#endif
+
 /*
  * Saving eflags is important. It switches not only IOPL between tasks,
  * it also protects other tasks from NT leaking through sysenter etc.
@@ -46,6 +65,7 @@ do {  
\
 "movl $1f,%[prev_ip]\n\t"  /* saveEIP   */ \
 "pushl %[next_ip]\n\t" /* restore EIP   */ \
 __switch_canary\
+__retpoline_fill_return_buffer \
 "jmp __switch_to\n"/* regparm call  */ \
 "1:\t" \
 "popl %%ebp\n\t"   /* restore EBP   */ \
@@ -100,6 +120,23 @@ do {   

[PATCH 4.14 32/89] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-22 Thread Greg Kroah-Hartman
4.14-stable review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/entry/entry_32.S  |   11 +++
 arch/x86/entry/entry_64.S  |   11 +++
 arch/x86/include/asm/cpufeatures.h |1 +
 arch/x86/kernel/cpu/bugs.c |   36 
 4 files changed, 59 insertions(+)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply 
Accumulation Single precision */
 
 #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth 
Allocation */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context 
switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,23 @@ disable:
return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+   boot_cpu_data.x86 == 6) {
+

[PATCH 4.14 32/89] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-22 Thread Greg Kroah-Hartman
4.14-stable review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/entry/entry_32.S  |   11 +++
 arch/x86/entry/entry_64.S  |   11 +++
 arch/x86/include/asm/cpufeatures.h |1 +
 arch/x86/kernel/cpu/bugs.c |   36 
 4 files changed, 59 insertions(+)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply 
Accumulation Single precision */
 
 #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth 
Allocation */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context 
switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,23 @@ disable:
return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+   boot_cpu_data.x86 == 6) {
+   switch (boot_cpu_data.x86_model) {
+   case INTEL_FAM6_SKYLAKE_MOBILE:
+   case INTEL_FAM6_SKYLAKE_DESKTOP:
+   case INTEL_FAM6_SKYLAKE_X:
+   case INTEL_FAM6_KABYLAKE_MOBILE:
+   case INTEL_FAM6_KABYLAKE_DESKTOP:
+   return true;
+   }
+   }
+   return false;
+}
+
 static 

[PATCH 4.9 14/47] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-22 Thread Greg Kroah-Hartman
4.9-stable review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/entry/entry_32.S  |   11 +++
 arch/x86/entry/entry_64.S  |   11 +++
 arch/x86/include/asm/cpufeatures.h |1 +
 arch/x86/kernel/cpu/bugs.c |   36 
 4 files changed, 59 insertions(+)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -229,6 +229,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -427,6 +427,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -200,6 +200,7 @@
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network 
Instructions */
 #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation 
Single precision */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context 
switches */
 
 /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
 #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o 
nokaiser */
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -154,6 +155,23 @@ disable:
return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+   

[PATCH 4.9 14/47] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-22 Thread Greg Kroah-Hartman
4.9-stable review patch.  If anyone has any objections, please let me know.

--

From: David Woodhouse 

commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream.

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk
Signed-off-by: Greg Kroah-Hartman 

---
 arch/x86/entry/entry_32.S  |   11 +++
 arch/x86/entry/entry_64.S  |   11 +++
 arch/x86/include/asm/cpufeatures.h |1 +
 arch/x86/kernel/cpu/bugs.c |   36 
 4 files changed, 59 insertions(+)

--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -229,6 +229,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -427,6 +427,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -200,6 +200,7 @@
 #define X86_FEATURE_INTEL_PT   ( 7*32+15) /* Intel Processor Trace */
 #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network 
Instructions */
 #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation 
Single precision */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context 
switches */
 
 /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
 #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o 
nokaiser */
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -22,6 +22,7 @@
 #include 
 #include 
 #include 
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -154,6 +155,23 @@ disable:
return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+   if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
+   boot_cpu_data.x86 == 6) {
+   switch (boot_cpu_data.x86_model) {
+   case INTEL_FAM6_SKYLAKE_MOBILE:
+   case INTEL_FAM6_SKYLAKE_DESKTOP:
+   case INTEL_FAM6_SKYLAKE_X:
+   case INTEL_FAM6_KABYLAKE_MOBILE:
+   case INTEL_FAM6_KABYLAKE_DESKTOP:

Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread Kees Cook
On Mon, Jan 15, 2018 at 6:42 AM, Arjan van de Ven  wrote:
>>
>> This would mean that userspace would see return predictions based
>> on the values the kernel 'stuffed' into the RSB to fill it.
>>
>> Potentially this leaks a kernel address to userspace.
>
>
> KASLR pretty much died in May this year to be honest with the KAISER paper
> (if not before then)

KASLR was always on shaky ground for local attacks. For pure remote
attacks, it's still useful. And for driving forward research, it
appears to be quite useful. ;)

-Kees

-- 
Kees Cook
Pixel Security


Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread Kees Cook
On Mon, Jan 15, 2018 at 6:42 AM, Arjan van de Ven  wrote:
>>
>> This would mean that userspace would see return predictions based
>> on the values the kernel 'stuffed' into the RSB to fill it.
>>
>> Potentially this leaks a kernel address to userspace.
>
>
> KASLR pretty much died in May this year to be honest with the KAISER paper
> (if not before then)

KASLR was always on shaky ground for local attacks. For pure remote
attacks, it's still useful. And for driving forward research, it
appears to be quite useful. ;)

-Kees

-- 
Kees Cook
Pixel Security


Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread Arjan van de Ven


This would mean that userspace would see return predictions based
on the values the kernel 'stuffed' into the RSB to fill it.

Potentially this leaks a kernel address to userspace.


KASLR pretty much died in May this year to be honest with the KAISER paper (if 
not before then)

also with KPTI the address won't have a TLB mapping so it wouldn't
actually be speculated into.


Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread Arjan van de Ven


This would mean that userspace would see return predictions based
on the values the kernel 'stuffed' into the RSB to fill it.

Potentially this leaks a kernel address to userspace.


KASLR pretty much died in May this year to be honest with the KAISER paper (if 
not before then)

also with KPTI the address won't have a TLB mapping so it wouldn't
actually be speculated into.


Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread David Woodhouse
On Mon, 2018-01-15 at 14:35 +0000, David Laight wrote:
> From: David Woodhouse
> > 
> > Sent: 14 January 2018 17:04
> > x86/retpoline: Fill RSB on context switch for affected CPUs
> > 
> > On context switch from a shallow call stack to a deeper one, as the CPU
> > does 'ret' up the deeper side it may encounter RSB entries (predictions for
> > where the 'ret' goes to) which were populated in userspace.
> > 
> > This is problematic if neither SMEP nor KPTI (the latter of which marks
> > userspace pages as NX for the kernel) are active, as malicious code in
> > userspace may then be executed speculatively.
> ...
> 
> Do we have a guarantee that all cpu actually detect the related RSB underflow?
> 
> It wouldn't surprise me if at least some cpu just let it wrap.
> 
> This would mean that userspace would see return predictions based
> on the values the kernel 'stuffed' into the RSB to fill it.
> 
> Potentially this leaks a kernel address to userspace.

Yeah, KASLR is dead unless we do a full IBPB before *every* VMLAUNCH or
return to userspace anyway, isn't it? With KPTI we could put the RSB-
stuffer into the syscall trampoline page perhaps...

For this to be a concern for userspace, I think it does have to be true
that only the lower bits are used, which adds a little complexity but
probably isn't insurmountable?



smime.p7s
Description: S/MIME cryptographic signature


Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread David Woodhouse
On Mon, 2018-01-15 at 14:35 +0000, David Laight wrote:
> From: David Woodhouse
> > 
> > Sent: 14 January 2018 17:04
> > x86/retpoline: Fill RSB on context switch for affected CPUs
> > 
> > On context switch from a shallow call stack to a deeper one, as the CPU
> > does 'ret' up the deeper side it may encounter RSB entries (predictions for
> > where the 'ret' goes to) which were populated in userspace.
> > 
> > This is problematic if neither SMEP nor KPTI (the latter of which marks
> > userspace pages as NX for the kernel) are active, as malicious code in
> > userspace may then be executed speculatively.
> ...
> 
> Do we have a guarantee that all cpu actually detect the related RSB underflow?
> 
> It wouldn't surprise me if at least some cpu just let it wrap.
> 
> This would mean that userspace would see return predictions based
> on the values the kernel 'stuffed' into the RSB to fill it.
> 
> Potentially this leaks a kernel address to userspace.

Yeah, KASLR is dead unless we do a full IBPB before *every* VMLAUNCH or
return to userspace anyway, isn't it? With KPTI we could put the RSB-
stuffer into the syscall trampoline page perhaps...

For this to be a concern for userspace, I think it does have to be true
that only the lower bits are used, which adds a little complexity but
probably isn't insurmountable?



smime.p7s
Description: S/MIME cryptographic signature


RE: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread David Laight
From: David Woodhouse
> Sent: 14 January 2018 17:04
> x86/retpoline: Fill RSB on context switch for affected CPUs
> 
> On context switch from a shallow call stack to a deeper one, as the CPU
> does 'ret' up the deeper side it may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace.
> 
> This is problematic if neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel) are active, as malicious code in
> userspace may then be executed speculatively.
...

Do we have a guarantee that all cpu actually detect the related RSB underflow?

It wouldn't surprise me if at least some cpu just let it wrap.

This would mean that userspace would see return predictions based
on the values the kernel 'stuffed' into the RSB to fill it.

Potentially this leaks a kernel address to userspace.

David



RE: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread David Laight
From: David Woodhouse
> Sent: 14 January 2018 17:04
> x86/retpoline: Fill RSB on context switch for affected CPUs
> 
> On context switch from a shallow call stack to a deeper one, as the CPU
> does 'ret' up the deeper side it may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace.
> 
> This is problematic if neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel) are active, as malicious code in
> userspace may then be executed speculatively.
...

Do we have a guarantee that all cpu actually detect the related RSB underflow?

It wouldn't surprise me if at least some cpu just let it wrap.

This would mean that userspace would see return predictions based
on the values the kernel 'stuffed' into the RSB to fill it.

Potentially this leaks a kernel address to userspace.

David



Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread David Woodhouse
On Sun, 2018-01-14 at 16:05 -0800, Andi Kleen wrote:
> > + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > + pr_info("Filling RSB on context switch\n");
> > + }
> 
> Missing an option to turn this off.

Deliberately so. You can already boot with 'spectre_v2=off' to turn off
the mitigations. We are not intending to permit all the bullshit micro-
management of IBRS=3/IBPB=2/RSB=π nonsense.

If you choose retpoline, you get the RSB stuffing which is appropriate
along with that. With IBRS, you get the RSB stuffing which is
appropriate with that. You don't get command line or sysfs tunables to
mess it. You *do* have the source code, if you really want to make
changes. Don't.

smime.p7s
Description: S/MIME cryptographic signature


Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-15 Thread David Woodhouse
On Sun, 2018-01-14 at 16:05 -0800, Andi Kleen wrote:
> > + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > + pr_info("Filling RSB on context switch\n");
> > + }
> 
> Missing an option to turn this off.

Deliberately so. You can already boot with 'spectre_v2=off' to turn off
the mitigations. We are not intending to permit all the bullshit micro-
management of IBRS=3/IBPB=2/RSB=π nonsense.

If you choose retpoline, you get the RSB stuffing which is appropriate
along with that. With IBRS, you get the RSB stuffing which is
appropriate with that. You don't get command line or sysfs tunables to
mess it. You *do* have the source code, if you really want to make
changes. Don't.

smime.p7s
Description: S/MIME cryptographic signature


Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread Andi Kleen
On Sun, Jan 14, 2018 at 04:05:54PM -0800, Andi Kleen wrote:
> > +   if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +!boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > +   setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > +   pr_info("Filling RSB on context switch\n");
> > +   }
> 
> Missing an option to turn this off.

My earlier patch did this properly by folding it 
into the big option parser.

https://marc.info/?l=linux-kernel&m=151578282016915&w=2

-Andi




Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread Andi Kleen
On Sun, Jan 14, 2018 at 04:05:54PM -0800, Andi Kleen wrote:
> > +   if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +!boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > +   setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > +   pr_info("Filling RSB on context switch\n");
> > +   }
> 
> Missing an option to turn this off.

My earlier patch did this properly by folding it 
into the big option parser.

https://marc.info/?l=linux-kernel&m=151578282016915&w=2

-Andi




Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread Andi Kleen
> + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> + pr_info("Filling RSB on context switch\n");
> + }

Missing an option to turn this off.

-Andi


Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread Andi Kleen
> + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> + pr_info("Filling RSB on context switch\n");
> + }

Missing an option to turn this off.

-Andi


[tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread tip-bot for David Woodhouse
Commit-ID:  c995efd5a740d9cbafbf58bde4973e8b50b4d761
Gitweb: https://git.kernel.org/tip/c995efd5a740d9cbafbf58bde4973e8b50b4d761
Author: David Woodhouse <d...@amazon.co.uk>
AuthorDate: Fri, 12 Jan 2018 17:49:25 +0000
Committer:  Thomas Gleixner <t...@linutronix.de>
CommitDate: Mon, 15 Jan 2018 00:32:44 +0100

x86/retpoline: Fill RSB on context switch for affected CPUs

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

Signed-off-by: David Woodhouse <d...@amazon.co.uk>
Signed-off-by: Thomas Gleixner <t...@linutronix.de>
Acked-by: Arjan van de Ven <ar...@linux.intel.com>
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel <r...@redhat.com>
Cc: Andi Kleen <a...@linux.intel.com>
Cc: Josh Poimboeuf <jpoim...@redhat.com>
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Jiri Kosina <ji...@kernel.org>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Kees Cook <keesc...@google.com>
Cc: Tim Chen <tim.c.c...@linux.intel.com>
Cc: Greg Kroah-Hartman <gre...@linux-foundation.org>
Cc: Paul Turner <p...@google.com>
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk

---
 arch/x86/entry/entry_32.S  | 11 +++
 arch/x86/entry/entry_64.S  | 11 +++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c | 36 
 4 files changed, 59 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..60c4c34 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..d54a0ed 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply 
Accumulation Single precision */
 
 #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth 
Allocation */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context 
switches */
 
 /* Virtualization flags: Linux defined, word 8 */

[tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread tip-bot for David Woodhouse
Commit-ID:  c995efd5a740d9cbafbf58bde4973e8b50b4d761
Gitweb: https://git.kernel.org/tip/c995efd5a740d9cbafbf58bde4973e8b50b4d761
Author: David Woodhouse 
AuthorDate: Fri, 12 Jan 2018 17:49:25 +
Committer:  Thomas Gleixner 
CommitDate: Mon, 15 Jan 2018 00:32:44 +0100

x86/retpoline: Fill RSB on context switch for affected CPUs

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk

---
 arch/x86/entry/entry_32.S  | 11 +++
 arch/x86/entry/entry_64.S  | 11 +++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c | 36 
 4 files changed, 59 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..60c4c34 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..d54a0ed 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply 
Accumulation Single precision */
 
 #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth 
Allocation */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context 
switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc261..390b3dc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,23 @@ disable:
return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB ha

[tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread tip-bot for David Woodhouse
Commit-ID:  a0ab15c0fb68e202bebd9b17fa49fd7ec48975b3
Gitweb: https://git.kernel.org/tip/a0ab15c0fb68e202bebd9b17fa49fd7ec48975b3
Author: David Woodhouse <d...@amazon.co.uk>
AuthorDate: Fri, 12 Jan 2018 17:49:25 +
Committer:  Thomas Gleixner <t...@linutronix.de>
CommitDate: Sun, 14 Jan 2018 16:41:39 +0100

x86/retpoline: Fill RSB on context switch for affected CPUs

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

Signed-off-by: David Woodhouse <d...@amazon.co.uk>
Signed-off-by: Thomas Gleixner <t...@linutronix.de>
Acked-by: Arjan van de Ven <ar...@linux.intel.com>
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel <r...@redhat.com>
Cc: Andi Kleen <a...@linux.intel.com>
Cc: Josh Poimboeuf <jpoim...@redhat.com>
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra <pet...@infradead.org>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Jiri Kosina <ji...@kernel.org>
Cc: Andy Lutomirski <l...@amacapital.net>
Cc: Dave Hansen <dave.han...@intel.com>
Cc: Kees Cook <keesc...@google.com>
Cc: Tim Chen <tim.c.c...@linux.intel.com>
Cc: Greg Kroah-Hartman <gre...@linux-foundation.org>
Cc: Paul Turner <p...@google.com>
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk

---
 arch/x86/entry/entry_32.S  | 11 +++
 arch/x86/entry/entry_64.S  | 11 +++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c | 36 
 4 files changed, 59 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..60c4c34 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..d54a0ed 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply 
Accumulation Single precision */
 
 #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth 
Allocation */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context 
switches */
 
 /* Virtualization flags: Linux defined, word 8 */

[tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread tip-bot for David Woodhouse
Commit-ID:  a0ab15c0fb68e202bebd9b17fa49fd7ec48975b3
Gitweb: https://git.kernel.org/tip/a0ab15c0fb68e202bebd9b17fa49fd7ec48975b3
Author: David Woodhouse 
AuthorDate: Fri, 12 Jan 2018 17:49:25 +
Committer:  Thomas Gleixner 
CommitDate: Sun, 14 Jan 2018 16:41:39 +0100

x86/retpoline: Fill RSB on context switch for affected CPUs

On context switch from a shallow call stack to a deeper one, as the CPU
does 'ret' up the deeper side it may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace.

This is problematic if neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel) are active, as malicious code in
userspace may then be executed speculatively.

Overwrite the CPU's return prediction stack with calls which are predicted
to return to an infinite loop, to "capture" speculation if this
happens. This is required both for retpoline, and also in conjunction with
IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so much
overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

[ tglx: Added missing vendor check and slightly massaged comments and
changelog ]

Signed-off-by: David Woodhouse 
Signed-off-by: Thomas Gleixner 
Acked-by: Arjan van de Ven 
Cc: gno...@lxorguk.ukuu.org.uk
Cc: Rik van Riel 
Cc: Andi Kleen 
Cc: Josh Poimboeuf 
Cc: thomas.lenda...@amd.com
Cc: Peter Zijlstra 
Cc: Linus Torvalds 
Cc: Jiri Kosina 
Cc: Andy Lutomirski 
Cc: Dave Hansen 
Cc: Kees Cook 
Cc: Tim Chen 
Cc: Greg Kroah-Hartman 
Cc: Paul Turner 
Link: 
https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk

---
 arch/x86/entry/entry_32.S  | 11 +++
 arch/x86/entry/entry_64.S  | 11 +++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c | 36 
 4 files changed, 59 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..60c4c34 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..d54a0ed 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When switching from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
diff --git a/arch/x86/include/asm/cpufeatures.h 
b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply 
Accumulation Single precision */
 
 #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth 
Allocation */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context 
switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc261..390b3dc 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include 
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,23 @@ disable:
return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB ha

Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread Thomas Gleixner
On Fri, 12 Jan 2018, David Woodhouse wrote:
> +/* Check for Skylake-like CPUs (for RSB handling) */
> +static bool __init is_skylake_era(void)
> +{
> + if (boot_cpu_data.x86 == 6) {

This wants a check for vendor = intel

> + switch (boot_cpu_data.x86_model) {
> + case INTEL_FAM6_SKYLAKE_MOBILE:
> + case INTEL_FAM6_SKYLAKE_DESKTOP:
> + case INTEL_FAM6_SKYLAKE_X:
> + case INTEL_FAM6_KABYLAKE_MOBILE:
> + case INTEL_FAM6_KABYLAKE_DESKTOP:
> + return true;
> + }
> + }

Thanks,

tglx


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-14 Thread Thomas Gleixner
On Fri, 12 Jan 2018, David Woodhouse wrote:
> +/* Check for Skylake-like CPUs (for RSB handling) */
> +static bool __init is_skylake_era(void)
> +{
> + if (boot_cpu_data.x86 == 6) {

This wants a check for vendor = intel

> + switch (boot_cpu_data.x86_model) {
> + case INTEL_FAM6_SKYLAKE_MOBILE:
> + case INTEL_FAM6_SKYLAKE_DESKTOP:
> + case INTEL_FAM6_SKYLAKE_X:
> + case INTEL_FAM6_KABYLAKE_MOBILE:
> + case INTEL_FAM6_KABYLAKE_DESKTOP:
> + return true;
> + }
> + }

Thanks,

tglx


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread Josh Poimboeuf
On Fri, Jan 12, 2018 at 06:56:18PM +, David Woodhouse wrote:
> On Fri, 2018-01-12 at 18:05 +, Andrew Cooper wrote:
> > 
> > If you unconditionally fill the RSB on every entry to supervisor mode,
> > then there are never guest-controlled RSB values to be found.
> > 
> > With that property (and IBRS to protect Skylake+), you shouldn't need
> > RSB filling anywhere in the middle.
> 
> Yes, that's right.
> 
> We have a choice — we can do it on kernel entry (in the interrupt and
> syscall and NMI paths), and that's nice and easy and really safe
> because we know there's *never* a bad RSB entry lurking while we're in
> the kernel.
> 
> The alternative, which is what we seem to be leaning towards now in
> the latest tables from Dave (https://goo.gl/pXbvBE and
> https://goo.gl/Grbuhf), is to do it on context switch when we might be
> switching from a shallow call stack to a deeper one. Which has much
> better performance characteristics for processes which make non-
> sleeping syscalls.
> 
> The caveat with the latter approach is that we do depend on the fact
> that context switches are the only imbalance in the kernel. But that's
> OK — we don't have a longjmp or anything else like that. Especially
> that goes into a *deeper* call stack. Do we?

At least some generated code might create RSB imbalances.  Function
graph tracing and kretprobes, for example.  They mess with the return
path and could probably underflow the RSB pretty easily.  I guess they'd
need to be reworked a bit so they only do a single ret.

-- 
Josh


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread Josh Poimboeuf
On Fri, Jan 12, 2018 at 06:56:18PM +, David Woodhouse wrote:
> On Fri, 2018-01-12 at 18:05 +, Andrew Cooper wrote:
> > 
> > If you unconditionally fill the RSB on every entry to supervisor mode,
> > then there are never guest-controlled RSB values to be found.
> > 
> > With that property (and IBRS to protect Skylake+), you shouldn't need
> > RSB filling anywhere in the middle.
> 
> Yes, that's right.
> 
> We have a choice — we can do it on kernel entry (in the interrupt and
> syscall and NMI paths), and that's nice and easy and really safe
> because we know there's *never* a bad RSB entry lurking while we're in
> the kernel.
> 
> The alternative, which is what we seem to be leaning towards now in
> the latest tables from Dave (https://goo.gl/pXbvBE and
> https://goo.gl/Grbuhf), is to do it on context switch when we might be
> switching from a shallow call stack to a deeper one. Which has much
> better performance characteristics for processes which make non-
> sleeping syscalls.
> 
> The caveat with the latter approach is that we do depend on the fact
> that context switches are the only imbalance in the kernel. But that's
> OK — we don't have a longjmp or anything else like that. Especially
> that goes into a *deeper* call stack. Do we?

At least some generated code might create RSB imbalances.  Function
graph tracing and kretprobes, for example.  They mess with the return
path and could probably underflow the RSB pretty easily.  I guess they'd
need to be reworked a bit so they only do a single ret.

-- 
Josh


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread David Woodhouse
On Fri, 2018-01-12 at 18:05 +, Andrew Cooper wrote:
> 
> If you unconditionally fill the RSB on every entry to supervisor mode,
> then there are never guest-controlled RSB values to be found.
> 
> With that property (and IBRS to protect Skylake+), you shouldn't need
> RSB filling anywhere in the middle.

Yes, that's right.

We have a choice — we can do it on kernel entry (in the interrupt and
syscall and NMI paths), and that's nice and easy and really safe
because we know there's *never* a bad RSB entry lurking while we're in
the kernel.

The alternative, which is what we seem to be leaning towards now in
the latest tables from Dave (https://goo.gl/pXbvBE and
https://goo.gl/Grbuhf), is to do it on context switch when we might be
switching from a shallow call stack to a deeper one. Which has much
better performance characteristics for processes which make non-
sleeping syscalls.

The caveat with the latter approach is that we do depend on the fact
that context switches are the only imbalance in the kernel. But that's
OK — we don't have a longjmp or anything else like that. Especially
that goes into a *deeper* call stack. Do we?

smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread David Woodhouse
On Fri, 2018-01-12 at 18:05 +, Andrew Cooper wrote:
> 
> If you unconditionally fill the RSB on every entry to supervisor mode,
> then there are never guest-controlled RSB values to be found.
> 
> With that property (and IBRS to protect Skylake+), you shouldn't need
> RSB filling anywhere in the middle.

Yes, that's right.

We have a choice — we can do it on kernel entry (in the interrupt and
syscall and NMI paths), and that's nice and easy and really safe
because we know there's *never* a bad RSB entry lurking while we're in
the kernel.

The alternative, which is what we seem to be leaning towards now in
the latest tables from Dave (https://goo.gl/pXbvBE and
https://goo.gl/Grbuhf), is to do it on context switch when we might be
switching from a shallow call stack to a deeper one. Which has much
better performance characteristics for processes which make non-
sleeping syscalls.

The caveat with the latter approach is that we do depend on the fact
that context switches are the only imbalance in the kernel. But that's
OK — we don't have a longjmp or anything else like that. Especially
that goes into a *deeper* call stack. Do we?

smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread David Woodhouse
On Fri, 2018-01-12 at 10:02 -0800, Andi Kleen wrote:
> > + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > + pr_info("Filling RSB on context switch\n");
> 
> We need to do more things for Skylake (like idle and interrupt fill
> and possibly deep call chain), so I don't think it makes sense to
> 
> - have an individual flag for each of these. It can be just a single
> flag that enables all of this for Skylake
> 
> - print something for each of them. that will just be very noisy
> without any useful benefit to the user.

I still think we are better off using IBRS by default on Skylake.

This patch wasn't really for Skylake; the real use case was for AMD
CPUs (!PTI) without SMEP. Since it happens to be needed on Skylake too we
might as well enable it there... but that doesn't mean I was planning
to do all the other horrible crap we need for Skylake.

smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread David Woodhouse
On Fri, 2018-01-12 at 10:02 -0800, Andi Kleen wrote:
> > + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> > +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> > + pr_info("Filling RSB on context switch\n");
> 
> We need to do more things for Skylake (like idle and interrupt fill
> and possibly deep call chain), so I don't think it makes sense to
> 
> - have an individual flag for each of these. It can be just a single
> flag that enables all of this for Skylake
> 
> - print something for each of them. that will just be very noisy
> without any useful benefit to the user.

I still think we are better off using IBRS by default on Skylake.

This patch wasn't really for Skylake; the real use case was for AMD
CPUs (!PTI) without SMEP. Since it happens to be needed on Skylake too we
might as well enable it there... but that doesn't mean I was planning
to do all the other horrible crap we need for Skylake.

smime.p7s
Description: S/MIME cryptographic signature


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread Andrew Cooper
On 12/01/18 17:49, David Woodhouse wrote:
> When we context switch from a shallow call stack to a deeper one, as we
> 'ret' up the deeper side we may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace. This is
> problematic if we have neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel), as malicious code in userspace
> may then be executed speculatively. So overwrite the CPU's return
> prediction stack with calls which are predicted to return to an infinite
> loop, to "capture" speculation if this happens. This is required both
> for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.
>
> On Skylake+ the problem is slightly different, and an *underflow* of the
> RSB may cause errant branch predictions to occur. So there it's not so
> much overwrite, as *filling* the RSB to attempt to prevent it getting
> empty. This is only a partial solution for Skylake+ since there are many
> other conditions which may result in the RSB becoming empty. The full
> solution on Skylake+ is to use IBRS, which will prevent the problem even
> when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
> required on context switch.

If you unconditionally fill the RSB on every entry to supervisor mode,
then there are never guest-controlled RSB values to be found.

With that property (and IBRS to protect Skylake+), you shouldn't need
RSB filling anywhere in the middle.

~Andrew


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread Andrew Cooper
On 12/01/18 17:49, David Woodhouse wrote:
> When we context switch from a shallow call stack to a deeper one, as we
> 'ret' up the deeper side we may encounter RSB entries (predictions for
> where the 'ret' goes to) which were populated in userspace. This is
> problematic if we have neither SMEP nor KPTI (the latter of which marks
> userspace pages as NX for the kernel), as malicious code in userspace
> may then be executed speculatively. So overwrite the CPU's return
> prediction stack with calls which are predicted to return to an infinite
> loop, to "capture" speculation if this happens. This is required both
> for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.
>
> On Skylake+ the problem is slightly different, and an *underflow* of the
> RSB may cause errant branch predictions to occur. So there it's not so
> much overwrite, as *filling* the RSB to attempt to prevent it getting
> empty. This is only a partial solution for Skylake+ since there are many
> other conditions which may result in the RSB becoming empty. The full
> solution on Skylake+ is to use IBRS, which will prevent the problem even
> when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
> required on context switch.

If you unconditionally fill the RSB on every entry to supervisor mode,
then there are never guest-controlled RSB values to be found.

With that property (and IBRS to protect Skylake+), you shouldn't need
RSB filling anywhere in the middle.

~Andrew


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread Andi Kleen
> + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> + pr_info("Filling RSB on context switch\n");

We need to do more things for Skylake (like idle and interrupt fill
and possibly deep call chain), so I don't think it makes sense to

- have an individual flag for each of these. It can be just a single
flag that enables all of this for Skylake

- print something for each of them. that will just be very noisy
without any useful benefit to the user.

-Andi


Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread Andi Kleen
> + if ((!boot_cpu_has(X86_FEATURE_PTI) &&
> +  !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
> + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
> + pr_info("Filling RSB on context switch\n");

We need to do more things for Skylake (like idle and interrupt fill
and possibly deep call chain), so I don't think it makes sense to

- have an individual flag for each of these. It can be just a single
flag that enables all of this for Skylake

- print something for each of them. that will just be very noisy
without any useful benefit to the user.

-Andi


[PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread David Woodhouse
When we context switch from a shallow call stack to a deeper one, as we
'ret' up the deeper side we may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace. This is
problematic if we have neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel), as malicious code in userspace
may then be executed speculatively. So overwrite the CPU's return
prediction stack with calls which are predicted to return to an infinite
loop, to "capture" speculation if this happens. This is required both
for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so
much overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

Signed-off-by: David Woodhouse 
Acked-by: Arjan van de Ven 
---
 arch/x86/entry/entry_32.S  | 11 +++
 arch/x86/entry/entry_64.S  | 11 +++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c | 34 ++
 4 files changed, 57 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..ef0e478 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When we switch from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..b2937d8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When we switch from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 #define X86_FEATURE_MBA            ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc261..c17cce3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,22 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+   if (boot_cpu_data.x86 == 6) {
+   switch (boot_cpu_data.x86_model) {
+   case INTEL_FAM6_SKYLAKE_MOBILE:
+   case INTEL_FAM6_SKYLAKE_DESKTOP:
+   case INTEL_FAM6_SKYLAKE_X:
+   case INTEL_FAM6_KABYLAKE_MOBILE:
+   case INTEL_FAM6_KABYLAKE_DESKTOP:
+   return true;
+   }
+   }
+   return false;
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void)
 
spectre_v2_enabled = mode;
pr_info("%s\n", 

[PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs

2018-01-12 Thread David Woodhouse
When we context switch from a shallow call stack to a deeper one, as we
'ret' up the deeper side we may encounter RSB entries (predictions for
where the 'ret' goes to) which were populated in userspace. This is
problematic if we have neither SMEP nor KPTI (the latter of which marks
userspace pages as NX for the kernel), as malicious code in userspace
may then be executed speculatively. So overwrite the CPU's return
prediction stack with calls which are predicted to return to an infinite
loop, to "capture" speculation if this happens. This is required both
for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI.

On Skylake+ the problem is slightly different, and an *underflow* of the
RSB may cause errant branch predictions to occur. So there it's not so
much overwrite, as *filling* the RSB to attempt to prevent it getting
empty. This is only a partial solution for Skylake+ since there are many
other conditions which may result in the RSB becoming empty. The full
solution on Skylake+ is to use IBRS, which will prevent the problem even
when the RSB becomes empty. With IBRS, the RSB-stuffing will not be
required on context switch.

Signed-off-by: David Woodhouse 
Acked-by: Arjan van de Ven 
---
 arch/x86/entry/entry_32.S  | 11 +++
 arch/x86/entry/entry_64.S  | 11 +++
 arch/x86/include/asm/cpufeatures.h |  1 +
 arch/x86/kernel/cpu/bugs.c | 34 ++
 4 files changed, 57 insertions(+)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index a1f28a5..ef0e478 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -244,6 +244,17 @@ ENTRY(__switch_to_asm)
movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When we switch from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popl%esi
popl%edi
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 59874bc..b2937d8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -487,6 +487,17 @@ ENTRY(__switch_to_asm)
movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
 #endif
 
+#ifdef CONFIG_RETPOLINE
+   /*
+* When we switch from a shallower to a deeper call stack
+* the RSB may either underflow or use entries populated
+* with userspace addresses. On CPUs where those concerns
+* exist, overwrite the RSB with entries which capture
+* speculative execution to prevent attack.
+*/
+   FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+#endif
+
/* restore callee-saved registers */
popq%r15
popq%r14
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index f275447..aa09559 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -211,6 +211,7 @@
 #define X86_FEATURE_AVX512_4FMAPS  ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 
 #define X86_FEATURE_MBA            ( 7*32+18) /* Memory Bandwidth Allocation */
+#define X86_FEATURE_RSB_CTXSW  ( 7*32+19) /* Fill RSB on context switches */
 
 /* Virtualization flags: Linux defined, word 8 */
 #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index e4dc261..c17cce3 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -23,6 +23,7 @@
 #include 
 #include 
 #include 
+#include <asm/intel-family.h>
 
 static void __init spectre_v2_select_mitigation(void);
 
@@ -155,6 +156,22 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void)
return SPECTRE_V2_CMD_NONE;
 }
 
+/* Check for Skylake-like CPUs (for RSB handling) */
+static bool __init is_skylake_era(void)
+{
+   if (boot_cpu_data.x86 == 6) {
+   switch (boot_cpu_data.x86_model) {
+   case INTEL_FAM6_SKYLAKE_MOBILE:
+   case INTEL_FAM6_SKYLAKE_DESKTOP:
+   case INTEL_FAM6_SKYLAKE_X:
+   case INTEL_FAM6_KABYLAKE_MOBILE:
+   case INTEL_FAM6_KABYLAKE_DESKTOP:
+   return true;
+   }
+   }
+   return false;
+}
+
 static void __init spectre_v2_select_mitigation(void)
 {
enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline();
@@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void)
 
spectre_v2_enabled = mode;
pr_info("%s\n", spectre_v2_strings[mode]);
+
+   /*
+