[PATCH 3.16 41/76] x86/retpoline: Fill RSB on context switch for affected CPUs
3.16.56-rc1 review patch. If anyone has any objections, please let me know. -- From: David Woodhouse commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] [js] backport to 4.4 -- __switch_to_asm does not exist there, we have to patch the switch_to macros for both x86_32 and x86_64. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Cc: gno...@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lenda...@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk Signed-off-by: Jiri Slaby [bwh: Backported to 3.16: use the first available feature number] Signed-off-by: Ben Hutchings --- arch/x86/include/asm/cpufeature.h |1 + arch/x86/include/asm/switch_to.h | 38 ++ arch/x86/kernel/cpu/bugs.c| 36 3 files changed, 75 insertions(+) --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -187,6 +187,7 @@ #define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_INVPCID_SINGLE (7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */ +#define X86_FEATURE_RSB_CTXSW (7*32+11) /* Fill RSB on context switches */ #define X86_FEATURE_RETPOLINE (7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */ --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SWITCH_TO_H #define _ASM_X86_SWITCH_TO_H +#include + struct task_struct; /* one of the stranger aspects of C forward declarations */ __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); @@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. 
On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. +*/ +#define __retpoline_fill_return_buffer \ + ALTERNATIVE("jmp 910f", \ + __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\ + X86_FEATURE_RSB_CTXSW) \ + "910:\n\t" +#else +#define __retpoline_fill_return_buffer +#endif + /* * Saving eflags is important. It switches not only IOPL between tasks, * it also protects other tasks from NT leaking through sysenter etc. @@ -46,6 +65,7 @@ do { \ "movl $1f,%[prev_ip]\n\t" /* saveEIP */ \ "pushl %[next_ip]\n\t" /* restore EIP */ \ __switch_canary\ +__retpoline_fill_return_buffer \ "jmp __switch_to\n"/* regparm call */ \ "1:\t" \ "popl %%ebp\n\t" /* restore
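The patch above only references __FILL_RETURN_BUFFER; its definition lives in the nospec-branch header and is not part of this diff. As a minimal, self-contained sketch of the same idea for x86-32 (matching the %%ebx/%%esp operands used above, and assuming RSB_CLEAR_LOOPS is 32): each 'call' pushes one return-prediction entry whose target is a pause/lfence capture loop that is never architecturally executed, and the stack pointer is restored afterwards. The function name and the flat loop structure here are illustrative, not the kernel's actual macro.

/* Illustrative sketch only, not the kernel's __FILL_RETURN_BUFFER. */
static inline void rsb_stuff_sketch(void)
{
	unsigned long loops = 32;		/* stands in for RSB_CLEAR_LOOPS */

	asm volatile("1:\n\t"
		     "call 2f\n\t"		/* push one RSB entry */
		     "3:\n\t"
		     "pause\n\t"		/* speculation trap: a mispredicted */
		     "lfence\n\t"		/* 'ret' spins here harmlessly      */
		     "jmp 3b\n\t"
		     "2:\n\t"
		     "dec %0\n\t"
		     "jnz 1b\n\t"
		     "add $(4 * 32), %%esp\n\t"	/* drop the 32 pushed return addresses */
		     : "+r" (loops)
		     : : "memory", "cc");
}

The effect is that any stale prediction left in the RSB by the previous task is overwritten before the kernel starts returning up the new task's call stack.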
[PATCH 3.2 075/104] x86/retpoline: Fill RSB on context switch for affected CPUs
3.2.101-rc1 review patch. If anyone has any objections, please let me know. -- From: David Woodhouse commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] [js] backport to 4.4 -- __switch_to_asm does not exist there, we have to patch the switch_to macros for both x86_32 and x86_64. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Cc: gno...@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lenda...@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk Signed-off-by: Jiri Slaby [bwh: Backported to 3.2: - Use the first available feature number - Adjust filename] Signed-off-by: Ben Hutchings --- arch/x86/include/asm/cpufeature.h |1 + arch/x86/include/asm/system.h | 38 ++ arch/x86/kernel/cpu/bugs.c| 36 3 files changed, 75 insertions(+) --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -178,6 +178,7 @@ #define X86_FEATURE_PTS(7*32+ 6) /* Intel Package Thermal Status */ #define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */ #define X86_FEATURE_INVPCID_SINGLE (7*32+ 8) /* Effectively INVPCID && CR4.PCIDE=1 */ +#define X86_FEATURE_RSB_CTXSW (7*32+9) /* Fill RSB on context switches */ #define X86_FEATURE_RETPOLINE (7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD (7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */ --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,23 @@ extern void show_regs_common(void); #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. 
+*/ +#define __retpoline_fill_return_buffer \ + ALTERNATIVE("jmp 910f", \ + __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\ + X86_FEATURE_RSB_CTXSW) \ + "910:\n\t" +#else +#define __retpoline_fill_return_buffer +#endif + /* * Saving eflags is important. It switches not only IOPL between tasks, * it also protects other tasks from NT leaking through sysenter etc. @@ -63,6 +81,7 @@ do { \ "movl $1f,%[prev_ip]\n\t" /* saveEIP */ \ "pushl %[next_ip]\n\t" /* restore EIP */ \ __switch_canary\ +__retpoline_fill_return_buffer \ "jmp __switch_to\n"/* regparm call */ \ "1:\t" \ "popl %%ebp\n\t" /* restore EBP */ \ @@ -117,6 +136,23 @@ do { \ #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ +#ifdef
Re: x86/retpoline: Fill RSB on context switch for affected CPUs
On 09.03.2018 16:14, Andi Kleen wrote: >> Shouldn't the RSB filling on context switch also be done on non-IBPB >> CPUs to protect (retpolined) user space tasks from other user space >> tasks? > > The comment is actually incorrect. There's no risk to hit user space > addresses if we have KPTI and NX (which is fairly universal). > > It's mainly needed on Skylake era CPUs. > > Should fix the comment. I'll send a patch. But what about userspace-to-userspace attacks? - the ones that IBPB on context switches currently protects against (at least for high-value, or as implemented currently, non-dumpable, processes)? If understand the issue correctly, high-value user space processes can be protected from other user space processes even on CPUs that lack IBPB as long as they are recompiled with retpolines and there is no danger of RSB entries from one process being used by another one after a context switch. For Skyklake this would not be enough, but there we'll (hopefully) have the IBPB instead. > -Andi > Maciej
Re: x86/retpoline: Fill RSB on context switch for affected CPUs
> Shouldn't the RSB filling on context switch also be done on non-IBPB > CPUs to protect (retpolined) user space tasks from other user space > tasks? The comment is actually incorrect. There's no risk to hit user space addresses if we have KPTI and NX (which is fairly universal). It's mainly needed on Skylake era CPUs. Should fix the comment. I'll send a patch. -Andi
Re: x86/retpoline: Fill RSB on context switch for affected CPUs
On 12.01.2018 18:49, Woodhouse, David wrote: > When we context switch from a shallow call stack to a deeper one, as we > 'ret' up the deeper side we may encounter RSB entries (predictions for > where the 'ret' goes to) which were populated in userspace. This is > problematic if we have neither SMEP nor KPTI (the latter of which marks > userspace pages as NX for the kernel), as malicious code in userspace > may then be executed speculatively. So overwrite the CPU's return > prediction stack with calls which are predicted to return to an infinite > loop, to "capture" speculation if this happens. This is required both > for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. > > On Skylake+ the problem is slightly different, and an *underflow* of the > RSB may cause errant branch predictions to occur. So there it's not so > much overwrite, as *filling* the RSB to attempt to prevent it getting > empty. This is only a partial solution for Skylake+ since there are many > other conditions which may result in the RSB becoming empty. The full > solution on Skylake+ is to use IBRS, which will prevent the problem even > when the RSB becomes empty. With IBRS, the RSB-stuffing will not be > required on context switch. > > Signed-off-by: David Woodhouse > Acked-by: Arjan van de Ven > --- (..) > @@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void) > > spectre_v2_enabled = mode; > pr_info("%s\n", spectre_v2_strings[mode]); > + > + /* > + * If we don't have SMEP or KPTI, then we run the risk of hitting > + * userspace addresses in the RSB after a context switch from a > + * shallow call stack to a deeper one. We must must fill the entire > + * RSB to avoid that, even when using IBRS. > + * > + * Skylake era CPUs have a separate issue with *underflow* of the > + * RSB, when they will predict 'ret' targets from the generic BTB. > + * IBRS makes that safe, but we need to fill the RSB on context > + * switch if we're using retpoline. > + */ > + if ((!boot_cpu_has(X86_FEATURE_PTI) && > + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); > + pr_info("Filling RSB on context switch\n"); > + } Shouldn't the RSB filling on context switch also be done on non-IBPB CPUs to protect (retpolined) user space tasks from other user space tasks? We already issue a IBPB when switching to high-value user space tasks to protect them from other user space tasks. Thanks, Maciej
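The IBPB behaviour Maciej refers to is separate from this patch. As a rough, hypothetical illustration of that policy (the helper name and its placement are invented here; get_dumpable(), SUID_DUMP_USER and indirect_branch_prediction_barrier() are real kernel symbols, but this is not the actual context-switch code), the idea is to pay the barrier cost only when switching to a task that has marked itself non-dumpable:

#include <linux/sched.h>
#include <linux/sched/coredump.h>
#include <asm/cpufeature.h>
#include <asm/nospec-branch.h>

/* Hypothetical sketch of "IBPB only for high-value tasks". */
static void cond_ibpb_sketch(struct task_struct *next)
{
	if (!boot_cpu_has(X86_FEATURE_USE_IBPB))
		return;

	/*
	 * Non-dumpable tasks have opted into stronger isolation;
	 * flush indirect branch predictions before running them.
	 */
	if (next->mm && get_dumpable(next->mm) != SUID_DUMP_USER)
		indirect_branch_prediction_barrier();
}

Maciej's point is that, on CPUs without IBPB, a user process rebuilt with retpolines is still only safe from another process's RSB entries if the RSB is cleared (or stuffed) on the context switch between them.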
[PATCH 4.4 07/74] x86/retpoline: Fill RSB on context switch for affected CPUs
4.4-stable review patch. If anyone has any objections, please let me know. -- From: David Woodhouse commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] [js] backport to 4.4 -- __switch_to_asm does not exist there, we have to patch the switch_to macros for both x86_32 and x86_64. Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Cc: gno...@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lenda...@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk Signed-off-by: Jiri Slaby Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/cpufeature.h |1 + arch/x86/include/asm/switch_to.h | 38 ++ arch/x86/kernel/cpu/bugs.c| 36 3 files changed, 75 insertions(+) --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -199,6 +199,7 @@ #define X86_FEATURE_HWP_EPP( 7*32+13) /* Intel HWP_EPP */ #define X86_FEATURE_HWP_PKG_REQ ( 7*32+14) /* Intel HWP_PKG_REQ */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ #define X86_FEATURE_RETPOLINE ( 7*32+29) /* Generic Retpoline mitigation for Spectre variant 2 */ #define X86_FEATURE_RETPOLINE_AMD ( 7*32+30) /* AMD Retpoline mitigation for Spectre variant 2 */ --- a/arch/x86/include/asm/switch_to.h +++ b/arch/x86/include/asm/switch_to.h @@ -1,6 +1,8 @@ #ifndef _ASM_X86_SWITCH_TO_H #define _ASM_X86_SWITCH_TO_H +#include + struct task_struct; /* one of the stranger aspects of C forward declarations */ __visible struct task_struct *__switch_to(struct task_struct *prev, struct task_struct *next); @@ -24,6 +26,23 @@ void __switch_to_xtra(struct task_struct #define __switch_canary_iparam #endif /* CC_STACKPROTECTOR */ +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. 
+*/ +#define __retpoline_fill_return_buffer \ + ALTERNATIVE("jmp 910f", \ + __stringify(__FILL_RETURN_BUFFER(%%ebx, RSB_CLEAR_LOOPS, %%esp)),\ + X86_FEATURE_RSB_CTXSW) \ + "910:\n\t" +#else +#define __retpoline_fill_return_buffer +#endif + /* * Saving eflags is important. It switches not only IOPL between tasks, * it also protects other tasks from NT leaking through sysenter etc. @@ -46,6 +65,7 @@ do { \ "movl $1f,%[prev_ip]\n\t" /* saveEIP */ \ "pushl %[next_ip]\n\t" /* restore EIP */ \ __switch_canary\ +__retpoline_fill_return_buffer \ "jmp __switch_to\n"/* regparm call */ \ "1:\t" \ "popl %%ebp\n\t" /* restore EBP */ \ @@ -100,6 +120,23 @@ do {
[PATCH 4.14 32/89] x86/retpoline: Fill RSB on context switch for affected CPUs
4.14-stable review patch. If anyone has any objections, please let me know. -- From: David Woodhouse commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Cc: gno...@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lenda...@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 11 +++ arch/x86/entry/entry_64.S | 11 +++ arch/x86/include/asm/cpufeatures.h |1 + arch/x86/kernel/cpu/bugs.c | 36 4 files changed, 59 insertions(+) --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -244,6 +244,17 @@ ENTRY(__switch_to_asm) movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. +*/ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl%esi popl%edi --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -487,6 +487,17 @@ ENTRY(__switch_to_asm) movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. 
+*/ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq%r15 popq%r14 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -211,6 +211,7 @@ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -23,6 +23,7 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); @@ -155,6 +156,23 @@ disable: return SPECTRE_V2_CMD_NONE; } +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + return true; + } + } + return false; +} + static
[PATCH 4.9 14/47] x86/retpoline: Fill RSB on context switch for affected CPUs
4.9-stable review patch. If anyone has any objections, please let me know. -- From: David Woodhouse commit c995efd5a740d9cbafbf58bde4973e8b50b4d761 upstream. On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] Signed-off-by: David Woodhouse Signed-off-by: Thomas Gleixner Acked-by: Arjan van de Ven Cc: gno...@lxorguk.ukuu.org.uk Cc: Rik van Riel Cc: Andi Kleen Cc: Josh Poimboeuf Cc: thomas.lenda...@amd.com Cc: Peter Zijlstra Cc: Linus Torvalds Cc: Jiri Kosina Cc: Andy Lutomirski Cc: Dave Hansen Cc: Kees Cook Cc: Tim Chen Cc: Greg Kroah-Hartman Cc: Paul Turner Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk Signed-off-by: Greg Kroah-Hartman --- arch/x86/entry/entry_32.S | 11 +++ arch/x86/entry/entry_64.S | 11 +++ arch/x86/include/asm/cpufeatures.h |1 + arch/x86/kernel/cpu/bugs.c | 36 4 files changed, 59 insertions(+) --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -229,6 +229,17 @@ ENTRY(__switch_to_asm) movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. +*/ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl%esi popl%edi --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -427,6 +427,17 @@ ENTRY(__switch_to_asm) movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. 
+*/ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq%r15 popq%r14 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -200,6 +200,7 @@ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW (7*32+16) /* AVX-512 Neural Network Instructions */ #define X86_FEATURE_AVX512_4FMAPS (7*32+17) /* AVX-512 Multiply Accumulation Single precision */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */ #define X86_FEATURE_KAISER ( 7*32+31) /* CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */ --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -22,6 +22,7 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); @@ -154,6 +155,23 @@ disable: return SPECTRE_V2_CMD_NONE; } +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP:
Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
On Mon, Jan 15, 2018 at 6:42 AM, Arjan van de Ven wrote: >> >> This would means that userspace would see return predictions based >> on the values the kernel 'stuffed' into the RSB to fill it. >> >> Potentially this leaks a kernel address to userspace. > > > KASLR pretty much died in May this year to be honest with the KAISER paper > (if not before then) KASLR was always on shaky ground for local attacks. For pure remote attacks, it's still useful. And for driving forward research, it appears to be quite useful. ;) -Kees -- Kees Cook Pixel Security
Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
This would means that userspace would see return predictions based on the values the kernel 'stuffed' into the RSB to fill it. Potentially this leaks a kernel address to userspace. KASLR pretty much died in May this year to be honest with the KAISER paper (if not before then) also with KPTI the address won't have a TLB mapping so it wouldn't actually be speculated into.
Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
On Mon, 2018-01-15 at 14:35 +, David Laight wrote: > From: David Woodhouse > > > > Sent: 14 January 2018 17:04 > > x86/retpoline: Fill RSB on context switch for affected CPUs > > > > On context switch from a shallow call stack to a deeper one, as the CPU > > does 'ret' up the deeper side it may encounter RSB entries (predictions for > > where the 'ret' goes to) which were populated in userspace. > > > > This is problematic if neither SMEP nor KPTI (the latter of which marks > > userspace pages as NX for the kernel) are active, as malicious code in > > userspace may then be executed speculatively. > ... > > Do we have a guarantee that all cpu actually detect the related RSB underflow? > > It wouldn't surprise me if at least some cpu just let it wrap. > > This would means that userspace would see return predictions based > on the values the kernel 'stuffed' into the RSB to fill it. > > Potentially this leaks a kernel address to userspace. Yeah, KASLR is dead unless we do a full IBPB before *every* VMLAUNCH or return to userspace anyway, isn't it? With KPTI we could put the RSB- stuffer into the syscall trampoline page perhaps... For this to be a concern for userspace, I think it does have to be true that only the lower bits are used, which adds a little complexity but probably isn't insurmountable? smime.p7s Description: S/MIME cryptographic signature
RE: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
From: David Woodhouse > Sent: 14 January 2018 17:04 > x86/retpoline: Fill RSB on context switch for affected CPUs > > On context switch from a shallow call stack to a deeper one, as the CPU > does 'ret' up the deeper side it may encounter RSB entries (predictions for > where the 'ret' goes to) which were populated in userspace. > > This is problematic if neither SMEP nor KPTI (the latter of which marks > userspace pages as NX for the kernel) are active, as malicious code in > userspace may then be executed speculatively. ... Do we have a guarantee that all cpu actually detect the related RSB underflow? It wouldn't surprise me if at least some cpu just let it wrap. This would means that userspace would see return predictions based on the values the kernel 'stuffed' into the RSB to fill it. Potentially this leaks a kernel address to userspace. David
Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
On Sun, 2018-01-14 at 16:05 -0800, Andi Kleen wrote: > > + if ((!boot_cpu_has(X86_FEATURE_PTI) && > > + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { > > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); > > + pr_info("Filling RSB on context switch\n"); > > + } > > Missing an option to turn this off. Deliberately so. You can already boot with 'spectre_v2=off' to turn off the mitigations. We are not intending to permit all the bullshit micro-management of IBRS=3/IBPB=2/RSB=π nonsense. If you choose retpoline, you get the RSB stuffing which is appropriate along with that. With IBRS, you get the RSB stuffing which is appropriate with that. You don't get command line or sysfs tunables to mess with it. You *do* have the source code, if you really want to make changes. Don't.
Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
On Sun, Jan 14, 2018 at 04:05:54PM -0800, Andi Kleen wrote: > > + if ((!boot_cpu_has(X86_FEATURE_PTI) && > > +!boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { > > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); > > + pr_info("Filling RSB on context switch\n"); > > + } > > Missing an option to turn this off. My earlier patch did this properly by folding it into the big option parser. https://marc.info/?l=linux-kernel=151578282016915=2 -Andi
Re: [tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
> + if ((!boot_cpu_has(X86_FEATURE_PTI) && > + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); > + pr_info("Filling RSB on context switch\n"); > + } Missing an option to turn this off. -Andi
[tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
Commit-ID: c995efd5a740d9cbafbf58bde4973e8b50b4d761 Gitweb: https://git.kernel.org/tip/c995efd5a740d9cbafbf58bde4973e8b50b4d761 Author: David Woodhouse <d...@amazon.co.uk> AuthorDate: Fri, 12 Jan 2018 17:49:25 + Committer: Thomas Gleixner <t...@linutronix.de> CommitDate: Mon, 15 Jan 2018 00:32:44 +0100 x86/retpoline: Fill RSB on context switch for affected CPUs On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] Signed-off-by: David Woodhouse <d...@amazon.co.uk> Signed-off-by: Thomas Gleixner <t...@linutronix.de> Acked-by: Arjan van de Ven <ar...@linux.intel.com> Cc: gno...@lxorguk.ukuu.org.uk Cc: Rik van Riel <r...@redhat.com> Cc: Andi Kleen <a...@linux.intel.com> Cc: Josh Poimboeuf <jpoim...@redhat.com> Cc: thomas.lenda...@amd.com Cc: Peter Zijlstra <pet...@infradead.org> Cc: Linus Torvalds <torva...@linux-foundation.org> Cc: Jiri Kosina <ji...@kernel.org> Cc: Andy Lutomirski <l...@amacapital.net> Cc: Dave Hansen <dave.han...@intel.com> Cc: Kees Cook <keesc...@google.com> Cc: Tim Chen <tim.c.c...@linux.intel.com> Cc: Greg Kroah-Hartman <gre...@linux-foundation.org> Cc: Paul Turner <p...@google.com> Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk --- arch/x86/entry/entry_32.S | 11 +++ arch/x86/entry/entry_64.S | 11 +++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 36 4 files changed, 59 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a1f28a5..60c4c34 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -244,6 +244,17 @@ ENTRY(__switch_to_asm) movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. 
+*/ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl%esi popl%edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 59874bc..d54a0ed 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -487,6 +487,17 @@ ENTRY(__switch_to_asm) movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. +*/ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq%r15 popq%r14 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f275447..aa09559 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -211,6 +211,7 @@ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Virtualization flags: Linux defined, word 8 */
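For reference, the FILL_RETURN_BUFFER invocations in the hunks above expand (when X86_FEATURE_RSB_CTXSW is set) into a loop of paired calls whose predicted return targets are speculation traps, so any later 'ret' that consumes one of these RSB entries speculates into a harmless pause/lfence loop instead of a userspace-controlled address. The sketch below is a simplified, self-contained illustration of that structure, not the kernel's actual __FILL_RETURN_BUFFER macro; the function name, label numbers and the 16-iteration count are made up for illustration, and the trailing stack adjustment assumes x86-64 (the kernel itself is built with -mno-red-zone, which makes the transient stack writes from the calls safe there).

#include <stdio.h>

/*
 * Simplified RSB-stuffing loop (x86-64, GCC/Clang inline asm).  Illustrative
 * only: each 'call' pushes an RSB prediction whose target is the speculation
 * trap (pause; lfence; jmp self) directly after it; the traps are never
 * architecturally executed, and the 32 real return addresses pushed on the
 * stack are discarded at the end by adjusting %rsp.
 */
static void rsb_stuff_sketch(void)
{
        unsigned long loops;

        asm volatile(
                "       mov     $16, %0                 \n" /* 16 iterations x 2 calls = 32 RSB entries */
                "1:     call    2f                      \n" /* RSB entry -> trap at 11: */
                "11:    pause                           \n" /* speculation trap */
                "       lfence                          \n"
                "       jmp     11b                     \n"
                "2:     call    3f                      \n" /* RSB entry -> trap at 22: */
                "22:    pause                           \n" /* speculation trap */
                "       lfence                          \n"
                "       jmp     22b                     \n"
                "3:     dec     %0                      \n"
                "       jnz     1b                      \n"
                "       add     $(32 * 8), %%rsp        \n" /* drop the 32 architectural return addresses */
                : "=r" (loops)
                :
                : "memory", "cc");
}

int main(void)
{
        rsb_stuff_sketch();
        printf("RSB stuffed; architecturally this was a no-op\n");
        return 0;
}

Note that every stuffed entry points at the text of the stuffing loop itself, i.e. at kernel addresses when this runs in the kernel; that is exactly why David Laight's question above, about CPUs that merely wrap on underflow rather than stopping, matters for KASLR.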
[tip:x86/pti] x86/retpoline: Fill RSB on context switch for affected CPUs
Commit-ID: a0ab15c0fb68e202bebd9b17fa49fd7ec48975b3 Gitweb: https://git.kernel.org/tip/a0ab15c0fb68e202bebd9b17fa49fd7ec48975b3 Author: David Woodhouse <d...@amazon.co.uk> AuthorDate: Fri, 12 Jan 2018 17:49:25 + Committer: Thomas Gleixner <t...@linutronix.de> CommitDate: Sun, 14 Jan 2018 16:41:39 +0100 x86/retpoline: Fill RSB on context switch for affected CPUs On context switch from a shallow call stack to a deeper one, as the CPU does 'ret' up the deeper side it may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel) are active, as malicious code in userspace may then be executed speculatively. Overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. [ tglx: Added missing vendor check and slighty massaged comments and changelog ] Signed-off-by: David Woodhouse <d...@amazon.co.uk> Signed-off-by: Thomas Gleixner <t...@linutronix.de> Acked-by: Arjan van de Ven <ar...@linux.intel.com> Cc: gno...@lxorguk.ukuu.org.uk Cc: Rik van Riel <r...@redhat.com> Cc: Andi Kleen <a...@linux.intel.com> Cc: Josh Poimboeuf <jpoim...@redhat.com> Cc: thomas.lenda...@amd.com Cc: Peter Zijlstra <pet...@infradead.org> Cc: Linus Torvalds <torva...@linux-foundation.org> Cc: Jiri Kosina <ji...@kernel.org> Cc: Andy Lutomirski <l...@amacapital.net> Cc: Dave Hansen <dave.han...@intel.com> Cc: Kees Cook <keesc...@google.com> Cc: Tim Chen <tim.c.c...@linux.intel.com> Cc: Greg Kroah-Hartman <gre...@linux-foundation.org> Cc: Paul Turner <p...@google.com> Link: https://lkml.kernel.org/r/1515779365-9032-1-git-send-email-d...@amazon.co.uk --- arch/x86/entry/entry_32.S | 11 +++ arch/x86/entry/entry_64.S | 11 +++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 36 4 files changed, 59 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a1f28a5..60c4c34 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -244,6 +244,17 @@ ENTRY(__switch_to_asm) movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. 
+*/ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl%esi popl%edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 59874bc..d54a0ed 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -487,6 +487,17 @@ ENTRY(__switch_to_asm) movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When switching from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. +*/ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq%r15 popq%r14 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f275447..aa09559 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -211,6 +211,7 @@ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Virtualization flags: Linux defined, word 8 */
Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
On Fri, 12 Jan 2018, David Woodhouse wrote: > +/* Check for Skylake-like CPUs (for RSB handling) */ > +static bool __init is_skylake_era(void) > +{ > + if (boot_cpu_data.x86 == 6) { This wants a check for vendor = intel > + switch (boot_cpu_data.x86_model) { > + case INTEL_FAM6_SKYLAKE_MOBILE: > + case INTEL_FAM6_SKYLAKE_DESKTOP: > + case INTEL_FAM6_SKYLAKE_X: > + case INTEL_FAM6_KABYLAKE_MOBILE: > + case INTEL_FAM6_KABYLAKE_DESKTOP: > + return true; > + } > + } Thanks, tglx
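What tglx is asking for is a guard on the CPU vendor before interpreting the family 6 model numbers, which only identify Skylake/Kaby Lake on Intel parts; the tip commit above records this as "Added missing vendor check" in its changelog. A minimal sketch of that check, assuming the usual boot_cpu_data fields and the X86_VENDOR_INTEL constant (the exact layout in the merged patch may differ):

/* Depends on <asm/processor.h> for boot_cpu_data / X86_VENDOR_INTEL and
 * <asm/intel-family.h> for the INTEL_FAM6_* model numbers. */
static bool __init is_skylake_era(void)
{
        /* Family 6 model numbers are only meaningful on Intel CPUs */
        if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
            boot_cpu_data.x86 == 6) {
                switch (boot_cpu_data.x86_model) {
                case INTEL_FAM6_SKYLAKE_MOBILE:
                case INTEL_FAM6_SKYLAKE_DESKTOP:
                case INTEL_FAM6_SKYLAKE_X:
                case INTEL_FAM6_KABYLAKE_MOBILE:
                case INTEL_FAM6_KABYLAKE_DESKTOP:
                        return true;
                }
        }
        return false;
}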
Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
On Fri, Jan 12, 2018 at 06:56:18PM +, David Woodhouse wrote: > On Fri, 2018-01-12 at 18:05 +, Andrew Cooper wrote: > > > > If you unconditionally fill the RSB on every entry to supervisor mode, > > then there are never guest-controlled RSB values to be found. > > > > With that property (and IBRS to protect Skylake+), you shouldn't need > > RSB filling anywhere in the middle. > > Yes, that's right. > > We have a choice — we can do it on kernel entry (in the interrupt and > syscall and NMI paths), and that's nice and easy and really safe > because we know there's *never* a bad RSB entry lurking while we're in > the kernel. > > The alternative, which is what we seem to be learning towards now in > the latest tables from Dave (https://goo.gl/pXbvBE and > https://goo.gl/Grbuhf), is to do it on context switch when we might be > switching from a shallow call stack to a deeper one. Which has much > better performance characteristics for processes which make non- > sleeping syscalls. > > The caveat with the latter approach is that we do depend on the fact > that context switches are the only imbalance in the kernel. But that's > OK — we don't have a longjmp or anything else like that. Especially > that goes into a *deeper* call stack. Do we? At least some generated code might create RSB imbalances. Function graph tracing and kretprobes, for example. They mess with the return path and could probably underflow the RSB pretty easily. I guess they'd need to be reworked a bit so they only do a single ret. -- Josh
Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
On Fri, 2018-01-12 at 18:05 +, Andrew Cooper wrote: > > If you unconditionally fill the RSB on every entry to supervisor mode, > then there are never guest-controlled RSB values to be found. > > With that property (and IBRS to protect Skylake+), you shouldn't need > RSB filling anywhere in the middle. Yes, that's right. We have a choice — we can do it on kernel entry (in the interrupt and syscall and NMI paths), and that's nice and easy and really safe because we know there's *never* a bad RSB entry lurking while we're in the kernel. The alternative, which is what we seem to be leaning towards now in the latest tables from Dave (https://goo.gl/pXbvBE and https://goo.gl/Grbuhf), is to do it on context switch when we might be switching from a shallow call stack to a deeper one. Which has much better performance characteristics for processes which make non-sleeping syscalls. The caveat with the latter approach is that we do depend on the fact that context switches are the only imbalance in the kernel. But that's OK — we don't have a longjmp or anything else like that. Especially that goes into a *deeper* call stack. Do we?
Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
On Fri, 2018-01-12 at 10:02 -0800, Andi Kleen wrote: > > + if ((!boot_cpu_has(X86_FEATURE_PTI) && > > + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { > > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); > > + pr_info("Filling RSB on context switch\n"); > > We need to do more things for Skylake (like idle and interrupt fill > and possibly deep call chain), so I don't think it makes sense to > > - have an individual flag for each of these. It can be just a single > flag that enables all of this for Skylake > > - print something for each of them. that will just be very noisy > without any useful benefit to the user. I still think we are better off using IBRS by default on Skylake. This patch wasn't really for Skylake; the real use case was for AMD CPUs (!PTI) without SMEP. Since it happens to be needed on Skylake too we might as well enable it there... but that doesn't mean I was planning to do all the other horrible crap we need for Skylake.
Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
On 12/01/18 17:49, David Woodhouse wrote: > When we context switch from a shallow call stack to a deeper one, as we > 'ret' up the deeper side we may encounter RSB entries (predictions for > where the 'ret' goes to) which were populated in userspace. This is > problematic if we have neither SMEP nor KPTI (the latter of which marks > userspace pages as NX for the kernel), as malicious code in userspace > may then be executed speculatively. So overwrite the CPU's return > prediction stack with calls which are predicted to return to an infinite > loop, to "capture" speculation if this happens. This is required both > for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. > > On Skylake+ the problem is slightly different, and an *underflow* of the > RSB may cause errant branch predictions to occur. So there it's not so > much overwrite, as *filling* the RSB to attempt to prevent it getting > empty. This is only a partial solution for Skylake+ since there are many > other conditions which may result in the RSB becoming empty. The full > solution on Skylake+ is to use IBRS, which will prevent the problem even > when the RSB becomes empty. With IBRS, the RSB-stuffing will not be > required on context switch. If you unconditionally fill the RSB on every entry to supervisor mode, then there are never guest-controlled RSB values to be found. With that property (and IBRS to protect Skylake+), you shouldn't need RSB filling anywhere in the middle. ~Andrew
Re: [PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
> + if ((!boot_cpu_has(X86_FEATURE_PTI) && > + !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) { > + setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW); > + pr_info("Filling RSB on context switch\n"); We need to do more things for Skylake (like idle and interrupt fill and possibly deep call chain), so I don't think it makes sense to - have an individual flag for each of these. It can be just a single flag that enables all of this for Skylake - print something for each of them. that will just be very noisy without any useful benefit to the user. -Andi
[PATCH] x86/retpoline: Fill RSB on context switch for affected CPUs
When we context switch from a shallow call stack to a deeper one, as we 'ret' up the deeper side we may encounter RSB entries (predictions for where the 'ret' goes to) which were populated in userspace. This is problematic if we have neither SMEP nor KPTI (the latter of which marks userspace pages as NX for the kernel), as malicious code in userspace may then be executed speculatively. So overwrite the CPU's return prediction stack with calls which are predicted to return to an infinite loop, to "capture" speculation if this happens. This is required both for retpoline, and also in conjunction with IBRS for !SMEP && !KPTI. On Skylake+ the problem is slightly different, and an *underflow* of the RSB may cause errant branch predictions to occur. So there it's not so much overwrite, as *filling* the RSB to attempt to prevent it getting empty. This is only a partial solution for Skylake+ since there are many other conditions which may result in the RSB becoming empty. The full solution on Skylake+ is to use IBRS, which will prevent the problem even when the RSB becomes empty. With IBRS, the RSB-stuffing will not be required on context switch. Signed-off-by: David WoodhouseAcked-by: Arjan van de Ven --- arch/x86/entry/entry_32.S | 11 +++ arch/x86/entry/entry_64.S | 11 +++ arch/x86/include/asm/cpufeatures.h | 1 + arch/x86/kernel/cpu/bugs.c | 34 ++ 4 files changed, 57 insertions(+) diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index a1f28a5..ef0e478 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -244,6 +244,17 @@ ENTRY(__switch_to_asm) movl%ebx, PER_CPU_VAR(stack_canary)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When we switch from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. +*/ + FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popl%esi popl%edi diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S index 59874bc..b2937d8 100644 --- a/arch/x86/entry/entry_64.S +++ b/arch/x86/entry/entry_64.S @@ -487,6 +487,17 @@ ENTRY(__switch_to_asm) movq%rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset #endif +#ifdef CONFIG_RETPOLINE + /* +* When we switch from a shallower to a deeper call stack +* the RSB may either underflow or use entries populated +* with userspace addresses. On CPUs where those concerns +* exist, overwrite the RSB with entries which capture +* speculative execution to prevent attack. 
+*/ + FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW +#endif + /* restore callee-saved registers */ popq%r15 popq%r14 diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index f275447..aa09559 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -211,6 +211,7 @@ #define X86_FEATURE_AVX512_4FMAPS ( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */ #define X86_FEATURE_MBA( 7*32+18) /* Memory Bandwidth Allocation */ +#define X86_FEATURE_RSB_CTXSW ( 7*32+19) /* Fill RSB on context switches */ /* Virtualization flags: Linux defined, word 8 */ #define X86_FEATURE_TPR_SHADOW ( 8*32+ 0) /* Intel TPR Shadow */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index e4dc261..c17cce3 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -23,6 +23,7 @@ #include #include #include +#include static void __init spectre_v2_select_mitigation(void); @@ -155,6 +156,22 @@ static enum spectre_v2_mitigation_cmd __init spectre_v2_parse_cmdline(void) return SPECTRE_V2_CMD_NONE; } +/* Check for Skylake-like CPUs (for RSB handling) */ +static bool __init is_skylake_era(void) +{ + if (boot_cpu_data.x86 == 6) { + switch (boot_cpu_data.x86_model) { + case INTEL_FAM6_SKYLAKE_MOBILE: + case INTEL_FAM6_SKYLAKE_DESKTOP: + case INTEL_FAM6_SKYLAKE_X: + case INTEL_FAM6_KABYLAKE_MOBILE: + case INTEL_FAM6_KABYLAKE_DESKTOP: + return true; + } + } + return false; +} + static void __init spectre_v2_select_mitigation(void) { enum spectre_v2_mitigation_cmd cmd = spectre_v2_parse_cmdline(); @@ -213,6 +230,23 @@ static void __init spectre_v2_select_mitigation(void) spectre_v2_enabled = mode; pr_info("%s\n",
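This posting is truncated in the archive right after pr_info("%s\n",. Based on the fragment of the same hunk quoted in the replies above (by Andi Kleen and tglx) and in the tip-bot mails, the cut-off tail of spectre_v2_select_mitigation() continues roughly as below; the condition and calls are taken from those quotes, while the comment wording here is a paraphrase rather than the verbatim patch text:

        spectre_v2_enabled = mode;
        pr_info("%s\n", spectre_v2_strings[mode]);

        /*
         * Without SMEP and without PTI the RSB may hold userspace
         * addresses when the kernel executes 'ret', and Skylake-era
         * CPUs may additionally misbehave on RSB underflow.  In
         * either case, stuff the RSB on context switch.
         * (Comment paraphrased, not the original patch comment.)
         */
        if ((!boot_cpu_has(X86_FEATURE_PTI) &&
             !boot_cpu_has(X86_FEATURE_SMEP)) || is_skylake_era()) {
                setup_force_cpu_cap(X86_FEATURE_RSB_CTXSW);
                pr_info("Filling RSB on context switch\n");
        }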