On Mon, May 04, 2020 at 02:13:42PM +1000, Balbir Singh wrote:
> Implement a mechanism to selectively flush the L1D cache. The goal is to
> allow tasks that are paranoid due to the recent snoop-assisted data sampling
> vulnerabilities to flush their L1D on being switched out.  This protects
> their data from being snooped or leaked via side channels after the task
> has context switched out.
> 
> There are two scenarios we might want to protect against: a task leaving
> the CPU with data still in L1D (which is the main concern of this patch),
> and a malicious, less trusted task coming in, for which we want to clean
> up the cache before it starts. Only the former case is addressed here.
> 
> A new thread_info flag, TIF_SPEC_FLUSH_L1D, is added to track tasks which
> opt in to L1D flushing. cpu_tlbstate.last_user_mm_spec is used to convert
> the TIF flags into mm state (per cpu via last_user_mm_spec) in
> cond_mitigation(), which is then used to decide when to call flush_l1d().
> 
> Add prctl()s to opt in to flushing the L1D cache on context switch out;
> the existing mechanism of tracking prev_mm via cpu_tlbstate is
> reused to track the state of the tasks and to flush the L1D cache.
> The prctl interface is generic and can be ported over to other
> architectures.
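
(Aside, since the interface is the user-visible part: the userspace side
would be a single prctl() call, something like the sketch below. The
PR_SET_L1D_FLUSH/PR_GET_L1D_FLUSH names and values are my assumption of
what this series adds to include/uapi/linux/prctl.h -- adjust to the
real header.)

#include <stdio.h>
#include <sys/prctl.h>

/* Assumed constants; take the real values from the series' prctl.h. */
#ifndef PR_SET_L1D_FLUSH
#define PR_SET_L1D_FLUSH	55
#define PR_GET_L1D_FLUSH	56
#endif

int main(void)
{
	/* Opt the calling task in to an L1D flush on context switch out. */
	if (prctl(PR_SET_L1D_FLUSH, 1, 0, 0, 0)) {
		perror("PR_SET_L1D_FLUSH");
		return 1;
	}
	printf("l1d flush enabled: %d\n", prctl(PR_GET_L1D_FLUSH, 0, 0, 0, 0));
	return 0;
}
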
> 
> Suggested-by: Thomas Gleixner <[email protected]>
> Signed-off-by: Balbir Singh <[email protected]>
> Reviewed-by: Kees Cook <[email protected]>
> ---
>  arch/x86/include/asm/thread_info.h |  7 ++++-
>  arch/x86/mm/tlb.c                  | 44 ++++++++++++++++++++++++++++--
>  include/uapi/linux/prctl.h         |  4 +++
>  kernel/sys.c                       | 20 ++++++++++++++
>  4 files changed, 72 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
> index 8de8ceccb8bc..67de693d9ba1 100644
> --- a/arch/x86/include/asm/thread_info.h
> +++ b/arch/x86/include/asm/thread_info.h
> @@ -84,7 +84,7 @@ struct thread_info {
>  #define TIF_SYSCALL_AUDIT    7       /* syscall auditing active */
>  #define TIF_SECCOMP          8       /* secure computing */
>  #define TIF_SPEC_IB          9       /* Indirect branch speculation mitigation */
> -#define TIF_SPEC_FORCE_UPDATE        10      /* Force speculation MSR update in context switch */
> +#define TIF_SPEC_FLUSH_L1D   10      /* Flush L1D on mm switches (processes) */
>  #define TIF_USER_RETURN_NOTIFY       11      /* notify kernel of userspace return */
>  #define TIF_UPROBE           12      /* breakpointed or singlestepping */
>  #define TIF_PATCH_PENDING    13      /* pending live patching update */
> @@ -96,6 +96,7 @@ struct thread_info {
>  #define TIF_MEMDIE           20      /* is terminating due to OOM killer */
>  #define TIF_POLLING_NRFLAG   21      /* idle is polling for TIF_NEED_RESCHED */
>  #define TIF_IO_BITMAP                22      /* uses I/O bitmap */
> +#define TIF_SPEC_FORCE_UPDATE        23      /* Force speculation MSR update in context switch */
>  #define TIF_FORCED_TF                24      /* true if TF in eflags artificially */
>  #define TIF_BLOCKSTEP                25      /* set when we want DEBUGCTLMSR_BTF */
>  #define TIF_LAZY_MMU_UPDATES 27      /* task is updating the mmu lazily */
> @@ -132,6 +133,7 @@ struct thread_info {
>  #define _TIF_ADDR32          (1 << TIF_ADDR32)
>  #define _TIF_X32             (1 << TIF_X32)
>  #define _TIF_FSCHECK         (1 << TIF_FSCHECK)
> +#define _TIF_SPEC_FLUSH_L1D  (1 << TIF_SPEC_FLUSH_L1D)
>  
>  /* Work to do before invoking the actual syscall. */
>  #define _TIF_WORK_SYSCALL_ENTRY      \
> @@ -235,6 +237,9 @@ static inline int arch_within_stack_frames(const void * const stack,
>                          current_thread_info()->status & TS_COMPAT)
>  #endif
>  
> +extern int arch_prctl_l1d_flush_set(struct task_struct *tsk, unsigned long enable);
> +extern int arch_prctl_l1d_flush_get(struct task_struct *tsk);
> +
>  extern void arch_task_cache_init(void);
>  extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
>  extern void arch_release_task_struct(struct task_struct *tsk);
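
(The kernel/sys.c and uapi sides aren't quoted above; judging by the
diffstat I'd expect the usual __weak default stubs plus two new cases in
the sys_prctl() switch, roughly as below. The constant names are my
guess, not the literal patch.)

/* Weak defaults so architectures without the feature return -EINVAL. */
int __weak arch_prctl_l1d_flush_set(struct task_struct *tsk, unsigned long enable)
{
	return -EINVAL;
}

int __weak arch_prctl_l1d_flush_get(struct task_struct *tsk)
{
	return -EINVAL;
}

/* ...and in the sys_prctl() switch statement: */
	case PR_SET_L1D_FLUSH:
		if (arg3 || arg4 || arg5)
			return -EINVAL;
		error = arch_prctl_l1d_flush_set(me, arg2);
		break;
	case PR_GET_L1D_FLUSH:
		if (arg2 || arg3 || arg4 || arg5)
			return -EINVAL;
		error = arch_prctl_l1d_flush_get(me);
		break;
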
> diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
> index 10056b8d8f01..7ea9bc9e089f 100644
> --- a/arch/x86/mm/tlb.c
> +++ b/arch/x86/mm/tlb.c
> @@ -13,6 +13,7 @@
>  #include <asm/mmu_context.h>
>  #include <asm/nospec-branch.h>
>  #include <asm/cache.h>
> +#include <asm/cacheflush.h>
>  #include <asm/apic.h>
>  #include <asm/uv/uv.h>
>  
> @@ -43,11 +44,12 @@
>   */
>  
>  /*
> - * Bits to mangle the TIF_SPEC_IB state into the mm pointer which is
> + * Bits to mangle the TIF_SPEC_* state into the mm pointer which is
>   * stored in cpu_tlb_state.last_user_mm_spec.
>   */
>  #define LAST_USER_MM_IBPB    0x1UL
> -#define LAST_USER_MM_SPEC_MASK       (LAST_USER_MM_IBPB)
> +#define LAST_USER_MM_L1D_FLUSH       0x2UL
> +#define LAST_USER_MM_SPEC_MASK       (LAST_USER_MM_IBPB | LAST_USER_MM_L1D_FLUSH)
>  
>  /*
>   * The x86 feature is called PCID (Process Context IDentifier). It is similar
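
(For readers following along: as I read the series, these bits ride in
the low bits of the per-cpu mm cookie -- mm pointers are aligned, so the
bottom bits are free -- and get tested in cond_mitigation(). Roughly,
paraphrased rather than quoted:)

/* Paraphrased sketch. The single shift works because TIF_SPEC_FLUSH_L1D
 * sits directly above TIF_SPEC_IB, so both flags land on the matching
 * LAST_USER_MM_* bits after masking. */
static inline unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
{
	unsigned long next_tif = task_thread_info(next)->flags;
	unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & LAST_USER_MM_SPEC_MASK;

	return (unsigned long)next->mm | spec_bits;
}

/* ...and in cond_mitigation(), when the previous mm had the bit set: */
	if (prev_mm & LAST_USER_MM_L1D_FLUSH)
		flush_l1d();
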
> @@ -308,6 +310,35 @@ void leave_mm(int cpu)
>  }
>  EXPORT_SYMBOL_GPL(leave_mm);
>  
> +static int enable_l1d_flush_for_task(struct task_struct *tsk)
> +{
> +     int ret = l1d_flush_init_once();
> +
> +     if (ret < 0)
> +             return ret;

Am I reading correctly (in the v5 delta) that with l1d_flush_init_once()
now testing for Intel CPUs, processes on non-Intel x86 CPUs can't use
the prctl() feature as a defense-in-depth against potential future L1D
cache side-channel flaws?

Why can't the L1D_CACHE_ORDER just get set dynamically based on CPU?
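Something along these lines seems doable, sizing the flush buffer from
cacheinfo at init time rather than from a compile-time constant (sketch,
untested; l1d_flush_order() is a made-up helper name):

#include <linux/cacheinfo.h>

/* Sketch: derive the software-flush buffer order from the reported L1D
 * size instead of a fixed L1D_CACHE_ORDER; falls back to 32K when no
 * level-1 data cache is enumerated. CPU pinning elided for brevity. */
static unsigned int l1d_flush_order(void)
{
	struct cpu_cacheinfo *cci = get_cpu_cacheinfo(smp_processor_id());
	unsigned int size = 32 * 1024;	/* conservative default */
	unsigned int i;

	for (i = 0; i < cci->num_leaves; i++) {
		struct cacheinfo *ci = &cci->info_list[i];

		if (ci->level == 1 && ci->type == CACHE_TYPE_DATA) {
			size = ci->size;
			break;
		}
	}
	return get_order(size);
}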

> +
> +     set_ti_thread_flag(&tsk->thread_info, TIF_SPEC_FLUSH_L1D);
> +     return ret;
> +}

-- 
Kees Cook
