Re: [PATCH v3 4/5] prctl: Hook L1D flushing in via prctl

2020-12-04 Thread Balbir Singh
On Fri, Dec 04, 2020 at 11:19:17PM +0100, Thomas Gleixner wrote:
> 
> Balbir,
> 
> On Fri, Nov 27 2020 at 17:59, Balbir Singh wrote:
> > +enum l1d_flush_out_mitigations {
> > + L1D_FLUSH_OUT_OFF,
> > + L1D_FLUSH_OUT_ON,
> > +};
> > +
> > +static enum l1d_flush_out_mitigations l1d_flush_out_mitigation 
> > __ro_after_init = L1D_FLUSH_OUT_ON;
> 
> Why default on and why stays it on when the CPU is not affected by L1TF ...
> 

Because we don't set the PRCTL if the processor is not affected by the
bug

> >  /* Default mitigation for TAA-affected CPUs */
> >  static enum taa_mitigations taa_mitigation __ro_after_init = 
> > TAA_MITIGATION_VERW;
> >  static bool taa_nosmt __ro_after_init;
> > @@ -379,6 +386,18 @@ static void __init taa_select_mitigation(void)
> >   pr_info("%s\n", taa_strings[taa_mitigation]);
> >  }
> >
> > +static int __init l1d_flush_out_parse_cmdline(char *str)
> > +{
> > + if (!boot_cpu_has_bug(X86_BUG_L1TF))
> > + return 0;
> 
> ... while here you check for L1TF.
> 
> Also shouldn't it be default off and enabled via command line?
> 

I chose the other way because the prctl is an opt-in as well

> > +static int l1d_flush_out_prctl_get(struct task_struct *task)
> > +{
> > + int ret;
> > +
> > + if (l1d_flush_out_mitigation == L1D_FLUSH_OUT_OFF)
> > + return PR_SPEC_FORCE_DISABLE;
> > +
> > + ret = test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
> 
> That ret indirection is pointless. Just make it if (test_)

Sure, will do

> 
> > +static int l1d_flush_out_prctl_set(struct task_struct *task, unsigned long 
> > ctrl)
> > +{
> > +
> > + if (l1d_flush_out_mitigation == L1D_FLUSH_OUT_OFF)
> > + return -EPERM;
> 
> So here you check for off and then...
> 

Yes

> >  int enable_l1d_flush_for_task(struct task_struct *tsk)
> >  {
> > + /*
> > +  * Do not enable L1D_FLUSH_OUT if
> > +  * b. The CPU is not affected by the L1TF bug
> > +  * c. The CPU does not have L1D FLUSH feature support
> > +  */
> > +
> > + if (!boot_cpu_has_bug(X86_BUG_L1TF) ||
> > + !boot_cpu_has(X86_FEATURE_FLUSH_L1D))
> > + return -EINVAL;
> 
> ... you check for the feature bits with a malformatted condition at some
> other place. It's not supported when these conditions are not there. So
> why having this check here?
> 
> > +
> >   set_ti_thread_flag(&tsk->thread_info, TIF_SPEC_L1D_FLUSH);
> >   return 0;
> >  }
> 
> Aside of that, why is this in tlb.c and not in bugs.c? There is nothing
> tlb specific in these enable/disable functions. They just fiddle with
> the TIF bit.
> 

I can move them over.

> > +/*
> > + * Sent to a task that opts into L1D flushing via the prctl interface
> > + * but ends up running on an SMT enabled core.
> > + */
> > +static void l1d_flush_kill(struct callback_head *ch)
> > +{
> > + force_sig(SIGBUS);
> > +}
> > +
> >  static inline unsigned long mm_mangle_tif_spec_bits(struct task_struct 
> > *next)
> >  {
> >   unsigned long next_tif = task_thread_info(next)->flags;
> >   unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & 
> > LAST_USER_MM_SPEC_MASK;
> > + unsigned long next_mm;
> >
> >   BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);
> > - return (unsigned long)next->mm | spec_bits;
> > + next_mm = (unsigned long)next->mm | spec_bits;
> > +
> > + if ((next_mm & LAST_USER_MM_L1D_FLUSH) && 
> > this_cpu_read(cpu_info.smt_active)) {
> 
> Wh. Yet more unconditional checks on every context switch.

A task can only get here if it is affected by the bug (processor has
L1TF and L1D_FLUSH support) and the task opted in. I think what you're
suggesting is that we avoid the check for all tasks (the single next_mm
& LAST_USER_MM_L1D_FLUSH check as well)?

> 
> > + clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
> > + next->l1d_flush_kill.func = l1d_flush_kill;
> > + task_work_add(next, &next->l1d_flush_kill, true);
> 
> int task_work_add(struct task_struct *task, struct callback_head *twork,
>   enum task_work_notify_mode mode);
> 
> true is truly not a valid enum constant 

:) I might really have added it when we were transitioning from true to
TWA_RESUME, I am surprised the compiler did not catch it, I'll move it
over.

> 
> > + }
> 
> So you really want to have:
> 
> DEFINE_STATIC_KEY_FALSE(l1dflush_enabled);
> static bool l1dflush_mitigation __init_data;
> 
> and then with the command line option you set l1dflush_mitigation and in
> check_bugs() you invoke l1dflush_select_mitigation() which does:
> 
>if (!l1dflush_mitigation || !boot_cpu_has_bug(X86_BUG_L1TF) ||
>!boot_cpu_has(X86_FEATURE_FLUSH_L1D))
> return;
> 
>    static_branch_enable(&l1dflush_enabled);
> 
> and then in l1d_flush_out_prctl_set()
> 
>    if (!static_branch_unlikely(&l1dflush_enabled))
> return -ENOTSUPP;
> 
> Then make the whole switch machinery do:
> 

Re: [PATCH v3 4/5] prctl: Hook L1D flushing in via prctl

2020-12-04 Thread Thomas Gleixner
Balbir,

On Fri, Nov 27 2020 at 17:59, Balbir Singh wrote:
> +enum l1d_flush_out_mitigations {
> + L1D_FLUSH_OUT_OFF,
> + L1D_FLUSH_OUT_ON,
> +};
> +
> +static enum l1d_flush_out_mitigations l1d_flush_out_mitigation 
> __ro_after_init = L1D_FLUSH_OUT_ON;

Why default on and why stays it on when the CPU is not affected by L1TF ...

>  /* Default mitigation for TAA-affected CPUs */
>  static enum taa_mitigations taa_mitigation __ro_after_init = 
> TAA_MITIGATION_VERW;
>  static bool taa_nosmt __ro_after_init;
> @@ -379,6 +386,18 @@ static void __init taa_select_mitigation(void)
>   pr_info("%s\n", taa_strings[taa_mitigation]);
>  }
>  
> +static int __init l1d_flush_out_parse_cmdline(char *str)
> +{
> + if (!boot_cpu_has_bug(X86_BUG_L1TF))
> + return 0;

... while here you check for L1TF.

Also shouldn't it be default off and enabled via command line?

> +static int l1d_flush_out_prctl_get(struct task_struct *task)
> +{
> + int ret;
> +
> + if (l1d_flush_out_mitigation == L1D_FLUSH_OUT_OFF)
> + return PR_SPEC_FORCE_DISABLE;
> +
> + ret = test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);

That ret indirection is pointless. Just make it if (test_)

> +static int l1d_flush_out_prctl_set(struct task_struct *task, unsigned long 
> ctrl)
> +{
> +
> + if (l1d_flush_out_mitigation == L1D_FLUSH_OUT_OFF)
> + return -EPERM;

So here you check for off and then...

>  int enable_l1d_flush_for_task(struct task_struct *tsk)
>  {
> + /*
> +  * Do not enable L1D_FLUSH_OUT if
> +  * b. The CPU is not affected by the L1TF bug
> +  * c. The CPU does not have L1D FLUSH feature support
> +  */
> +
> + if (!boot_cpu_has_bug(X86_BUG_L1TF) ||
> + !boot_cpu_has(X86_FEATURE_FLUSH_L1D))
> + return -EINVAL;

... you check for the feature bits with a malformatted condition at some
other place. It's not supported when these conditions are not there. So
why having this check here?

> +
>   set_ti_thread_flag(&tsk->thread_info, TIF_SPEC_L1D_FLUSH);
>   return 0;
>  }

Aside of that, why is this in tlb.c and not in bugs.c? There is nothing
tlb specific in these enable/disable functions. They just fiddle with
the TIF bit.

> +/*
> + * Sent to a task that opts into L1D flushing via the prctl interface
> + * but ends up running on an SMT enabled core.
> + */
> +static void l1d_flush_kill(struct callback_head *ch)
> +{
> + force_sig(SIGBUS);
> +}
> +
>  static inline unsigned long mm_mangle_tif_spec_bits(struct task_struct *next)
>  {
>   unsigned long next_tif = task_thread_info(next)->flags;
>   unsigned long spec_bits = (next_tif >> TIF_SPEC_IB) & 
> LAST_USER_MM_SPEC_MASK;
> + unsigned long next_mm;
>  
>   BUILD_BUG_ON(TIF_SPEC_L1D_FLUSH != TIF_SPEC_IB + 1);
> - return (unsigned long)next->mm | spec_bits;
> + next_mm = (unsigned long)next->mm | spec_bits;
> +
> + if ((next_mm & LAST_USER_MM_L1D_FLUSH) && 
> this_cpu_read(cpu_info.smt_active)) {

Wh. Yet more unconditional checks on every context switch.

> + clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
> + next->l1d_flush_kill.func = l1d_flush_kill;
> + task_work_add(next, &next->l1d_flush_kill, true);

int task_work_add(struct task_struct *task, struct callback_head *twork,
  enum task_work_notify_mode mode);

true is truly not a valid enum constant 

> + }

So you really want to have:

DEFINE_STATIC_KEY_FALSE(l1dflush_enabled);
static bool l1dflush_mitigation __init_data;

and then with the command line option you set l1dflush_mitigation and in
check_bugs() you invoke l1dflush_select_mitigation() which does:

   if (!l1dflush_mitigation || !boot_cpu_has_bug(X86_BUG_L1TF) ||
   !boot_cpu_has(X86_FEATURE_FLUSH_L1D))
return;

   static_branch_enable(&l1dflush_enabled);

and then in l1d_flush_out_prctl_set()

   if (!static_branch_unlikely(&l1dflush_enabled))
return -ENOTSUPP;

Then make the whole switch machinery do:

  if (static_branch_unlikely(&l1dflush_enabled)) {
if (unlikely((next_mm | prev_mm) & LAST_USER_MM_L1D_FLUSH))
 l1dflush_evaluate(next_mm, prev_mm);
  }

and l1dflush_evaluate()

 if (prev_mm & LAST_USER_MM_L1D_FLUSH)
  l1d_flush();

 if ((next_mm & LAST_USER_MM_L1D_FLUSH) &&
 this_cpu_read(cpu_info.smt_active)) {

  clear_ti_thread_flag(&next->thread_info, TIF_SPEC_L1D_FLUSH);
  next->l1d_flush_kill.func = l1d_flush_kill;
  task_work_add(next, &next->l1d_flush_kill, TWA_RESUME);
 }

That way the overhead is on systems where the admin decides to enable it
and if enabled the evaluation of prev_mm and next_mm is pushed out of
line.

Hmm?

Thanks,

tglx


[PATCH v3 4/5] prctl: Hook L1D flushing in via prctl

2020-11-26 Thread Balbir Singh
Use the existing PR_GET/SET_SPECULATION_CTRL API to expose the L1D
flush capability. For L1D flushing PR_SPEC_FORCE_DISABLE and
PR_SPEC_DISABLE_NOEXEC are not supported.

Enabling L1D flush does not check if the task is running on
an SMT enabled core, rather a check is done at runtime (at the
time of flush), if the task runs on a non SMT enabled core
then the task is sent a SIGBUS (this is done prior to the task
executing on the core, so no data is leaked). This is better
than the other alternatives of

a. Ensuring strict affinity of the task (hard to enforce
without further changes in the scheduler)
b. Silently skipping flush for tasks that move to SMT enabled
cores.

An arch config ARCH_HAS_PARANOID_L1D_FLUSH has been added
and struct task carries a callback_head for arch's that support
this config (currently on x86), this callback head is used
to schedule task work (SIGBUS delivery).

There is also no seccomp integration for the feature.

Suggested-by: Thomas Gleixner 
Signed-off-by: Balbir Singh 
Signed-off-by: Thomas Gleixner 
---
 arch/Kconfig   |  4 +++
 arch/x86/Kconfig   |  1 +
 arch/x86/kernel/cpu/bugs.c | 54 ++
 arch/x86/mm/tlb.c  | 30 -
 include/linux/sched.h  | 10 +++
 include/uapi/linux/prctl.h |  1 +
 6 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 56b6ccc0e32d..d4a0501ac7fc 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -311,6 +311,10 @@ config ARCH_32BIT_OFF_T
  still support 32-bit off_t. This option is enabled for all such
  architectures explicitly.
 
+config ARCH_HAS_PARANOID_L1D_FLUSH
+   bool
+   default n
+
 config HAVE_ASM_MODVERSIONS
bool
help
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f6946b81f74a..4f6caa6dae16 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -101,6 +101,7 @@ config X86
select ARCH_WANTS_DYNAMIC_TASK_STRUCT
select ARCH_WANT_HUGE_PMD_SHARE
select ARCH_WANTS_THP_SWAP  if X86_64
+   select ARCH_HAS_PARANOID_L1D_FLUSH
select BUILDTIME_TABLE_SORT
select CLKEVT_I8253
select CLOCKSOURCE_VALIDATE_LAST_CYCLE
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 581fb7223ad0..dece79e4d1e9 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -296,6 +296,13 @@ enum taa_mitigations {
TAA_MITIGATION_TSX_DISABLED,
 };
 
+enum l1d_flush_out_mitigations {
+   L1D_FLUSH_OUT_OFF,
+   L1D_FLUSH_OUT_ON,
+};
+
+static enum l1d_flush_out_mitigations l1d_flush_out_mitigation __ro_after_init 
= L1D_FLUSH_OUT_ON;
+
 /* Default mitigation for TAA-affected CPUs */
 static enum taa_mitigations taa_mitigation __ro_after_init = 
TAA_MITIGATION_VERW;
 static bool taa_nosmt __ro_after_init;
@@ -379,6 +386,18 @@ static void __init taa_select_mitigation(void)
pr_info("%s\n", taa_strings[taa_mitigation]);
 }
 
+static int __init l1d_flush_out_parse_cmdline(char *str)
+{
+   if (!boot_cpu_has_bug(X86_BUG_L1TF))
+   return 0;
+
+   if (!strcmp(str, "off"))
+   l1d_flush_out_mitigation = L1D_FLUSH_OUT_OFF;
+
+   return 0;
+}
+early_param("l1d_flush_out", l1d_flush_out_parse_cmdline);
+
 static int __init tsx_async_abort_parse_cmdline(char *str)
 {
if (!boot_cpu_has_bug(X86_BUG_TAA))
@@ -1215,6 +1234,23 @@ static void task_update_spec_tif(struct task_struct *tsk)
speculation_ctrl_update_current();
 }
 
+static int l1d_flush_out_prctl_set(struct task_struct *task, unsigned long 
ctrl)
+{
+
+   if (l1d_flush_out_mitigation == L1D_FLUSH_OUT_OFF)
+   return -EPERM;
+
+   switch (ctrl) {
+   case PR_SPEC_ENABLE:
+   return enable_l1d_flush_for_task(task);
+   case PR_SPEC_DISABLE:
+   return disable_l1d_flush_for_task(task);
+   default:
+   return -ERANGE;
+   }
+   return 0;
+}
+
 static int ssb_prctl_set(struct task_struct *task, unsigned long ctrl)
 {
if (ssb_mode != SPEC_STORE_BYPASS_PRCTL &&
@@ -1324,6 +1360,8 @@ int arch_prctl_spec_ctrl_set(struct task_struct *task, 
unsigned long which,
return ssb_prctl_set(task, ctrl);
case PR_SPEC_INDIRECT_BRANCH:
return ib_prctl_set(task, ctrl);
+   case PR_SPEC_L1D_FLUSH_OUT:
+   return l1d_flush_out_prctl_set(task, ctrl);
default:
return -ENODEV;
}
@@ -1340,6 +1378,20 @@ void arch_seccomp_spec_mitigate(struct task_struct *task)
 }
 #endif
 
+static int l1d_flush_out_prctl_get(struct task_struct *task)
+{
+   int ret;
+
+   if (l1d_flush_out_mitigation == L1D_FLUSH_OUT_OFF)
+   return PR_SPEC_FORCE_DISABLE;
+
   ret = test_ti_thread_flag(&task->thread_info, TIF_SPEC_L1D_FLUSH);
+   if (ret)
+   return PR_SPEC_PRCTL | PR_SPEC_ENABLE;
+   else
+   return