Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-16 Thread Alexander Graf




On 13.11.20 16:55, Joel Fernandes wrote:




+static enum coresched_cmds coresched_cmd __ro_after_init = CORE_SCHED_SECURE;
+
+static int __init coresched_parse_cmdline(char *arg)
+{
+   if (!strcmp(arg, "off"))
+   coresched_cmd = CORE_SCHED_OFF;
+   else if (!strcmp(arg, "on"))
+   coresched_cmd = CORE_SCHED_ON;
+   else if (!strcmp(arg, "secure"))
+   coresched_cmd = CORE_SCHED_SECURE;
+   else
+   pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
+   arg);
+
+   return 0;



Instead of calling the matching function over and over again, can we just
configure a static branch (see below) based on the command line setting
here? Or do we not know about the bugs yet?

[...]

+static bool __coresched_supported(void)
+{
+   /* coreched=off command line option. */
+   if (coresched_cmd_off())
+   return false;
+
+   /*
+* Some arch may not need coresched, example some x86 may not need
+* coresched if coresched=secure option is passed (=secure is default).
+*/
+   return arch_allow_core_sched();
+}
+
   void sched_core_get(void)
   {
+   if (!__coresched_supported())
+   return;


I would expect core scheduling to be basically an option that you set once
and never flip. This sounds like a prefect use case for a static branch to
me?


Something ike so then? If Ok, let me know if I can add your Reviewed-by tag.

thanks!

---8<---

From: "Joel Fernandes (Google)" 
Subject: [PATCH] sched: Add a coresched command line option

Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow those device down.

However, some users may want core scheduling even if the hardware is
secure. To support them, add a coresched= option which defaults to
'secure' and can be overridden to 'on' if the user wants to enable
coresched even if the HW is not vulnerable. 'off' would disable
core scheduling in any case.

Also add a sched_debug entry to indicate if core scheduling is turned on
or not.

Signed-off-by: Joel Fernandes (Google) 
---
  arch/x86/kernel/cpu/bugs.c | 19 +++
  include/linux/cpu.h|  1 +
  include/linux/sched/smt.h  |  4 
  kernel/cpu.c   | 38 ++
  kernel/sched/core.c|  6 ++
  kernel/sched/debug.c   |  4 
  6 files changed, 72 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..7607c9cd7f0f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void);
  static void __init mds_print_mitigation(void);
  static void __init taa_select_mitigation(void);
  static void __init srbds_select_mitigation(void);
+static void __init coresched_select(void);

  /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
  u64 x86_spec_ctrl_base;
@@ -103,6 +104,9 @@ void __init check_bugs(void)
 if (boot_cpu_has(X86_FEATURE_STIBP))
 x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;

+   /* Update whether core-scheduling is needed. */
+   coresched_select();
+
 /* Select the proper CPU mitigations before patching alternatives: */
 spectre_v1_select_mitigation();
 spectre_v2_select_mitigation();
@@ -1808,4 +1812,19 @@ ssize_t cpu_show_srbds(struct device *dev, struct 
device_attribute *attr, char *
  {
 return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
  }
+
+/*
+ * When coresched=secure command line option is passed (default), disable core
+ * scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+static void __init coresched_select(void)
+{
+#ifdef CONFIG_SCHED_CORE
+   if (!coresched_cmd_secure())


Make this a positive branch instead please :).

/*
 * Disable core scheduling on non-MDS, non-L1TF systems
 * when coresched=secure (default)
 */
if (coresched_cmd_secure() &&
!boot_cpu_has_bug(X86_BUG_MDS) &&
!boot_cpu_has_bug(X86_BUG_L1TF))
static_branch_disable(_coresched_supported);


+   return;
+   if (!boot_cpu_has_bug(X86_BUG_MDS) && !boot_cpu_has_bug(X86_BUG_L1TF))
+   static_branch_disable(_coresched_supported);
+#endif
+}
+
  #endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d6428aaf67e7..d1f1e64316d6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -228,4 +228,5 @@ static inline int cpuhp_smt_disable(enum cpuhp_smt_control 
ctrlval) { return 0;
  extern bool cpu_mitigations_off(void);
  extern bool cpu_mitigations_auto_nosmt(void);

+extern bool coresched_cmd_secure(void);
  #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index 59d3736c454c..561064eb3268 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ 

Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-13 Thread Joel Fernandes
> > +static enum coresched_cmds coresched_cmd __ro_after_init = 
> > CORE_SCHED_SECURE;
> > +
> > +static int __init coresched_parse_cmdline(char *arg)
> > +{
> > +   if (!strcmp(arg, "off"))
> > +   coresched_cmd = CORE_SCHED_OFF;
> > +   else if (!strcmp(arg, "on"))
> > +   coresched_cmd = CORE_SCHED_ON;
> > +   else if (!strcmp(arg, "secure"))
> > +   coresched_cmd = CORE_SCHED_SECURE;
> > +   else
> > +   pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
> > +   arg);
> > +
> > +   return 0;
> 
> 
> Instead of calling the matching function over and over again, can we just
> configure a static branch (see below) based on the command line setting
> here? Or do we not know about the bugs yet?
[...]
> > +static bool __coresched_supported(void)
> > +{
> > +   /* coreched=off command line option. */
> > +   if (coresched_cmd_off())
> > +   return false;
> > +
> > +   /*
> > +* Some arch may not need coresched, example some x86 may not need
> > +* coresched if coresched=secure option is passed (=secure is 
> > default).
> > +*/
> > +   return arch_allow_core_sched();
> > +}
> > +
> >   void sched_core_get(void)
> >   {
> > +   if (!__coresched_supported())
> > +   return;
> 
> I would expect core scheduling to be basically an option that you set once
> and never flip. This sounds like a prefect use case for a static branch to
> me?

Something ike so then? If Ok, let me know if I can add your Reviewed-by tag.

thanks!

---8<---

From: "Joel Fernandes (Google)" 
Subject: [PATCH] sched: Add a coresched command line option

Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow those device down.

However, some users may want core scheduling even if the hardware is
secure. To support them, add a coresched= option which defaults to
'secure' and can be overridden to 'on' if the user wants to enable
coresched even if the HW is not vulnerable. 'off' would disable
core scheduling in any case.

Also add a sched_debug entry to indicate if core scheduling is turned on
or not.

Signed-off-by: Joel Fernandes (Google) 
---
 arch/x86/kernel/cpu/bugs.c | 19 +++
 include/linux/cpu.h|  1 +
 include/linux/sched/smt.h  |  4 
 kernel/cpu.c   | 38 ++
 kernel/sched/core.c|  6 ++
 kernel/sched/debug.c   |  4 
 6 files changed, 72 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..7607c9cd7f0f 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -43,6 +43,7 @@ static void __init mds_select_mitigation(void);
 static void __init mds_print_mitigation(void);
 static void __init taa_select_mitigation(void);
 static void __init srbds_select_mitigation(void);
+static void __init coresched_select(void);
 
 /* The base value of the SPEC_CTRL MSR that always has to be preserved. */
 u64 x86_spec_ctrl_base;
@@ -103,6 +104,9 @@ void __init check_bugs(void)
if (boot_cpu_has(X86_FEATURE_STIBP))
x86_spec_ctrl_mask |= SPEC_CTRL_STIBP;
 
+   /* Update whether core-scheduling is needed. */
+   coresched_select();
+
/* Select the proper CPU mitigations before patching alternatives: */
spectre_v1_select_mitigation();
spectre_v2_select_mitigation();
@@ -1808,4 +1812,19 @@ ssize_t cpu_show_srbds(struct device *dev, struct 
device_attribute *attr, char *
 {
return cpu_show_common(dev, attr, buf, X86_BUG_SRBDS);
 }
+
+/*
+ * When coresched=secure command line option is passed (default), disable core
+ * scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+static void __init coresched_select(void)
+{
+#ifdef CONFIG_SCHED_CORE
+   if (!coresched_cmd_secure())
+   return;
+   if (!boot_cpu_has_bug(X86_BUG_MDS) && !boot_cpu_has_bug(X86_BUG_L1TF))
+   static_branch_disable(_coresched_supported);
+#endif
+}
+
 #endif
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d6428aaf67e7..d1f1e64316d6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -228,4 +228,5 @@ static inline int cpuhp_smt_disable(enum cpuhp_smt_control 
ctrlval) { return 0;
 extern bool cpu_mitigations_off(void);
 extern bool cpu_mitigations_auto_nosmt(void);
 
+extern bool coresched_cmd_secure(void);
 #endif /* _LINUX_CPU_H_ */
diff --git a/include/linux/sched/smt.h b/include/linux/sched/smt.h
index 59d3736c454c..561064eb3268 100644
--- a/include/linux/sched/smt.h
+++ b/include/linux/sched/smt.h
@@ -17,4 +17,8 @@ static inline bool sched_smt_active(void) { return false; }
 
 void arch_smt_update(void);
 
+#ifdef CONFIG_SCHED_CORE
+extern struct static_key_true sched_coresched_supported;
+#endif
+
 #endif
diff --git 

Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-12 Thread Alexander Graf




On 12.11.20 15:40, Joel Fernandes wrote:


On Thu, Nov 12, 2020 at 08:40:05AM -0500, Joel Fernandes wrote:

On Wed, Nov 11, 2020 at 11:29:37PM +0100, Alexander Graf wrote:



On 11.11.20 23:15, Joel Fernandes wrote:


On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes  wrote:


On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  wrote:

On 11.11.20 22:14, Joel Fernandes wrote:

Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow the device done.

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..0e6e61e49b23 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,14 @@ void __init check_bugs(void)
#endif
}

+/*
+ * Do not need core scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+int arch_allow_core_sched(void)
+{
+   return boot_cpu_has_bug(X86_BUG_MDS) || boot_cpu_has_bug(X86_BUG_L1TF);


Can we make this more generic and user settable, similar to the L1 cache
flushing modes in KVM?

I am not 100% convinced that there are no other thread sibling attacks
possible without MDS and L1TF. If I'm paranoid, I want to still be able
to force enable core scheduling.

In addition, we are also using core scheduling as a poor man's mechanism
to give customers consistent performance for virtual machine thread
siblings. This is important irrespective of CPU bugs. In such a
scenario, I want to force enable core scheduling.


Ok,  I can make it new kernel command line option with:
coresched=on
coresched=secure (only if HW has MDS/L1TF)
coresched=off


Also, I would keep "secure" as the default.  (And probably, we should
modify the informational messages in sysfs to reflect this..)


I agree that "secure" should be the default.


Ok.


Something like so then:

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..3c2457d47f54 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,21 @@ void __init check_bugs(void)
  #endif
  }

+/*
+ * When coresched=secure, do not need coresched if CPU does not have MDS/L1TF 
bugs.
+ */
+int arch_allow_core_sched(void)
+{
+   /*
+* x86: Disallow coresched if it is in secure mode and the CPU does not
+* have vulnerabilities.
+*/
+   if (coresched_cmd_secure())
+   return boot_cpu_has_bug(X86_BUG_MDS) || 
boot_cpu_has_bug(X86_BUG_L1TF);
+   else
+   return true;
+}
+
  void
  x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool 
setguest)
  {
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d6428aaf67e7..1be5cf85a4a6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -228,4 +228,7 @@ static inline int cpuhp_smt_disable(enum cpuhp_smt_control 
ctrlval) { return 0;
  extern bool cpu_mitigations_off(void);
  extern bool cpu_mitigations_auto_nosmt(void);

+extern bool coresched_cmd_off(void);
+extern bool coresched_cmd_secure(void);
+
  #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ff2578ecf17..674edf534cc5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2552,3 +2552,46 @@ bool cpu_mitigations_auto_nosmt(void)
 return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
  }
  EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
+
+/*
+ * These are used for a global "coresched=" cmdline option for controlling
+ * core scheduling. Note that core sched may be needed for usecases other
+ * than security as well.
+ */
+enum coresched_cmds {
+   CORE_SCHED_OFF,
+   CORE_SCHED_SECURE,
+   CORE_SCHED_ON,
+};
+
+static enum coresched_cmds coresched_cmd __ro_after_init = CORE_SCHED_SECURE;
+
+static int __init coresched_parse_cmdline(char *arg)
+{
+   if (!strcmp(arg, "off"))
+   coresched_cmd = CORE_SCHED_OFF;
+   else if (!strcmp(arg, "on"))
+   coresched_cmd = CORE_SCHED_ON;
+   else if (!strcmp(arg, "secure"))
+   coresched_cmd = CORE_SCHED_SECURE;
+   else
+   pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
+   arg);
+
+   return 0;



Instead of calling the matching function over and over again, can we 
just configure a static branch (see below) based on the command line 
setting here? Or do we not know about the bugs yet?



+}
+early_param("coresched", coresched_parse_cmdline);
+
+/* coresched=off */
+bool coresched_cmd_off(void)
+{
+   return coresched_cmd == CORE_SCHED_OFF;
+}
+EXPORT_SYMBOL_GPL(coresched_cmd_off);
+
+/* coresched=secure */
+bool coresched_cmd_secure(void)
+{
+   return coresched_cmd == CORE_SCHED_SECURE;
+}
+EXPORT_SYMBOL_GPL(coresched_cmd_secure);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5ed26b469ed6..6f586d221ddb 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -333,8 +333,23 @@ static void __sched_core_disable(void)
 printk("core 

Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-12 Thread Alexander Graf




On 12.11.20 16:28, Joel Fernandes wrote:


On Thu, Nov 12, 2020 at 03:52:32PM +0100, Alexander Graf wrote:



On 12.11.20 14:40, Joel Fernandes wrote:


On Wed, Nov 11, 2020 at 11:29:37PM +0100, Alexander Graf wrote:



On 11.11.20 23:15, Joel Fernandes wrote:


On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes  wrote:


On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  wrote:

On 11.11.20 22:14, Joel Fernandes wrote:

Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow the device done.

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..0e6e61e49b23 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,14 @@ void __init check_bugs(void)
 #endif
 }

+/*
+ * Do not need core scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+int arch_allow_core_sched(void)
+{
+   return boot_cpu_has_bug(X86_BUG_MDS) || boot_cpu_has_bug(X86_BUG_L1TF);


Can we make this more generic and user settable, similar to the L1 cache
flushing modes in KVM?

I am not 100% convinced that there are no other thread sibling attacks
possible without MDS and L1TF. If I'm paranoid, I want to still be able
to force enable core scheduling.

In addition, we are also using core scheduling as a poor man's mechanism
to give customers consistent performance for virtual machine thread
siblings. This is important irrespective of CPU bugs. In such a
scenario, I want to force enable core scheduling.


Ok,  I can make it new kernel command line option with:
coresched=on
coresched=secure (only if HW has MDS/L1TF)
coresched=off


Also, I would keep "secure" as the default.  (And probably, we should
modify the informational messages in sysfs to reflect this..)


I agree that "secure" should be the default.


Ok.


Can we also integrate into the "mitigations" kernel command line[1] for this?


Sure, the integration into [1] sounds conceptually fine to me however it is
not super straight forward. Like: What if user wants to force-enable
core-scheduling for the usecase you mention, but still wants the cross-HT
mitigation because they are only tagging VMs (as in your usecase) and not
other tasks. Idk.


Can we roll this backwards from what you would expect as a user? How about
we make this 2-dimensional?

   coresched=[on|off|secure][,force]

where "on" means "core scheduling can be done if colors are set", "off"
means "no core scheduling is done" and "secure" means "core scheduling can
be done on MDS or L1TF if colors are set".


So support for this force thing is not there ATM in the patchset. We can
always incrementally add it later. I personally don't expect users to be Ok
with tagging every single task as it is equivalent to disabling SMT and makes
coresched useless.


It just flips the default from "always consider everything safe" to 
"always consider everything unsafe". Inside a cgroup, you can still set 
the same color to make use of siblings.


Either way, I agree that it can be a follow-up.




The "force" option would then mean "apply a color to every new task".

What then happens with mitigations= is easy. "auto" means
"coresched=secure". "off" means "coresched=off" and if you want to force
core scheduling for everything if necessary, you just do mitigations=auto
coresched=auto,force.

Am I missing something obvious? :)


I guess I am confused for the following usage:
mitigations=auto,nosmt coresched=secure

Note that auto,nosmt disables SMT selectively *only if needed*. Now, you add
coresched=secure to the mix. Should auto,nosmt disable SMT or not? It should be
disabled if the user did not tag anything (because system is insecure). It
should be enabled, if they tagged things. So it really depends on user doing
the right thing. And it is super confusing already -- I would just rather
keep coresched= separate from mitigations= and document it properly. TBH-
coresched does require system admin / designer to tag things as needed so why
pretend that its easy to configure anyway? :)


coresched=secure still won't allow you to trust your system without 
thinking about it, while nosmt does. So I would say that nosmt does not 
imply anything for coresched (until ,force is available, then we're 
talking ...)


The main thing I'm interested in though is mitigations=off. When you 
know you only care about performance and not side channel security (HPC 
for example), then you can in general just set mitigations=off. That 
should definitely affect the core scheduling setting as well.



Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-12 Thread Joel Fernandes
On Thu, Nov 12, 2020 at 03:52:32PM +0100, Alexander Graf wrote:
> 
> 
> On 12.11.20 14:40, Joel Fernandes wrote:
> > 
> > On Wed, Nov 11, 2020 at 11:29:37PM +0100, Alexander Graf wrote:
> > > 
> > > 
> > > On 11.11.20 23:15, Joel Fernandes wrote:
> > > > 
> > > > On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes  
> > > > wrote:
> > > > > 
> > > > > On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  
> > > > > wrote:
> > > > > > On 11.11.20 22:14, Joel Fernandes wrote:
> > > > > > > > Some hardware such as certain AMD variants don't have cross-HT 
> > > > > > > > MDS/L1TF
> > > > > > > > issues. Detect this and don't enable core scheduling as it can
> > > > > > > > needlessly slow the device done.
> > > > > > > > 
> > > > > > > > diff --git a/arch/x86/kernel/cpu/bugs.c 
> > > > > > > > b/arch/x86/kernel/cpu/bugs.c
> > > > > > > > index dece79e4d1e9..0e6e61e49b23 100644
> > > > > > > > --- a/arch/x86/kernel/cpu/bugs.c
> > > > > > > > +++ b/arch/x86/kernel/cpu/bugs.c
> > > > > > > > @@ -152,6 +152,14 @@ void __init check_bugs(void)
> > > > > > > > #endif
> > > > > > > > }
> > > > > > > > 
> > > > > > > > +/*
> > > > > > > > + * Do not need core scheduling if CPU does not have MDS/L1TF 
> > > > > > > > vulnerability.
> > > > > > > > + */
> > > > > > > > +int arch_allow_core_sched(void)
> > > > > > > > +{
> > > > > > > > +   return boot_cpu_has_bug(X86_BUG_MDS) || 
> > > > > > > > boot_cpu_has_bug(X86_BUG_L1TF);
> > > > > > 
> > > > > > Can we make this more generic and user settable, similar to the L1 
> > > > > > cache
> > > > > > flushing modes in KVM?
> > > > > > 
> > > > > > I am not 100% convinced that there are no other thread sibling 
> > > > > > attacks
> > > > > > possible without MDS and L1TF. If I'm paranoid, I want to still be 
> > > > > > able
> > > > > > to force enable core scheduling.
> > > > > > 
> > > > > > In addition, we are also using core scheduling as a poor man's 
> > > > > > mechanism
> > > > > > to give customers consistent performance for virtual machine thread
> > > > > > siblings. This is important irrespective of CPU bugs. In such a
> > > > > > scenario, I want to force enable core scheduling.
> > > > > 
> > > > > Ok,  I can make it new kernel command line option with:
> > > > > coresched=on
> > > > > coresched=secure (only if HW has MDS/L1TF)
> > > > > coresched=off
> > > > 
> > > > Also, I would keep "secure" as the default.  (And probably, we should
> > > > modify the informational messages in sysfs to reflect this..)
> > > 
> > > I agree that "secure" should be the default.
> > 
> > Ok.
> > 
> > > Can we also integrate into the "mitigations" kernel command line[1] for 
> > > this?
> > 
> > Sure, the integration into [1] sounds conceptually fine to me however it is
> > not super straight forward. Like: What if user wants to force-enable
> > core-scheduling for the usecase you mention, but still wants the cross-HT
> > mitigation because they are only tagging VMs (as in your usecase) and not
> > other tasks. Idk.
> 
> Can we roll this backwards from what you would expect as a user? How about
> we make this 2-dimensional?
> 
>   coresched=[on|off|secure][,force]
> 
> where "on" means "core scheduling can be done if colors are set", "off"
> means "no core scheduling is done" and "secure" means "core scheduling can
> be done on MDS or L1TF if colors are set".

So support for this force thing is not there ATM in the patchset. We can
always incrementally add it later. I personally don't expect users to be Ok
with tagging every single task as it is equivalent to disabling SMT and makes
coresched useless.

> The "force" option would then mean "apply a color to every new task".
> 
> What then happens with mitigations= is easy. "auto" means
> "coresched=secure". "off" means "coresched=off" and if you want to force
> core scheduling for everything if necessary, you just do mitigations=auto
> coresched=auto,force.
> 
> Am I missing something obvious? :)

I guess I am confused for the following usage:
mitigations=auto,nosmt coresched=secure

Note that auto,nosmt disables SMT selectively *only if needed*. Now, you add
coresched=secure to the mix. Should auto,nosmt disable SMT or not? It should be
disabled if the user did not tag anything (because system is insecure). It
should be enabled, if they tagged things. So it really depends on user doing
the right thing. And it is super confusing already -- I would just rather
keep coresched= separate from mitigations= and document it properly. TBH-
coresched does require system admin / designer to tag things as needed so why
pretend that its easy to configure anyway? :)

thanks,

 - Joel



Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-12 Thread Alexander Graf




On 12.11.20 14:40, Joel Fernandes wrote:


On Wed, Nov 11, 2020 at 11:29:37PM +0100, Alexander Graf wrote:



On 11.11.20 23:15, Joel Fernandes wrote:


On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes  wrote:


On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  wrote:

On 11.11.20 22:14, Joel Fernandes wrote:

Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow the device done.

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..0e6e61e49b23 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,14 @@ void __init check_bugs(void)
#endif
}

+/*
+ * Do not need core scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+int arch_allow_core_sched(void)
+{
+   return boot_cpu_has_bug(X86_BUG_MDS) || boot_cpu_has_bug(X86_BUG_L1TF);


Can we make this more generic and user settable, similar to the L1 cache
flushing modes in KVM?

I am not 100% convinced that there are no other thread sibling attacks
possible without MDS and L1TF. If I'm paranoid, I want to still be able
to force enable core scheduling.

In addition, we are also using core scheduling as a poor man's mechanism
to give customers consistent performance for virtual machine thread
siblings. This is important irrespective of CPU bugs. In such a
scenario, I want to force enable core scheduling.


Ok,  I can make it new kernel command line option with:
coresched=on
coresched=secure (only if HW has MDS/L1TF)
coresched=off


Also, I would keep "secure" as the default.  (And probably, we should
modify the informational messages in sysfs to reflect this..)


I agree that "secure" should be the default.


Ok.


Can we also integrate into the "mitigations" kernel command line[1] for this?


Sure, the integration into [1] sounds conceptually fine to me however it is
not super straight forward. Like: What if user wants to force-enable
core-scheduling for the usecase you mention, but still wants the cross-HT
mitigation because they are only tagging VMs (as in your usecase) and not
other tasks. Idk.


Can we roll this backwards from what you would expect as a user? How 
about we make this 2-dimensional?


  coresched=[on|off|secure][,force]

where "on" means "core scheduling can be done if colors are set", "off" 
means "no core scheduling is done" and "secure" means "core scheduling 
can be done on MDS or L1TF if colors are set".


The "force" option would then mean "apply a color to every new task".

What then happens with mitigations= is easy. "auto" means 
"coresched=secure". "off" means "coresched=off" and if you want to force 
core scheduling for everything if necessary, you just do 
mitigations=auto coresched=auto,force.


Am I missing something obvious? :)


The best thing to do could be to keep the "auto disable HT" controls and
logic separate from the "coresched=on" logic and let the user choose. The
exception being, coresched=secure means that on HW that does not have
vulnerability, we will not activate the core scheduling.


I'm much more interested in the coresched=off one for mitigations=. It's 
what we have introduced a while back to save people from setting 50 
different command line options.



Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879





Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-12 Thread Joel Fernandes
On Thu, Nov 12, 2020 at 08:40:05AM -0500, Joel Fernandes wrote:
> On Wed, Nov 11, 2020 at 11:29:37PM +0100, Alexander Graf wrote:
> > 
> > 
> > On 11.11.20 23:15, Joel Fernandes wrote:
> > > 
> > > On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes  
> > > wrote:
> > > > 
> > > > On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  wrote:
> > > > > On 11.11.20 22:14, Joel Fernandes wrote:
> > > > > > > Some hardware such as certain AMD variants don't have cross-HT 
> > > > > > > MDS/L1TF
> > > > > > > issues. Detect this and don't enable core scheduling as it can
> > > > > > > needlessly slow the device done.
> > > > > > > 
> > > > > > > diff --git a/arch/x86/kernel/cpu/bugs.c 
> > > > > > > b/arch/x86/kernel/cpu/bugs.c
> > > > > > > index dece79e4d1e9..0e6e61e49b23 100644
> > > > > > > --- a/arch/x86/kernel/cpu/bugs.c
> > > > > > > +++ b/arch/x86/kernel/cpu/bugs.c
> > > > > > > @@ -152,6 +152,14 @@ void __init check_bugs(void)
> > > > > > >#endif
> > > > > > >}
> > > > > > > 
> > > > > > > +/*
> > > > > > > + * Do not need core scheduling if CPU does not have MDS/L1TF 
> > > > > > > vulnerability.
> > > > > > > + */
> > > > > > > +int arch_allow_core_sched(void)
> > > > > > > +{
> > > > > > > +   return boot_cpu_has_bug(X86_BUG_MDS) || 
> > > > > > > boot_cpu_has_bug(X86_BUG_L1TF);
> > > > > 
> > > > > Can we make this more generic and user settable, similar to the L1 
> > > > > cache
> > > > > flushing modes in KVM?
> > > > > 
> > > > > I am not 100% convinced that there are no other thread sibling attacks
> > > > > possible without MDS and L1TF. If I'm paranoid, I want to still be 
> > > > > able
> > > > > to force enable core scheduling.
> > > > > 
> > > > > In addition, we are also using core scheduling as a poor man's 
> > > > > mechanism
> > > > > to give customers consistent performance for virtual machine thread
> > > > > siblings. This is important irrespective of CPU bugs. In such a
> > > > > scenario, I want to force enable core scheduling.
> > > > 
> > > > Ok,  I can make it new kernel command line option with:
> > > > coresched=on
> > > > coresched=secure (only if HW has MDS/L1TF)
> > > > coresched=off
> > > 
> > > Also, I would keep "secure" as the default.  (And probably, we should
> > > modify the informational messages in sysfs to reflect this..)
> > 
> > I agree that "secure" should be the default.
> 
> Ok.

Something like so then:

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..3c2457d47f54 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,21 @@ void __init check_bugs(void)
 #endif
 }
 
+/*
+ * When coresched=secure, do not need coresched if CPU does not have MDS/L1TF 
bugs.
+ */
+int arch_allow_core_sched(void)
+{
+   /*
+* x86: Disallow coresched if it is in secure mode and the CPU does not
+* have vulnerabilities.
+*/
+   if (coresched_cmd_secure())
+   return boot_cpu_has_bug(X86_BUG_MDS) || 
boot_cpu_has_bug(X86_BUG_L1TF);
+   else
+   return true;
+}
+
 void
 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool 
setguest)
 {
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index d6428aaf67e7..1be5cf85a4a6 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -228,4 +228,7 @@ static inline int cpuhp_smt_disable(enum cpuhp_smt_control 
ctrlval) { return 0;
 extern bool cpu_mitigations_off(void);
 extern bool cpu_mitigations_auto_nosmt(void);
 
+extern bool coresched_cmd_off(void);
+extern bool coresched_cmd_secure(void);
+
 #endif /* _LINUX_CPU_H_ */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index 6ff2578ecf17..674edf534cc5 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -2552,3 +2552,46 @@ bool cpu_mitigations_auto_nosmt(void)
return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT;
 }
 EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt);
+
+/*
+ * These are used for a global "coresched=" cmdline option for controlling
+ * core scheduling. Note that core sched may be needed for usecases other
+ * than security as well.
+ */
+enum coresched_cmds {
+   CORE_SCHED_OFF,
+   CORE_SCHED_SECURE,
+   CORE_SCHED_ON,
+};
+
+static enum coresched_cmds coresched_cmd __ro_after_init = CORE_SCHED_SECURE;
+
+static int __init coresched_parse_cmdline(char *arg)
+{
+   if (!strcmp(arg, "off"))
+   coresched_cmd = CORE_SCHED_OFF;
+   else if (!strcmp(arg, "on"))
+   coresched_cmd = CORE_SCHED_ON;
+   else if (!strcmp(arg, "secure"))
+   coresched_cmd = CORE_SCHED_SECURE;
+   else
+   pr_crit("Unsupported coresched=%s, defaulting to secure.\n",
+   arg);
+
+   return 0;
+}
+early_param("coresched", coresched_parse_cmdline);
+
+/* coresched=off */
+bool coresched_cmd_off(void)
+{
+   return coresched_cmd == CORE_SCHED_OFF;
+}
+EXPORT_SYMBOL_GPL(coresched_cmd_off);
+
+/* coresched=secure */
+bool coresched_cmd_secure(void)
+{
+   

Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-12 Thread Joel Fernandes
On Wed, Nov 11, 2020 at 11:29:37PM +0100, Alexander Graf wrote:
> 
> 
> On 11.11.20 23:15, Joel Fernandes wrote:
> > 
> > On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes  
> > wrote:
> > > 
> > > On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  wrote:
> > > > On 11.11.20 22:14, Joel Fernandes wrote:
> > > > > > Some hardware such as certain AMD variants don't have cross-HT 
> > > > > > MDS/L1TF
> > > > > > issues. Detect this and don't enable core scheduling as it can
> > > > > > needlessly slow the device done.
> > > > > > 
> > > > > > diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> > > > > > index dece79e4d1e9..0e6e61e49b23 100644
> > > > > > --- a/arch/x86/kernel/cpu/bugs.c
> > > > > > +++ b/arch/x86/kernel/cpu/bugs.c
> > > > > > @@ -152,6 +152,14 @@ void __init check_bugs(void)
> > > > > >#endif
> > > > > >}
> > > > > > 
> > > > > > +/*
> > > > > > + * Do not need core scheduling if CPU does not have MDS/L1TF 
> > > > > > vulnerability.
> > > > > > + */
> > > > > > +int arch_allow_core_sched(void)
> > > > > > +{
> > > > > > +   return boot_cpu_has_bug(X86_BUG_MDS) || 
> > > > > > boot_cpu_has_bug(X86_BUG_L1TF);
> > > > 
> > > > Can we make this more generic and user settable, similar to the L1 cache
> > > > flushing modes in KVM?
> > > > 
> > > > I am not 100% convinced that there are no other thread sibling attacks
> > > > possible without MDS and L1TF. If I'm paranoid, I want to still be able
> > > > to force enable core scheduling.
> > > > 
> > > > In addition, we are also using core scheduling as a poor man's mechanism
> > > > to give customers consistent performance for virtual machine thread
> > > > siblings. This is important irrespective of CPU bugs. In such a
> > > > scenario, I want to force enable core scheduling.
> > > 
> > > Ok,  I can make it new kernel command line option with:
> > > coresched=on
> > > coresched=secure (only if HW has MDS/L1TF)
> > > coresched=off
> > 
> > Also, I would keep "secure" as the default.  (And probably, we should
> > modify the informational messages in sysfs to reflect this..)
> 
> I agree that "secure" should be the default.

Ok.

> Can we also integrate into the "mitigations" kernel command line[1] for this?

Sure, the integration into [1] sounds conceptually fine to me however it is
not super straight forward. Like: What if user wants to force-enable
core-scheduling for the usecase you mention, but still wants the cross-HT
mitigation because they are only tagging VMs (as in your usecase) and not
other tasks. Idk.

The best thing to do could be to keep the "auto disable HT" controls and
logic separate from the "coresched=on" logic and let the user choose. The
exception being, coresched=secure means that on HW that does not have
vulnerability, we will not activate the core scheduling.

thanks,

 - Joel


> Alex
> 
> [1] 
> https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/kernel-parameters.txt#n2839
> 
> 
> 
> Amazon Development Center Germany GmbH
> Krausenstr. 38
> 10117 Berlin
> Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
> Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
> Sitz: Berlin
> Ust-ID: DE 289 237 879
> 
> 


Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-11 Thread Alexander Graf



On 11.11.20 23:15, Joel Fernandes wrote:


On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes  wrote:


On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  wrote:

On 11.11.20 22:14, Joel Fernandes wrote:

Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow the device done.

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..0e6e61e49b23 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,14 @@ void __init check_bugs(void)
   #endif
   }

+/*
+ * Do not need core scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+int arch_allow_core_sched(void)
+{
+   return boot_cpu_has_bug(X86_BUG_MDS) || boot_cpu_has_bug(X86_BUG_L1TF);


Can we make this more generic and user settable, similar to the L1 cache
flushing modes in KVM?

I am not 100% convinced that there are no other thread sibling attacks
possible without MDS and L1TF. If I'm paranoid, I want to still be able
to force enable core scheduling.

In addition, we are also using core scheduling as a poor man's mechanism
to give customers consistent performance for virtual machine thread
siblings. This is important irrespective of CPU bugs. In such a
scenario, I want to force enable core scheduling.


Ok,  I can make it new kernel command line option with:
coresched=on
coresched=secure (only if HW has MDS/L1TF)
coresched=off


Also, I would keep "secure" as the default.  (And probably, we should
modify the informational messages in sysfs to reflect this..)


I agree that "secure" should be the default. Can we also integrate into 
the "mitigations" kernel command line[1] for this?



Alex

[1] 
https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/admin-guide/kernel-parameters.txt#n2839




Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879




Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-11 Thread Joel Fernandes
On Wed, Nov 11, 2020 at 5:13 PM Joel Fernandes  wrote:
>
> On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  wrote:
> > On 11.11.20 22:14, Joel Fernandes wrote:
> > >> Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
> > >> issues. Detect this and don't enable core scheduling as it can
> > >> needlessly slow the device done.
> > >>
> > >> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> > >> index dece79e4d1e9..0e6e61e49b23 100644
> > >> --- a/arch/x86/kernel/cpu/bugs.c
> > >> +++ b/arch/x86/kernel/cpu/bugs.c
> > >> @@ -152,6 +152,14 @@ void __init check_bugs(void)
> > >>   #endif
> > >>   }
> > >>
> > >> +/*
> > >> + * Do not need core scheduling if CPU does not have MDS/L1TF 
> > >> vulnerability.
> > >> + */
> > >> +int arch_allow_core_sched(void)
> > >> +{
> > >> +   return boot_cpu_has_bug(X86_BUG_MDS) || 
> > >> boot_cpu_has_bug(X86_BUG_L1TF);
> >
> > Can we make this more generic and user settable, similar to the L1 cache
> > flushing modes in KVM?
> >
> > I am not 100% convinced that there are no other thread sibling attacks
> > possible without MDS and L1TF. If I'm paranoid, I want to still be able
> > to force enable core scheduling.
> >
> > In addition, we are also using core scheduling as a poor man's mechanism
> > to give customers consistent performance for virtual machine thread
> > siblings. This is important irrespective of CPU bugs. In such a
> > scenario, I want to force enable core scheduling.
>
> Ok,  I can make it new kernel command line option with:
> coresched=on
> coresched=secure (only if HW has MDS/L1TF)
> coresched=off

Also, I would keep "secure" as the default.  (And probably, we should
modify the informational messages in sysfs to reflect this..)

 - Joel


Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-11 Thread Joel Fernandes
On Wed, Nov 11, 2020 at 5:00 PM Alexander Graf  wrote:
>
>
>
> On 11.11.20 22:14, Joel Fernandes wrote:
> >
> > Sorry, forgot to CC +Tom Lendacky . Please take a look Tom, thanks.
> >
> > On Wed, Nov 11, 2020 at 4:10 PM Joel Fernandes (Google)
> >  wrote:
> >>
> >> Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
> >> issues. Detect this and don't enable core scheduling as it can
> >> needlessly slow the device done.
> >>
> >> Signed-off-by: Joel Fernandes (Google) 
> >> ---
> >>   arch/x86/kernel/cpu/bugs.c | 8 
> >>   kernel/sched/core.c| 7 +++
> >>   kernel/sched/sched.h   | 5 +
> >>   3 files changed, 20 insertions(+)
> >>
> >> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> >> index dece79e4d1e9..0e6e61e49b23 100644
> >> --- a/arch/x86/kernel/cpu/bugs.c
> >> +++ b/arch/x86/kernel/cpu/bugs.c
> >> @@ -152,6 +152,14 @@ void __init check_bugs(void)
> >>   #endif
> >>   }
> >>
> >> +/*
> >> + * Do not need core scheduling if CPU does not have MDS/L1TF 
> >> vulnerability.
> >> + */
> >> +int arch_allow_core_sched(void)
> >> +{
> >> +   return boot_cpu_has_bug(X86_BUG_MDS) || 
> >> boot_cpu_has_bug(X86_BUG_L1TF);
>
> Can we make this more generic and user settable, similar to the L1 cache
> flushing modes in KVM?
>
> I am not 100% convinced that there are no other thread sibling attacks
> possible without MDS and L1TF. If I'm paranoid, I want to still be able
> to force enable core scheduling.
>
> In addition, we are also using core scheduling as a poor man's mechanism
> to give customers consistent performance for virtual machine thread
> siblings. This is important irrespective of CPU bugs. In such a
> scenario, I want to force enable core scheduling.

Ok,  I can make it new kernel command line option with:
coresched=on
coresched=secure (only if HW has MDS/L1TF)
coresched=off

Does that work for you?  Others, thoughts?

Thanks,
- Joel


Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-11 Thread Alexander Graf



On 11.11.20 22:14, Joel Fernandes wrote:


Sorry, forgot to CC +Tom Lendacky . Please take a look Tom, thanks.

On Wed, Nov 11, 2020 at 4:10 PM Joel Fernandes (Google)
 wrote:


Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow the device done.

Signed-off-by: Joel Fernandes (Google) 
---
  arch/x86/kernel/cpu/bugs.c | 8 
  kernel/sched/core.c| 7 +++
  kernel/sched/sched.h   | 5 +
  3 files changed, 20 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..0e6e61e49b23 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,14 @@ void __init check_bugs(void)
  #endif
  }

+/*
+ * Do not need core scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+int arch_allow_core_sched(void)
+{
+   return boot_cpu_has_bug(X86_BUG_MDS) || boot_cpu_has_bug(X86_BUG_L1TF);


Can we make this more generic and user settable, similar to the L1 cache 
flushing modes in KVM?


I am not 100% convinced that there are no other thread sibling attacks 
possible without MDS and L1TF. If I'm paranoid, I want to still be able 
to force enable core scheduling.


In addition, we are also using core scheduling as a poor man's mechanism 
to give customers consistent performance for virtual machine thread 
siblings. This is important irrespective of CPU bugs. In such a 
scenario, I want to force enable core scheduling.



Alex



Amazon Development Center Germany GmbH
Krausenstr. 38
10117 Berlin
Geschaeftsfuehrung: Christian Schlaeger, Jonathan Weiss
Eingetragen am Amtsgericht Charlottenburg unter HRB 149173 B
Sitz: Berlin
Ust-ID: DE 289 237 879




Re: [RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-11 Thread Joel Fernandes
Sorry, forgot to CC +Tom Lendacky . Please take a look Tom, thanks.

On Wed, Nov 11, 2020 at 4:10 PM Joel Fernandes (Google)
 wrote:
>
> Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
> issues. Detect this and don't enable core scheduling as it can
> needlessly slow the device done.
>
> Signed-off-by: Joel Fernandes (Google) 
> ---
>  arch/x86/kernel/cpu/bugs.c | 8 
>  kernel/sched/core.c| 7 +++
>  kernel/sched/sched.h   | 5 +
>  3 files changed, 20 insertions(+)
>
> diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
> index dece79e4d1e9..0e6e61e49b23 100644
> --- a/arch/x86/kernel/cpu/bugs.c
> +++ b/arch/x86/kernel/cpu/bugs.c
> @@ -152,6 +152,14 @@ void __init check_bugs(void)
>  #endif
>  }
>
> +/*
> + * Do not need core scheduling if CPU does not have MDS/L1TF vulnerability.
> + */
> +int arch_allow_core_sched(void)
> +{
> +   return boot_cpu_has_bug(X86_BUG_MDS) || 
> boot_cpu_has_bug(X86_BUG_L1TF);
> +}
> +
>  void
>  x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool 
> setguest)
>  {
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 64c559192634..c6158b4959fe 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -319,6 +319,13 @@ static void __sched_core_enable(void)
> for_each_online_cpu(cpu)
> BUG_ON(!sched_core_empty(cpu_rq(cpu)));
>
> +   /*
> +* Some architectures may not want coresched. (ex, AMD does not have
> +* MDS/L1TF issues so it wants SMT completely on).
> +*/
> +   if (!arch_allow_core_sched())
> +   return;
> +
> static_branch_enable(&__sched_core_enabled);
> stop_machine(__sched_core_stopper, (void *)true, NULL);
>
> diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
> index 3cf08c77b678..a1b39764a6ed 100644
> --- a/kernel/sched/sched.h
> +++ b/kernel/sched/sched.h
> @@ -1203,6 +1203,11 @@ int cpu_core_tag_color_write_u64(struct 
> cgroup_subsys_state *css,
>
>  bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
>
> +int __weak arch_allow_core_sched(void)
> +{
> +   return true;
> +}
> +
>  #else /* !CONFIG_SCHED_CORE */
>
>  static inline bool sched_core_enqueued(struct task_struct *task) { return 
> false; }
> --
> 2.29.2.222.g5d2a92d10f8-goog
>


[RFC 1/2] x86/bugs: Disable coresched on hardware that does not need it

2020-11-11 Thread Joel Fernandes (Google)
Some hardware such as certain AMD variants don't have cross-HT MDS/L1TF
issues. Detect this and don't enable core scheduling as it can
needlessly slow the device done.

Signed-off-by: Joel Fernandes (Google) 
---
 arch/x86/kernel/cpu/bugs.c | 8 
 kernel/sched/core.c| 7 +++
 kernel/sched/sched.h   | 5 +
 3 files changed, 20 insertions(+)

diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index dece79e4d1e9..0e6e61e49b23 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -152,6 +152,14 @@ void __init check_bugs(void)
 #endif
 }
 
+/*
+ * Do not need core scheduling if CPU does not have MDS/L1TF vulnerability.
+ */
+int arch_allow_core_sched(void)
+{
+   return boot_cpu_has_bug(X86_BUG_MDS) || boot_cpu_has_bug(X86_BUG_L1TF);
+}
+
 void
 x86_virt_spec_ctrl(u64 guest_spec_ctrl, u64 guest_virt_spec_ctrl, bool 
setguest)
 {
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 64c559192634..c6158b4959fe 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -319,6 +319,13 @@ static void __sched_core_enable(void)
for_each_online_cpu(cpu)
BUG_ON(!sched_core_empty(cpu_rq(cpu)));
 
+   /*
+* Some architectures may not want coresched. (ex, AMD does not have
+* MDS/L1TF issues so it wants SMT completely on).
+*/
+   if (!arch_allow_core_sched())
+   return;
+
static_branch_enable(&__sched_core_enabled);
stop_machine(__sched_core_stopper, (void *)true, NULL);
 
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 3cf08c77b678..a1b39764a6ed 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1203,6 +1203,11 @@ int cpu_core_tag_color_write_u64(struct 
cgroup_subsys_state *css,
 
 bool cfs_prio_less(struct task_struct *a, struct task_struct *b, bool fi);
 
+int __weak arch_allow_core_sched(void)
+{
+   return true;
+}
+
 #else /* !CONFIG_SCHED_CORE */
 
 static inline bool sched_core_enqueued(struct task_struct *task) { return 
false; }
-- 
2.29.2.222.g5d2a92d10f8-goog