RE: [PATCH v3 3/5] x86/umwait: Add sysfs interface to control umwait C0.2 state

2019-05-30 Thread Yu, Fenghua
> On Thursday, May 30, 2019 2:11 PM Andy Lutomirski [mailto:l...@kernel.org] 
> wrote:
> On Fri, May 24, 2019 at 5:05 PM Fenghua Yu  wrote:
> >
> > C0.2 state in umwait and tpause instructions can be enabled or
> > disabled on a processor through IA32_UMWAIT_CONTROL MSR register.
> >
> > By default, C0.2 is enabled and the user wait instructions result in
> > lower power consumption with slower wakeup time.
> >
> > But in real time systems which require faster wakeup time although
> > power savings could be smaller, the administrator needs to disable
> > C0.2 and all
> > C0.2 requests from user applications revert to C0.1.
> >
> > A sysfs interface "/sys/devices/system/cpu/umwait_control/enable_c0_2"
> > is created to allow the administrator to control C0.2 state during run time.
> >
> > Signed-off-by: Fenghua Yu 
> > Reviewed-by: Ashok Raj 
> > Reviewed-by: Tony Luck 
> > ---
> >  arch/x86/power/umwait.c | 75
> > ++---
> >  1 file changed, 71 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/x86/power/umwait.c b/arch/x86/power/umwait.c index
> > 80cc53a9c2d0..cf5de7e1cc24 100644
> > --- a/arch/x86/power/umwait.c
> > +++ b/arch/x86/power/umwait.c
> > @@ -7,6 +7,7 @@
> >  static bool umwait_c0_2_enabled = true;
> >  /* Umwait max time is in TSC-quanta. Bits[1:0] are zero. */  static
> > u32 umwait_max_time = 10;
> > +static DEFINE_MUTEX(umwait_lock);
> >
> >  /* Return value that will be used to set IA32_UMWAIT_CONTROL MSR */
> > static u32 umwait_compute_msr_value(void) @@ -22,7 +23,7 @@ static
> u32
> > umwait_compute_msr_value(void)
> >(umwait_max_time & MSR_IA32_UMWAIT_CONTROL_MAX_TIME);
> >  }
> >
> > -static void umwait_control_msr_update(void)
> > +static void umwait_control_msr_update(void *unused)
> >  {
> > u32 msr_val;
> >
> > @@ -33,7 +34,9 @@ static void umwait_control_msr_update(void)
> >  /* Set up IA32_UMWAIT_CONTROL MSR on CPU using the current global
> > setting. */  static int umwait_cpu_online(unsigned int cpu)  {
> > -   umwait_control_msr_update();
> > +   mutex_lock(&umwait_lock);
> > +   umwait_control_msr_update(NULL);
> > +   mutex_unlock(&umwait_lock);
> 
> What's the mutex for?  Can't you just use READ_ONCE?

umwait_control_msr_update() will write both umwait_c0_2_enabled and
umwait_max_time (which can also be changed through sysfs in the next patch)
to the IA32_UMWAIT_CONTROL MSR.

Just using READ_ONCE() for the two variables cannot guarantee that all CPUs
have the same setting of C0.2 and max time.
READ_ONCE() and WRITE_ONCE() can only guarantee atomicity for reading and
writing a single variable.

For example, without mutex protection:

initial values: umwait_c0_2_enabled=1 and umwait_max_time=10

1. umwait_cpu_online(X): read umwait_c0_2_enabled as 1
2. enable_c0_2_store(): umwait_c0_2_enabled = 0 and update all online CPUs as 
C0.2 disabled.
3. umwait_cpu_online(X): read umwait_max_time=10
4. umwait_cpu_online(Y): read umwait_c0_2_enabled as 0
5. umwait_max_time_store(): umwait_max_time=500 and update all online CPUs as 
max time = 500 cycles.
6. umwait_cpu_online(Y): read umwait_max_time as 500
7. umwait_cpu_online(X): wrmsr() enables C0.2 and sets max time 10 on CPU X
8. umwait_cpu_online(Y): disables C0.2 and sets max time 500 on CPU Y

With the mutex to protect the two variables and wrmsr(), each CPU will have the 
same setting of C0.2 and max time.

> 
> > +static void umwait_control_msr_update_all_cpus(void)
> > +{
> > +   u32 msr_val;
> > +
> > +   msr_val = umwait_compute_msr_value();
> > +   /* All CPUs have same umwait control setting */
> > +   on_each_cpu(umwait_control_msr_update, NULL, 1);
> 
> Why are you calling umwait_compute_msr_value()?

umwait_compute_msr_value() computes the IA32_UMWAIT_CONTROL value from the two
variables umwait_c0_2_enabled and umwait_max_time.
Either of the two variables may have changed by the time
umwait_control_msr_update_all_cpus() is called, so the MSR value needs to be
recalculated and then written to the MSR on all CPUs.

Thanks.

-Fenghua


Re: [PATCH v3 3/5] x86/umwait: Add sysfs interface to control umwait C0.2 state

2019-05-30 Thread Andy Lutomirski
On Fri, May 24, 2019 at 5:05 PM Fenghua Yu  wrote:
>
> C0.2 state in umwait and tpause instructions can be enabled or disabled
> on a processor through IA32_UMWAIT_CONTROL MSR register.
>
> By default, C0.2 is enabled and the user wait instructions result in
> lower power consumption with slower wakeup time.
>
> But in real time systems which require faster wakeup time although power
> savings could be smaller, the administrator needs to disable C0.2 and all
> C0.2 requests from user applications revert to C0.1.
>
> A sysfs interface "/sys/devices/system/cpu/umwait_control/enable_c0_2" is
> created to allow the administrator to control C0.2 state during run time.
>
> Signed-off-by: Fenghua Yu 
> Reviewed-by: Ashok Raj 
> Reviewed-by: Tony Luck 
> ---
>  arch/x86/power/umwait.c | 75 ++---
>  1 file changed, 71 insertions(+), 4 deletions(-)
>
> diff --git a/arch/x86/power/umwait.c b/arch/x86/power/umwait.c
> index 80cc53a9c2d0..cf5de7e1cc24 100644
> --- a/arch/x86/power/umwait.c
> +++ b/arch/x86/power/umwait.c
> @@ -7,6 +7,7 @@
>  static bool umwait_c0_2_enabled = true;
>  /* Umwait max time is in TSC-quanta. Bits[1:0] are zero. */
>  static u32 umwait_max_time = 10;
> +static DEFINE_MUTEX(umwait_lock);
>
>  /* Return value that will be used to set IA32_UMWAIT_CONTROL MSR */
>  static u32 umwait_compute_msr_value(void)
> @@ -22,7 +23,7 @@ static u32 umwait_compute_msr_value(void)
>(umwait_max_time & MSR_IA32_UMWAIT_CONTROL_MAX_TIME);
>  }
>
> -static void umwait_control_msr_update(void)
> +static void umwait_control_msr_update(void *unused)
>  {
> u32 msr_val;
>
> @@ -33,7 +34,9 @@ static void umwait_control_msr_update(void)
>  /* Set up IA32_UMWAIT_CONTROL MSR on CPU using the current global setting. */
>  static int umwait_cpu_online(unsigned int cpu)
>  {
> -   umwait_control_msr_update();
> +   mutex_lock(&umwait_lock);
> +   umwait_control_msr_update(NULL);
> +   mutex_unlock(&umwait_lock);

What's the mutex for?  Can't you just use READ_ONCE?

> +static void umwait_control_msr_update_all_cpus(void)
> +{
> +   u32 msr_val;
> +
> +   msr_val = umwait_compute_msr_value();
> +   /* All CPUs have same umwait control setting */
> +   on_each_cpu(umwait_control_msr_update, NULL, 1);

Why are you calling umwait_compute_msr_value()?


[PATCH v3 3/5] x86/umwait: Add sysfs interface to control umwait C0.2 state

2019-05-24 Thread Fenghua Yu
C0.2 state in umwait and tpause instructions can be enabled or disabled
on a processor through IA32_UMWAIT_CONTROL MSR register.

By default, C0.2 is enabled and the user wait instructions result in
lower power consumption with slower wakeup time.

But in real time systems which require faster wakeup time although power
savings could be smaller, the administrator needs to disable C0.2 and all
C0.2 requests from user applications revert to C0.1.

A sysfs interface "/sys/devices/system/cpu/umwait_control/enable_c0_2" is
created to allow the administrator to control C0.2 state during run time.

Signed-off-by: Fenghua Yu 
Reviewed-by: Ashok Raj 
Reviewed-by: Tony Luck 
---
 arch/x86/power/umwait.c | 75 ++---
 1 file changed, 71 insertions(+), 4 deletions(-)

diff --git a/arch/x86/power/umwait.c b/arch/x86/power/umwait.c
index 80cc53a9c2d0..cf5de7e1cc24 100644
--- a/arch/x86/power/umwait.c
+++ b/arch/x86/power/umwait.c
@@ -7,6 +7,7 @@
 static bool umwait_c0_2_enabled = true;
 /* Umwait max time is in TSC-quanta. Bits[1:0] are zero. */
 static u32 umwait_max_time = 10;
+static DEFINE_MUTEX(umwait_lock);
 
 /* Return value that will be used to set IA32_UMWAIT_CONTROL MSR */
 static u32 umwait_compute_msr_value(void)
@@ -22,7 +23,7 @@ static u32 umwait_compute_msr_value(void)
   (umwait_max_time & MSR_IA32_UMWAIT_CONTROL_MAX_TIME);
 }
 
-static void umwait_control_msr_update(void)
+static void umwait_control_msr_update(void *unused)
 {
u32 msr_val;
 
@@ -33,7 +34,9 @@ static void umwait_control_msr_update(void)
 /* Set up IA32_UMWAIT_CONTROL MSR on CPU using the current global setting. */
 static int umwait_cpu_online(unsigned int cpu)
 {
-   umwait_control_msr_update();
+   mutex_lock(&umwait_lock);
+   umwait_control_msr_update(NULL);
+   mutex_unlock(&umwait_lock);
 
return 0;
 }
@@ -49,24 +52,88 @@ static int umwait_cpu_online(unsigned int cpu)
  */
 static void umwait_syscore_resume(void)
 {
-   umwait_control_msr_update();
+   /* No need to lock because only BP is running now. */
+   umwait_control_msr_update(NULL);
 }
 
 static struct syscore_ops umwait_syscore_ops = {
.resume = umwait_syscore_resume,
 };
 
+static ssize_t
+enable_c0_2_show(struct device *dev, struct device_attribute *attr,
+char *buf)
+{
+   return sprintf(buf, "%d\n", umwait_c0_2_enabled);
+}
+
+static void umwait_control_msr_update_all_cpus(void)
+{
+   u32 msr_val;
+
+   msr_val = umwait_compute_msr_value();
+   /* All CPUs have same umwait control setting */
+   on_each_cpu(umwait_control_msr_update, NULL, 1);
+}
+
+static ssize_t enable_c0_2_store(struct device *dev,
+struct device_attribute *attr,
+const char *buf, size_t count)
+{
+   bool c0_2_enabled;
+   int ret;
+
+   ret = kstrtobool(buf, &c0_2_enabled);
+   if (ret)
+   return ret;
+
+   mutex_lock(&umwait_lock);
+
+   if (umwait_c0_2_enabled == c0_2_enabled)
+   goto out_unlock;
+
+   umwait_c0_2_enabled = c0_2_enabled;
+   /* Enable/disable C0.2 state on all CPUs */
+   umwait_control_msr_update_all_cpus();
+
+out_unlock:
+   mutex_unlock(&umwait_lock);
+
+   return count;
+}
+static DEVICE_ATTR_RW(enable_c0_2);
+
+static struct attribute *umwait_attrs[] = {
+   &dev_attr_enable_c0_2.attr,
+   NULL
+};
+
+static struct attribute_group umwait_attr_group = {
+   .attrs = umwait_attrs,
+   .name = "umwait_control",
+};
+
 static int __init umwait_init(void)
 {
+   struct device *dev;
int ret;
 
if (!boot_cpu_has(X86_FEATURE_WAITPKG))
return -ENODEV;
 
+   /* Add umwait control interface. */
+   dev = cpu_subsys.dev_root;
+   ret = sysfs_create_group(&dev->kobj, &umwait_attr_group);
+   if (ret)
+   return ret;
+
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "umwait/intel:online",
umwait_cpu_online, NULL);
-   if (ret < 0)
+   if (ret < 0) {
+   sysfs_remove_group(&dev->kobj, &umwait_attr_group);
+
return ret;
+   }
 
register_syscore_ops(&umwait_syscore_ops);
 
-- 
2.19.1