Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Paolo Bonzini
Il 19/08/2014 22:35, Radim Krčmář ha scritto:
 Every increase of ple_window_grow creates potential overflows.
 They are not serious, because we clamp ple_window and userspace is
 expected to fix ple_window_max within a second.
 ---
  arch/x86/kvm/vmx.c | 34 +-
  1 file changed, 33 insertions(+), 1 deletion(-)
 
 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index d7f58e8..6873a0b 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -138,7 +138,9 @@ module_param(ple_window, int, S_IRUGO | S_IWUSR);
  
  /* Default doubles per-vcpu window every exit. */
  static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
 -module_param(ple_window_grow, int, S_IRUGO | S_IWUSR);
 +static struct kernel_param_ops ple_window_grow_ops;
 +module_param_cb(ple_window_grow, ple_window_grow_ops,
 +ple_window_grow, S_IRUGO | S_IWUSR);
  
  /* Default resets per-vcpu window every exit to ple_window. */
  static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
 @@ -5717,6 +5719,36 @@ static void type##_ple_window(struct kvm_vcpu *vcpu) \
  make_ple_window_modifier(grow,   *, +) /* grow_ple_window */
  make_ple_window_modifier(shrink, /, -) /* shrink_ple_window */
  
 +static void clamp_ple_window_max(void)
 +{
 + int maximum;
 +
 + if (ple_window_grow  1)
 + return;
 +
 + if (ple_window_grow  ple_window)
 + maximum = INT_MAX / ple_window_grow;
 + else
 + maximum = INT_MAX - ple_window_grow;
 +
 + ple_window_max = clamp(ple_window_max, ple_window, maximum);
 +}

I think avoiding overflows is better.  In fact, I think you should call
this function for ple_window_max too.

You could keep the ple_window_max variable to the user-set value.
Whenever ple_window_grow or ple_window_max are changed, you can set an
internal variable (let's call it ple_window_actual_max, but I'm not wed
to this name) to the computed value, and then do:

if (ple_window_grow  1 || ple_window_actual_max  ple_window)
new = ple_window;
else if (ple_window_grow  ple_window)
new = max(ple_window_actual_max, old) * ple_window_grow;
else
new = max(ple_window_actual_max, old) + ple_window_grow;

(I think the || in the first if can be eliminated with some creativity
in clamp_ple_window_max).

Paolo

 +static int set_ple_window_grow(const char *arg, const struct kernel_param 
 *kp)
 +{
 + int ret;
 +
 + clamp_ple_window_max();
 + ret = param_set_int(arg, kp);
 +
 + return ret;
 +}
 +
 +static struct kernel_param_ops ple_window_grow_ops = {
 + .set = set_ple_window_grow,
 + .get = param_get_int,
 +};
 +
  /*
   * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
   * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Paolo Bonzini
Il 20/08/2014 09:16, Paolo Bonzini ha scritto:
 Il 19/08/2014 22:35, Radim Krčmář ha scritto:
 Every increase of ple_window_grow creates potential overflows.
 They are not serious, because we clamp ple_window and userspace is
 expected to fix ple_window_max within a second.
 ---
  arch/x86/kvm/vmx.c | 34 +-
  1 file changed, 33 insertions(+), 1 deletion(-)

 diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
 index d7f58e8..6873a0b 100644
 --- a/arch/x86/kvm/vmx.c
 +++ b/arch/x86/kvm/vmx.c
 @@ -138,7 +138,9 @@ module_param(ple_window, int, S_IRUGO | S_IWUSR);
  
  /* Default doubles per-vcpu window every exit. */
  static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
 -module_param(ple_window_grow, int, S_IRUGO | S_IWUSR);
 +static struct kernel_param_ops ple_window_grow_ops;
 +module_param_cb(ple_window_grow, ple_window_grow_ops,
 +ple_window_grow, S_IRUGO | S_IWUSR);
  
  /* Default resets per-vcpu window every exit to ple_window. */
  static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
 @@ -5717,6 +5719,36 @@ static void type##_ple_window(struct kvm_vcpu *vcpu) \
  make_ple_window_modifier(grow,   *, +) /* grow_ple_window */
  make_ple_window_modifier(shrink, /, -) /* shrink_ple_window */
  
 +static void clamp_ple_window_max(void)
 +{
 +int maximum;
 +
 +if (ple_window_grow  1)
 +return;
 +
 +if (ple_window_grow  ple_window)
 +maximum = INT_MAX / ple_window_grow;
 +else
 +maximum = INT_MAX - ple_window_grow;
 +
 +ple_window_max = clamp(ple_window_max, ple_window, maximum);
 +}
 
 I think avoiding overflows is better.  In fact, I think you should call
 this function for ple_window_max too.
 
 You could keep the ple_window_max variable to the user-set value.
 Whenever ple_window_grow or ple_window_max are changed, you can set an
 internal variable (let's call it ple_window_actual_max, but I'm not wed
 to this name) to the computed value, and then do:
 
   if (ple_window_grow  1 || ple_window_actual_max  ple_window)
   new = ple_window;
   else if (ple_window_grow  ple_window)
   new = max(ple_window_actual_max, old) * ple_window_grow;
   else
   new = max(ple_window_actual_max, old) + ple_window_grow;

Ehm, this should of course be min.

Paolo

 (I think the || in the first if can be eliminated with some creativity
 in clamp_ple_window_max).
 
 Paolo
 
 +static int set_ple_window_grow(const char *arg, const struct kernel_param 
 *kp)
 +{
 +int ret;
 +
 +clamp_ple_window_max();
 +ret = param_set_int(arg, kp);
 +
 +return ret;
 +}
 +
 +static struct kernel_param_ops ple_window_grow_ops = {
 +.set = set_ple_window_grow,
 +.get = param_get_int,
 +};
 +
  /*
   * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
   * exiting, so only get here on cpu with PAUSE-Loop-Exiting.

 
 --
 To unsubscribe from this list: send the line unsubscribe kvm in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Radim Krčmář
2014-08-20 09:16+0200, Paolo Bonzini:
 Il 19/08/2014 22:35, Radim Krčmář ha scritto:
  Every increase of ple_window_grow creates potential overflows.
  They are not serious, because we clamp ple_window and userspace is
  expected to fix ple_window_max within a second.
  ---
 I think avoiding overflows is better.  In fact, I think you should call
 this function for ple_window_max too.

(Ack, I just wanted to avoid the worst userspace error, which is why
 PW_max hasn't changed when PW_grow got smaller and we could overflow.)

 You could keep the ple_window_max variable to the user-set value.
 Whenever ple_window_grow or ple_window_max are changed, you can set an
 internal variable (let's call it ple_window_actual_max, but I'm not wed
 to this name) to the computed value, and then do:
 
   if (ple_window_grow  1 || ple_window_actual_max  ple_window)
   new = ple_window;
   else if (ple_window_grow  ple_window)
   new = max(ple_window_actual_max, old) * ple_window_grow;
   else
   new = max(ple_window_actual_max, old) + ple_window_grow;

Oh, I like that this can get rid of all overflows, ple_window_actual_max
(PW_effective_max?) is going to be set to
ple_window_max [/-] ple_window_grow in v2.

 (I think the || in the first if can be eliminated with some creativity
 in clamp_ple_window_max).

To do it, we'll want to intercept changes to ple_window as well.
(I disliked this patch a lot even before :)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Paolo Bonzini
Il 20/08/2014 14:41, Radim Krčmář ha scritto:
 if (ple_window_grow  1 || ple_window_actual_max  ple_window)
 new = ple_window;
 else if (ple_window_grow  ple_window)
 new = max(ple_window_actual_max, old) * ple_window_grow;
 else
 new = max(ple_window_actual_max, old) + ple_window_grow;
 Oh, I like that this can get rid of all overflows, ple_window_actual_max
 (PW_effective_max?) is going to be set to
 ple_window_max [/-] ple_window_grow in v2.
 
  (I think the || in the first if can be eliminated with some creativity
  in clamp_ple_window_max).
 To do it, we'll want to intercept changes to ple_window as well.
 (I disliked this patch a lot even before :)

What about setting ple_window_actual_max to 0 if ple_window_grow is 0
(instead of just returning)?

Then the if (ple_window_actual_max  ple_window) will always fail and
you'll go through new = ple_window.  But perhaps it's more gross and
worthless than creative. :)

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Radim Krčmář
2014-08-20 15:15+0200, Paolo Bonzini:
 Il 20/08/2014 14:41, Radim Krčmář ha scritto:
if (ple_window_grow  1 || ple_window_actual_max  ple_window)
new = ple_window;
else if (ple_window_grow  ple_window)
new = max(ple_window_actual_max, old) * ple_window_grow;
else
new = max(ple_window_actual_max, old) + ple_window_grow;
  Oh, I like that this can get rid of all overflows, ple_window_actual_max
  (PW_effective_max?) is going to be set to
  ple_window_max [/-] ple_window_grow in v2.
  
   (I think the || in the first if can be eliminated with some creativity
   in clamp_ple_window_max).
  To do it, we'll want to intercept changes to ple_window as well.
  (I disliked this patch a lot even before :)
 
 What about setting ple_window_actual_max to 0 if ple_window_grow is 0
 (instead of just returning)?
 
 Then the if (ple_window_actual_max  ple_window) will always fail and
 you'll go through new = ple_window.  But perhaps it's more gross and
 worthless than creative. :)

That code can't use PW directly, because PW_actual_max needs to be one
PW_grow below PW_max, so I'd rather enforce minimal PW_actual_max.

Btw. without extra code, we are still going to overflow on races when
changing PW_grow, should they be covered as well?

(+ There is a bug in this patch -- clamp_ple_window_max() should be
 after param_set_int() ... damned unreviewed last-second changes.)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Paolo Bonzini
Il 20/08/2014 17:31, Radim Krčmář ha scritto:
 Btw. without extra code, we are still going to overflow on races when
 changing PW_grow, should they be covered as well?

You mean because there is no spinlock or similar protecting the changes?
 I guess you could use a seqlock.

Paolo
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Radim Krčmář
2014-08-20 17:34+0200, Paolo Bonzini:
 Il 20/08/2014 17:31, Radim Krčmář ha scritto:
  Btw. without extra code, we are still going to overflow on races when
  changing PW_grow, should they be covered as well?
 
 You mean because there is no spinlock or similar protecting the changes?
  I guess you could use a seqlock.

Yes, for example between a modification of ple_window
  new = min(old, PW_actual_max) * PW_grow
which gets compiled into something like this:
  1) tmp = min(old, PW_actual_max)
  2) new = tmp * PW_grow
and a write to increase PW_grow
  3) PW_actual_max = min(PW_max / new_PW_grow, PW_actual_max)
  4) PW_grow = new_PW_grow
  5) PW_actual_max = PW_max / new_PW_grow

3 and 4 can exectute between 1 and 2, which could overflow.

I don't think they are important enough to warrant a significant
performance hit of locking.
Or even more checks that would prevent it in a lockless way.

(I'd just see that the result is set to something legal and also drop
 line 3, because it does not help things that much.)
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Paolo Bonzini
Il 20/08/2014 18:01, Radim Krčmář ha scritto:
 2014-08-20 17:34+0200, Paolo Bonzini:
 Il 20/08/2014 17:31, Radim Krčmář ha scritto:
 Btw. without extra code, we are still going to overflow on races when
 changing PW_grow, should they be covered as well?

 You mean because there is no spinlock or similar protecting the changes?
  I guess you could use a seqlock.
 
 Yes, for example between a modification of ple_window
   new = min(old, PW_actual_max) * PW_grow
 which gets compiled into something like this:
   1) tmp = min(old, PW_actual_max)
   2) new = tmp * PW_grow
 and a write to increase PW_grow
   3) PW_actual_max = min(PW_max / new_PW_grow, PW_actual_max)
   4) PW_grow = new_PW_grow
   5) PW_actual_max = PW_max / new_PW_grow
 
 3 and 4 can exectute between 1 and 2, which could overflow.
 
 I don't think they are important enough to warrant a significant
 performance hit of locking.

A seqlock just costs two memory accesses to the same (shared) cache line
as the PW data, and a non-taken branch.  I don't like code that is
unsafe by design...

Paolo

 Or even more checks that would prevent it in a lockless way.
 
 (I'd just see that the result is set to something legal and also drop
  line 3, because it does not help things that much.)
 

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-20 Thread Radim Krčmář
2014-08-20 18:03+0200, Paolo Bonzini:
 Il 20/08/2014 18:01, Radim Krčmář ha scritto:
  2014-08-20 17:34+0200, Paolo Bonzini:
  Il 20/08/2014 17:31, Radim Krčmář ha scritto:
  Btw. without extra code, we are still going to overflow on races when
  changing PW_grow, should they be covered as well?
 
  You mean because there is no spinlock or similar protecting the changes?
   I guess you could use a seqlock.
  
  Yes, for example between a modification of ple_window
new = min(old, PW_actual_max) * PW_grow
  which gets compiled into something like this:
1) tmp = min(old, PW_actual_max)
2) new = tmp * PW_grow
  and a write to increase PW_grow
3) PW_actual_max = min(PW_max / new_PW_grow, PW_actual_max)
4) PW_grow = new_PW_grow
5) PW_actual_max = PW_max / new_PW_grow
  
  3 and 4 can exectute between 1 and 2, which could overflow.
  
  I don't think they are important enough to warrant a significant
  performance hit of locking.
 
 A seqlock just costs two memory accesses to the same (shared) cache line
 as the PW data, and a non-taken branch.

Oh, seqlock readers do not have to write to shared memory, so it is
acceptable ...

 I don't like code that is
 unsafe by design...

I wouldn't say it is unsafe, because VCPU's PW is always greater than
module's PW. We are just going to PLE exit sooner than expected.
--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 9/9] KVM: VMX: automatic PLE window maximum

2014-08-19 Thread Radim Krčmář
Every increase of ple_window_grow creates potential overflows.
They are not serious, because we clamp ple_window and userspace is
expected to fix ple_window_max within a second.
---
 arch/x86/kvm/vmx.c | 34 +-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d7f58e8..6873a0b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -138,7 +138,9 @@ module_param(ple_window, int, S_IRUGO | S_IWUSR);
 
 /* Default doubles per-vcpu window every exit. */
 static int ple_window_grow = KVM_VMX_DEFAULT_PLE_WINDOW_GROW;
-module_param(ple_window_grow, int, S_IRUGO | S_IWUSR);
+static struct kernel_param_ops ple_window_grow_ops;
+module_param_cb(ple_window_grow, ple_window_grow_ops,
+ple_window_grow, S_IRUGO | S_IWUSR);
 
 /* Default resets per-vcpu window every exit to ple_window. */
 static int ple_window_shrink = KVM_VMX_DEFAULT_PLE_WINDOW_SHRINK;
@@ -5717,6 +5719,36 @@ static void type##_ple_window(struct kvm_vcpu *vcpu) \
 make_ple_window_modifier(grow,   *, +) /* grow_ple_window */
 make_ple_window_modifier(shrink, /, -) /* shrink_ple_window */
 
+static void clamp_ple_window_max(void)
+{
+   int maximum;
+
+   if (ple_window_grow  1)
+   return;
+
+   if (ple_window_grow  ple_window)
+   maximum = INT_MAX / ple_window_grow;
+   else
+   maximum = INT_MAX - ple_window_grow;
+
+   ple_window_max = clamp(ple_window_max, ple_window, maximum);
+}
+
+static int set_ple_window_grow(const char *arg, const struct kernel_param *kp)
+{
+   int ret;
+
+   clamp_ple_window_max();
+   ret = param_set_int(arg, kp);
+
+   return ret;
+}
+
+static struct kernel_param_ops ple_window_grow_ops = {
+   .set = set_ple_window_grow,
+   .get = param_get_int,
+};
+
 /*
  * Indicate a busy-waiting vcpu in spinlock. We do not enable the PAUSE
  * exiting, so only get here on cpu with PAUSE-Loop-Exiting.
-- 
2.0.4

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html