On 05/07/2013 11:32 AM, Scott Wood wrote:
booke64 was not maintaing consistent lazy ee state when exiting the

One typo ;-)

s/maintaing/maintaining


guest, leading to warnings and worse.

booke32 was less affected due to the absence of lazy ee, but it was
still feeding bad information into trace_hardirqs_off/on -- we don't
want guest execution to be seen as an "IRQs off" interval.  book3s_pr
also has this problem.

book3s_pr and booke both used kvmppc_lazy_ee_enable() without
hard-disabling EE first, which could lead to races when irq_happened is
cleared, or if an interrupt happens after kvmppc_lazy_ee_enable(), and
possibly other issues.

Now, on book3s_pr and booke, always hard-disable interrupts before
kvmppc_prepare_to_enter(), but leave them soft-enabled.  On book3s,
this should results in the right lazy EE state when the asm code
hard-enables on an exit.  On booke, we call hard_irq_disable() rather
than hard-enable immediately.

Looks we always need to call hard_irq_disable() before kvmppc_prepare_to_enter(), so I think we can add hard_irq_disable() directly into kvmppc_prepare_to_enater() since this can avoid forgetting to call hard_irq_disable() when call kvmppc_prepare_to_enater() somewhere in the future.

Here I assume Ben's fix to hard_irq_disable() is before this :) So what about 
this?

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index d7339df..e4e2120 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -397,6 +397,13 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
 static inline void kvmppc_lazy_ee_enable(void)
 {
 #ifdef CONFIG_PPC64
+       /*
+        * To avoid races, the caller must have gone directly from having
+        * interrupts fully-enabled to hard-disabled.
+        */
+       WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
+       trace_hardirqs_on();
+
        /* Only need to enable IRQs by hard enabling them after this */
        local_paca->irq_happened = 0;
        local_paca->soft_enabled = 1;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d09baf1..1ea65cd 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -884,7 +884,6 @@ program_interrupt:
                 * and if we really did time things so badly, then we just exit
                 * again due to a host external interrupt.
                 */
-               local_irq_disable();
                s = kvmppc_prepare_to_enter(vcpu);
                if (s <= 0) {
                        local_irq_enable();
@@ -1121,7 +1120,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
         * really did time things so badly, then we just exit again due to
         * a host external interrupt.
         */
-       local_irq_disable();
        ret = kvmppc_prepare_to_enter(vcpu);
        if (ret <= 0) {
                local_irq_enable();
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index dc1f590..d412749 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -666,7 +666,6 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
                return -EINVAL;
        }

-       local_irq_disable();
        s = kvmppc_prepare_to_enter(vcpu);
        if (s <= 0) {
                local_irq_enable();
@@ -832,6 +831,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
 {
        int r = RESUME_HOST;
        int s;
+#ifdef CONFIG_PPC64
+       WARN_ON(local_paca->irq_happened != 0);
+#endif
+       hard_irq_disable();

        /* update before a new last_exit_type is rewritten */
        kvmppc_update_timing_stats(vcpu);
@@ -1143,7 +1146,6 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
         * aren't already exiting to userspace for some other reason.
         */
        if (!(r & RESUME_HOST)) {
-               local_irq_disable();
                s = kvmppc_prepare_to_enter(vcpu);
                if (s <= 0) {
                        local_irq_enable();
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 31084c6..147ac0e 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -64,7 +64,8 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu)
 {
        int r = 1;

-       WARN_ON_ONCE(!irqs_disabled());
+       hard_irq_disable();
+
        while (true) {
                if (need_resched()) {
                        local_irq_enable();
--
1.7.9.5

Tiejun


Signed-off-by: Scott Wood <[email protected]>
Cc: Mihai Caraman <[email protected]>
Cc: Benjamin Herrenschmidt <[email protected]>
Cc: Tiejun Chen <[email protected]>
---
Only tested on booke (32 and 64 bit).  Testers of book3s_pr would be
appreciated (particularly with lockdep enabled).
---
  arch/powerpc/include/asm/kvm_ppc.h |    7 +++++++
  arch/powerpc/kvm/book3s_pr.c       |    6 ++++--
  arch/powerpc/kvm/booke.c           |   12 ++++++++++--
  3 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index a5287fe..e55d7e5 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -399,6 +399,13 @@ static inline void kvmppc_mmu_flush_icache(pfn_t pfn)
  static inline void kvmppc_lazy_ee_enable(void)
  {
  #ifdef CONFIG_PPC64
+       /*
+        * To avoid races, the caller must have gone directly from having
+        * interrupts fully-enabled to hard-disabled.
+        */
+       WARN_ON(local_paca->irq_happened != PACA_IRQ_HARD_DIS);
+       trace_hardirqs_on();
+
        /* Only need to enable IRQs by hard enabling them after this */
        local_paca->irq_happened = 0;
        local_paca->soft_enabled = 1;
diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c
index d09baf1..a1e70113 100644
--- a/arch/powerpc/kvm/book3s_pr.c
+++ b/arch/powerpc/kvm/book3s_pr.c
@@ -884,7 +884,8 @@ program_interrupt:
                 * and if we really did time things so badly, then we just exit
                 * again due to a host external interrupt.
                 */
-               local_irq_disable();
+               hard_irq_disable();
+               trace_hardirqs_off();
                s = kvmppc_prepare_to_enter(vcpu);
                if (s <= 0) {
                        local_irq_enable();
@@ -1121,7 +1122,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
         * really did time things so badly, then we just exit again due to
         * a host external interrupt.
         */
-       local_irq_disable();
+       hard_irq_disable();
+       trace_hardirqs_off();
        ret = kvmppc_prepare_to_enter(vcpu);
        if (ret <= 0) {
                local_irq_enable();
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index ecbe908..5dc1f53 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -666,7 +666,8 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
                return -EINVAL;
        }

-       local_irq_disable();
+       hard_irq_disable();
+       trace_hardirqs_off();
        s = kvmppc_prepare_to_enter(vcpu);
        if (s <= 0) {
                local_irq_enable();
@@ -834,6 +835,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
        int s;
        int idx;

+#ifdef CONFIG_PPC64
+       WARN_ON(local_paca->irq_happened != 0);
+#endif
+       hard_irq_disable();
+       trace_hardirqs_off();
+
        /* update before a new last_exit_type is rewritten */
        kvmppc_update_timing_stats(vcpu);

@@ -1150,7 +1157,8 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
         * aren't already exiting to userspace for some other reason.
         */
        if (!(r & RESUME_HOST)) {
-               local_irq_disable();
+               hard_irq_disable();
+               trace_hardirqs_off();
                s = kvmppc_prepare_to_enter(vcpu);
                if (s <= 0) {
                        local_irq_enable();


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to