This patch adds the watchdog emulation in KVM. The watchdog
emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
The kernel timer are used for watchdog emulation and emulates
h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
it is configured.

Signed-off-by: Liu Yu <yu....@freescale.com>
Signed-off-by: Scott Wood <scottw...@freescale.com>
[bharat.bhus...@freescale.com: reworked patch]
Signed-off-by: Bharat Bhushan <bharat.bhus...@freescale.com>
---
v7:
 - kvmppc_core_dequeue_watchdog() and kvmppc_core_enque_watchdog()
   are made staic
 - Clear the KVM_REQ_WATCHDOG request when TSR_ENW | ENW_WIS cleared
   rather than checking these bits on handling the request.
   Below changes (pasted for quick understanding)
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index 7eedaeb..a0f5922 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -629,8 +629,7 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
                goto out;
        }
 
-       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
-           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
+       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
                kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
                ret = 0;
                goto out;
@@ -1098,8 +1097,7 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
                }
        }
 
-       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
-           (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
+       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
                run->exit_reason = KVM_EXIT_WATCHDOG;
                r = RESUME_HOST | (r & RESUME_FLAG_NV);
        }
@@ -1251,8 +1249,10 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,

                vcpu->arch.tsr = sregs->u.e.tsr;
 
-               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
+               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
+                       clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
                        arm_next_watchdog(vcpu);
+               }
        
                update_timer_ints(vcpu);
        }
@@ -1434,8 +1434,10 @@ void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 
tsr_bits)
         * We may have stopped the watchdog due to
         * being stuck on final expiration.
         */
-       if (tsr_bits & (TSR_ENW | TSR_WIS))
+       if (tsr_bits & (TSR_ENW | TSR_WIS)) {
+               clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
                arm_next_watchdog(vcpu);
+       }
----------

v6:
 - Added kvmppc_subarch_vcpu_unit/uninit() for subarch specific initialization
 - Using spin_lock_irqsave()
 - Using del_timer_sync().

v5:
 - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG.
 - Moved watchdog_next_timeout() in lock.

 arch/powerpc/include/asm/kvm_host.h  |    3 +
 arch/powerpc/include/asm/kvm_ppc.h   |    2 +
 arch/powerpc/include/asm/reg_booke.h |    7 ++
 arch/powerpc/kvm/book3s.c            |    9 ++
 arch/powerpc/kvm/booke.c             |  158 ++++++++++++++++++++++++++++++++++
 arch/powerpc/kvm/booke_emulate.c     |    8 ++
 arch/powerpc/kvm/powerpc.c           |   14 +++-
 include/linux/kvm.h                  |    2 +
 include/linux/kvm_host.h             |    1 +
 9 files changed, 202 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_host.h 
b/arch/powerpc/include/asm/kvm_host.h
index 572ad01..873ec11 100644
--- a/arch/powerpc/include/asm/kvm_host.h
+++ b/arch/powerpc/include/asm/kvm_host.h
@@ -469,6 +469,8 @@ struct kvm_vcpu_arch {
        ulong fault_esr;
        ulong queued_dear;
        ulong queued_esr;
+       spinlock_t wdt_lock;
+       struct timer_list wdt_timer;
        u32 tlbcfg[4];
        u32 mmucfg;
        u32 epr;
@@ -484,6 +486,7 @@ struct kvm_vcpu_arch {
        u8 osi_needed;
        u8 osi_enabled;
        u8 papr_enabled;
+       u8 watchdog_enabled;
        u8 sane;
        u8 cpu_type;
        u8 hcall_needed;
diff --git a/arch/powerpc/include/asm/kvm_ppc.h 
b/arch/powerpc/include/asm/kvm_ppc.h
index 0124937..1438a5e 100644
--- a/arch/powerpc/include/asm/kvm_ppc.h
+++ b/arch/powerpc/include/asm/kvm_ppc.h
@@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
 extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
 extern void kvmppc_decrementer_func(unsigned long data);
 extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
+extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
+extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 /* Core-specific hooks */
 
diff --git a/arch/powerpc/include/asm/reg_booke.h 
b/arch/powerpc/include/asm/reg_booke.h
index 2d916c4..e07e6af 100644
--- a/arch/powerpc/include/asm/reg_booke.h
+++ b/arch/powerpc/include/asm/reg_booke.h
@@ -539,6 +539,13 @@
 #define TCR_FIE                0x00800000      /* FIT Interrupt Enable */
 #define TCR_ARE                0x00400000      /* Auto Reload Enable */
 
+#ifdef CONFIG_E500
+#define TCR_GET_WP(tcr)  ((((tcr) & 0xC0000000) >> 30) | \
+                             (((tcr) & 0x1E0000) >> 15))
+#else
+#define TCR_GET_WP(tcr)  (((tcr) & 0xC0000000) >> 30)
+#endif
+
 /* Bit definitions for the TSR. */
 #define TSR_ENW                0x80000000      /* Enable Next Watchdog */
 #define TSR_WIS                0x40000000      /* WDT Interrupt Status */
diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
index 3f2a836..e946665 100644
--- a/arch/powerpc/kvm/book3s.c
+++ b/arch/powerpc/kvm/book3s.c
@@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        return 0;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+       return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        int i;
diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
index d25a097..a0f5922 100644
--- a/arch/powerpc/kvm/booke.c
+++ b/arch/powerpc/kvm/booke.c
@@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
        clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
 }
 
+static void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
+{
+       kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
+}
+
+static void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
+{
+       clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
+}
+
 static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
 {
 #ifdef CONFIG_KVM_BOOKE_HV
@@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
*vcpu,
                msr_mask = MSR_CE | MSR_ME | MSR_DE;
                int_class = INT_CLASS_NONCRIT;
                break;
+       case BOOKE_IRQPRIO_WATCHDOG:
        case BOOKE_IRQPRIO_CRITICAL:
        case BOOKE_IRQPRIO_DBELL_CRIT:
                allowed = vcpu->arch.shared->msr & MSR_CE;
@@ -404,12 +415,114 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu 
*vcpu,
        return allowed;
 }
 
+/*
+ * Return the number of jiffies until the next timeout.  If the timeout is
+ * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
+ * because the larger value can break the timer APIs.
+ */
+static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
+{
+       u64 tb, wdt_tb, wdt_ticks = 0;
+       u64 nr_jiffies = 0;
+       u32 period = TCR_GET_WP(vcpu->arch.tcr);
+
+       wdt_tb = 1ULL << (63 - period);
+       tb = get_tb();
+       /*
+        * The watchdog timeout will hapeen when TB bit corresponding
+        * to watchdog will toggle from 0 to 1.
+        */
+       if (tb & wdt_tb)
+               wdt_ticks = wdt_tb;
+
+       wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
+
+       /* Convert timebase ticks to jiffies */
+       nr_jiffies = wdt_ticks;
+
+       if (do_div(nr_jiffies, tb_ticks_per_jiffy))
+               nr_jiffies++;
+
+       return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
+}
+
+static void arm_next_watchdog(struct kvm_vcpu *vcpu)
+{
+       unsigned long nr_jiffies;
+       unsigned long flags;
+
+       spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
+       nr_jiffies = watchdog_next_timeout(vcpu);
+       /*
+        * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
+        * then do not run the watchdog timer as this can break timer APIs.
+        */
+       if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
+               mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
+       else
+               del_timer(&vcpu->arch.wdt_timer);
+       spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
+}
+
+void kvmppc_watchdog_func(unsigned long data)
+{
+       struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+       u32 tsr, new_tsr;
+       int final;
+
+       do {
+               new_tsr = tsr = vcpu->arch.tsr;
+               final = 0;
+
+               /* Time out event */
+               if (tsr & TSR_ENW) {
+                       if (tsr & TSR_WIS)
+                               final = 1;
+                       else
+                               new_tsr = tsr | TSR_WIS;
+               } else {
+                       new_tsr = tsr | TSR_ENW;
+               }
+       } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
+
+       if (new_tsr & TSR_WIS) {
+               smp_wmb();
+               kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+               kvm_vcpu_kick(vcpu);
+       }
+
+       /*
+        * If this is final watchdog expiry and some action is required
+        * then exit to userspace.
+        */
+       if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
+           vcpu->arch.watchdog_enabled) {
+               smp_wmb();
+               kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
+               kvm_vcpu_kick(vcpu);
+       }
+
+       /*
+        * Stop running the watchdog timer after final expiration to
+        * prevent the host from being flooded with timers if the
+        * guest sets a short period.
+        * Timers will resume when TSR/TCR is updated next time.
+        */
+       if (!final)
+               arm_next_watchdog(vcpu);
+}
+
 static void update_timer_ints(struct kvm_vcpu *vcpu)
 {
        if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
                kvmppc_core_queue_dec(vcpu);
        else
                kvmppc_core_dequeue_dec(vcpu);
+
+       if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
+               kvmppc_core_queue_watchdog(vcpu);
+       else
+               kvmppc_core_dequeue_watchdog(vcpu);
 }
 
 static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
@@ -516,6 +629,12 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
kvm_vcpu *vcpu)
                goto out;
        }
 
+       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+               kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
+               ret = 0;
+               goto out;
+       }
+
        kvm_guest_enter();
 
 #ifdef CONFIG_PPC_FPU
@@ -978,6 +1097,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
kvm_vcpu *vcpu,
                }
        }
 
+       if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu)) {
+               run->exit_reason = KVM_EXIT_WATCHDOG;
+               r = RESUME_HOST | (r & RESUME_FLAG_NV);
+       }
+
        return r;
 }
 
@@ -1011,6 +1135,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
        return r;
 }
 
+int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
+{
+       /* setup watchdog timer once */
+       spin_lock_init(&vcpu->arch.wdt_lock);
+       setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
+                   (unsigned long)vcpu);
+
+       return 0;
+}
+
+void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
+{
+       del_timer_sync(&vcpu->arch.wdt_timer);
+}
+
 int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
 {
        int i;
@@ -1106,7 +1245,15 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
        }
 
        if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
+               u32 old_tsr = vcpu->arch.tsr;
+
                vcpu->arch.tsr = sregs->u.e.tsr;
+
+               if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS)) {
+                       clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+                       arm_next_watchdog(vcpu);
+               }
+
                update_timer_ints(vcpu);
        }
 
@@ -1267,6 +1414,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
 void kvmppc_set_tcr(struct kvm_vcpu *vcpu, u32 new_tcr)
 {
        vcpu->arch.tcr = new_tcr;
+       arm_next_watchdog(vcpu);
        update_timer_ints(vcpu);
 }
 
@@ -1281,6 +1429,16 @@ void kvmppc_set_tsr_bits(struct kvm_vcpu *vcpu, u32 
tsr_bits)
 void kvmppc_clr_tsr_bits(struct kvm_vcpu *vcpu, u32 tsr_bits)
 {
        clear_bits(tsr_bits, &vcpu->arch.tsr);
+
+       /*
+        * We may have stopped the watchdog due to
+        * being stuck on final expiration.
+        */
+       if (tsr_bits & (TSR_ENW | TSR_WIS)) {
+               clear_bit(KVM_REQ_WATCHDOG, &vcpu->requests);
+               arm_next_watchdog(vcpu);
+       }
+
        update_timer_ints(vcpu);
 }
 
diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c
index 12834bb..5a66ade 100644
--- a/arch/powerpc/kvm/booke_emulate.c
+++ b/arch/powerpc/kvm/booke_emulate.c
@@ -145,6 +145,14 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int 
sprn, ulong spr_val)
                kvmppc_clr_tsr_bits(vcpu, spr_val);
                break;
        case SPRN_TCR:
+               /*
+                * WRC is a 2-bit field that is supposed to preserve its
+                * value once written to non-zero.
+                */
+               if (vcpu->arch.tcr & TCR_WRC_MASK) {
+                       spr_val &= ~TCR_WRC_MASK;
+                       spr_val |= vcpu->arch.tcr & TCR_WRC_MASK;
+               }
                kvmppc_set_tcr(vcpu, spr_val);
                break;
 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index a69ca4a..da484a6 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -224,6 +224,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        switch (ext) {
 #ifdef CONFIG_BOOKE
        case KVM_CAP_PPC_BOOKE_SREGS:
+       case KVM_CAP_PPC_BOOKE_WATCHDOG:
 #else
        case KVM_CAP_PPC_SEGSTATE:
        case KVM_CAP_PPC_HIOR:
@@ -382,6 +383,8 @@ enum hrtimer_restart kvmppc_decrementer_wakeup(struct 
hrtimer *timer)
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 {
+       int ret;
+
        hrtimer_init(&vcpu->arch.dec_timer, CLOCK_REALTIME, HRTIMER_MODE_ABS);
        tasklet_init(&vcpu->arch.tasklet, kvmppc_decrementer_func, (ulong)vcpu);
        vcpu->arch.dec_timer.function = kvmppc_decrementer_wakeup;
@@ -390,13 +393,14 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
 #ifdef CONFIG_KVM_EXIT_TIMING
        mutex_init(&vcpu->arch.exit_timing_lock);
 #endif
-
-       return 0;
+       ret = kvmppc_subarch_vcpu_init(vcpu);
+       return ret;
 }
 
 void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
        kvmppc_mmu_destroy(vcpu);
+       kvmppc_subarch_vcpu_uninit(vcpu);
 }
 
 void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
@@ -641,6 +645,12 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
                r = 0;
                vcpu->arch.papr_enabled = true;
                break;
+#ifdef CONFIG_BOOKE
+       case KVM_CAP_PPC_BOOKE_WATCHDOG:
+               r = 0;
+               vcpu->arch.watchdog_enabled = true;
+               break;
+#endif
 #if defined(CONFIG_KVM_E500V2) || defined(CONFIG_KVM_E500MC)
        case KVM_CAP_SW_TLB: {
                struct kvm_config_tlb cfg;
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c03e59e..f4f5be8 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -163,6 +163,7 @@ struct kvm_pit_config {
 #define KVM_EXIT_OSI              18
 #define KVM_EXIT_PAPR_HCALL      19
 #define KVM_EXIT_S390_UCONTROL   20
+#define KVM_EXIT_WATCHDOG         21
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 #define KVM_INTERNAL_ERROR_EMULATION 1
@@ -620,6 +621,7 @@ struct kvm_ppc_smmu_info {
 #define KVM_CAP_PPC_GET_SMMU_INFO 78
 #define KVM_CAP_S390_COW 79
 #define KVM_CAP_PPC_ALLOC_HTAB 80
+#define KVM_CAP_PPC_BOOKE_WATCHDOG 81
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
index dbc65f9..b38adc2 100644
--- a/include/linux/kvm_host.h
+++ b/include/linux/kvm_host.h
@@ -69,6 +69,7 @@
 #define KVM_REQ_IMMEDIATE_EXIT    15
 #define KVM_REQ_PMU               16
 #define KVM_REQ_PMI               17
+#define KVM_REQ_WATCHDOG          18
 
 #define KVM_USERSPACE_IRQ_SOURCE_ID    0
 
-- 
1.7.0.4


--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to