On 25.07.2012, at 05:49, Bharat Bhushan wrote:
> This patch adds the watchdog emulation in KVM. The watchdog
> emulation is enabled by KVM_ENABLE_CAP(KVM_CAP_PPC_BOOKE_WATCHDOG) ioctl.
> The kernel timer are used for watchdog emulation and emulates
> h/w watchdog state machine. On watchdog timer expiry, it exit to QEMU
> if TCR.WRC is non ZERO. QEMU can reset/shutdown etc depending upon how
> it is configured.
>
> Signed-off-by: Liu Yu <[email protected]>
> Signed-off-by: Scott Wood <[email protected]>
> [[email protected]: reworked patch]
> Signed-off-by: Bharat Bhushan <[email protected]>
> ---
> v6:
> - Added kvmppc_subarch_vcpu_unit/uninit() for subarch specific initialization
> - Using spin_lock_irqsave()
> - Using del_timer_sync().
>
> v5:
> - Checking that TSR_ENW/TSR_WIS are still set on KVM_EXIT_WATCHDOG.
> - Moved watchdog_next_timeout() in lock.
>
> arch/powerpc/include/asm/kvm_host.h | 3 +
> arch/powerpc/include/asm/kvm_ppc.h | 4 +
> arch/powerpc/include/asm/reg_booke.h | 7 ++
> arch/powerpc/kvm/book3s.c | 9 ++
> arch/powerpc/kvm/booke.c | 156 ++++++++++++++++++++++++++++++++++
> arch/powerpc/kvm/booke_emulate.c | 8 ++
> arch/powerpc/kvm/powerpc.c | 14 +++-
> include/linux/kvm.h | 2 +
> include/linux/kvm_host.h | 1 +
> 9 files changed, 202 insertions(+), 2 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/kvm_host.h
> b/arch/powerpc/include/asm/kvm_host.h
> index 50ea12f..43cac48 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -467,6 +467,8 @@ struct kvm_vcpu_arch {
> ulong fault_esr;
> ulong queued_dear;
> ulong queued_esr;
> + spinlock_t wdt_lock;
> + struct timer_list wdt_timer;
> u32 tlbcfg[4];
> u32 mmucfg;
> u32 epr;
> @@ -482,6 +484,7 @@ struct kvm_vcpu_arch {
> u8 osi_needed;
> u8 osi_enabled;
> u8 papr_enabled;
> + u8 watchdog_enabled;
> u8 sane;
> u8 cpu_type;
> u8 hcall_needed;
> diff --git a/arch/powerpc/include/asm/kvm_ppc.h
> b/arch/powerpc/include/asm/kvm_ppc.h
> index 0124937..823d563 100644
> --- a/arch/powerpc/include/asm/kvm_ppc.h
> +++ b/arch/powerpc/include/asm/kvm_ppc.h
> @@ -68,6 +68,8 @@ extern void kvmppc_emulate_dec(struct kvm_vcpu *vcpu);
> extern u32 kvmppc_get_dec(struct kvm_vcpu *vcpu, u64 tb);
> extern void kvmppc_decrementer_func(unsigned long data);
> extern int kvmppc_sanity_check(struct kvm_vcpu *vcpu);
> +extern int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu);
> +extern void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu);
>
> /* Core-specific hooks */
>
> @@ -104,6 +106,8 @@ extern void kvmppc_core_queue_external(struct kvm_vcpu
> *vcpu,
> struct kvm_interrupt *irq);
> extern void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> struct kvm_interrupt *irq);
> +extern void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu);
> +extern void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu);
>
> extern int kvmppc_core_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
> unsigned int op, int *advance);
> diff --git a/arch/powerpc/include/asm/reg_booke.h
> b/arch/powerpc/include/asm/reg_booke.h
> index 2d916c4..e07e6af 100644
> --- a/arch/powerpc/include/asm/reg_booke.h
> +++ b/arch/powerpc/include/asm/reg_booke.h
> @@ -539,6 +539,13 @@
> #define TCR_FIE 0x00800000 /* FIT Interrupt Enable */
> #define TCR_ARE 0x00400000 /* Auto Reload Enable */
>
> +#ifdef CONFIG_E500
> +#define TCR_GET_WP(tcr) ((((tcr) & 0xC0000000) >> 30) | \
> + (((tcr) & 0x1E0000) >> 15))
> +#else
> +#define TCR_GET_WP(tcr) (((tcr) & 0xC0000000) >> 30)
> +#endif
> +
> /* Bit definitions for the TSR. */
> #define TSR_ENW 0x80000000 /* Enable Next Watchdog */
> #define TSR_WIS 0x40000000 /* WDT Interrupt Status */
> diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c
> index 3f2a836..e946665 100644
> --- a/arch/powerpc/kvm/book3s.c
> +++ b/arch/powerpc/kvm/book3s.c
> @@ -411,6 +411,15 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> return 0;
> }
>
> +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> + return 0;
> +}
> +
> +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> +}
> +
> int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> {
> int i;
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index d25a097..eadd86c 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -206,6 +206,16 @@ void kvmppc_core_dequeue_external(struct kvm_vcpu *vcpu,
> clear_bit(BOOKE_IRQPRIO_EXTERNAL_LEVEL, &vcpu->arch.pending_exceptions);
> }
>
> +void kvmppc_core_queue_watchdog(struct kvm_vcpu *vcpu)
> +{
> + kvmppc_booke_queue_irqprio(vcpu, BOOKE_IRQPRIO_WATCHDOG);
> +}
> +
> +void kvmppc_core_dequeue_watchdog(struct kvm_vcpu *vcpu)
> +{
> + clear_bit(BOOKE_IRQPRIO_WATCHDOG, &vcpu->arch.pending_exceptions);
> +}
> +
These should be static, since we only need them in booke specific code.
> static void set_guest_srr(struct kvm_vcpu *vcpu, unsigned long srr0, u32 srr1)
> {
> #ifdef CONFIG_KVM_BOOKE_HV
> @@ -325,6 +335,7 @@ static int kvmppc_booke_irqprio_deliver(struct kvm_vcpu
> *vcpu,
> msr_mask = MSR_CE | MSR_ME | MSR_DE;
> int_class = INT_CLASS_NONCRIT;
> break;
> + case BOOKE_IRQPRIO_WATCHDOG:
> case BOOKE_IRQPRIO_CRITICAL:
> case BOOKE_IRQPRIO_DBELL_CRIT:
> allowed = vcpu->arch.shared->msr & MSR_CE;
> @@ -404,12 +415,114 @@ static int kvmppc_booke_irqprio_deliver(struct
> kvm_vcpu *vcpu,
> return allowed;
> }
>
> +/*
> + * Return the number of jiffies until the next timeout. If the timeout is
> + * longer than the NEXT_TIMER_MAX_DELTA, then return NEXT_TIMER_MAX_DELTA
> + * because the larger value can break the timer APIs.
> + */
> +static unsigned long watchdog_next_timeout(struct kvm_vcpu *vcpu)
> +{
> + u64 tb, wdt_tb, wdt_ticks = 0;
> + u64 nr_jiffies = 0;
> + u32 period = TCR_GET_WP(vcpu->arch.tcr);
> +
> + wdt_tb = 1ULL << (63 - period);
> + tb = get_tb();
> + /*
> + * The watchdog timeout will hapeen when TB bit corresponding
> + * to watchdog will toggle from 0 to 1.
> + */
> + if (tb & wdt_tb)
> + wdt_ticks = wdt_tb;
> +
> + wdt_ticks += wdt_tb - (tb & (wdt_tb - 1));
> +
> + /* Convert timebase ticks to jiffies */
> + nr_jiffies = wdt_ticks;
> +
> + if (do_div(nr_jiffies, tb_ticks_per_jiffy))
> + nr_jiffies++;
> +
> + return min_t(unsigned long long, nr_jiffies, NEXT_TIMER_MAX_DELTA);
> +}
> +
> +static void arm_next_watchdog(struct kvm_vcpu *vcpu)
> +{
> + unsigned long nr_jiffies;
> + unsigned long flags;
> +
> + spin_lock_irqsave(&vcpu->arch.wdt_lock, flags);
> + nr_jiffies = watchdog_next_timeout(vcpu);
> + /*
> + * If the number of jiffies of watchdog timer >= NEXT_TIMER_MAX_DELTA
> + * then do not run the watchdog timer as this can break timer APIs.
> + */
> + if (nr_jiffies < NEXT_TIMER_MAX_DELTA)
> + mod_timer(&vcpu->arch.wdt_timer, jiffies + nr_jiffies);
> + else
> + del_timer(&vcpu->arch.wdt_timer);
> + spin_unlock_irqrestore(&vcpu->arch.wdt_lock, flags);
> +}
> +
> +void kvmppc_watchdog_func(unsigned long data)
> +{
> + struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
> + u32 tsr, new_tsr;
> + int final;
> +
> + do {
> + new_tsr = tsr = vcpu->arch.tsr;
> + final = 0;
> +
> + /* Time out event */
> + if (tsr & TSR_ENW) {
> + if (tsr & TSR_WIS)
> + final = 1;
> + else
> + new_tsr = tsr | TSR_WIS;
> + } else {
> + new_tsr = tsr | TSR_ENW;
> + }
> + } while (cmpxchg(&vcpu->arch.tsr, tsr, new_tsr) != tsr);
> +
> + if (new_tsr & TSR_WIS) {
> + smp_wmb();
> + kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
> + kvm_vcpu_kick(vcpu);
> + }
> +
> + /*
> + * If this is final watchdog expiry and some action is required
> + * then exit to userspace.
> + */
> + if (final && (vcpu->arch.tcr & TCR_WRC_MASK) &&
> + vcpu->arch.watchdog_enabled) {
> + smp_wmb();
> + kvm_make_request(KVM_REQ_WATCHDOG, vcpu);
> + kvm_vcpu_kick(vcpu);
> + }
> +
> + /*
> + * Stop running the watchdog timer after final expiration to
> + * prevent the host from being flooded with timers if the
> + * guest sets a short period.
> + * Timers will resume when TSR/TCR is updated next time.
> + */
> + if (!final)
> + arm_next_watchdog(vcpu);
> +}
> +
> static void update_timer_ints(struct kvm_vcpu *vcpu)
> {
> if ((vcpu->arch.tcr & TCR_DIE) && (vcpu->arch.tsr & TSR_DIS))
> kvmppc_core_queue_dec(vcpu);
> else
> kvmppc_core_dequeue_dec(vcpu);
> +
> + if ((vcpu->arch.tcr & TCR_WIE) && (vcpu->arch.tsr & TSR_WIS))
> + kvmppc_core_queue_watchdog(vcpu);
> + else
> + kvmppc_core_dequeue_watchdog(vcpu);
> }
>
> static void kvmppc_core_check_exceptions(struct kvm_vcpu *vcpu)
> @@ -516,6 +629,13 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct
> kvm_vcpu *vcpu)
> goto out;
> }
>
> + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
> + (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
> + kvm_run->exit_reason = KVM_EXIT_WATCHDOG;
> + ret = 0;
> + goto out;
> + }
I think we want to push this into a generic function that checks for requests.
Check out
https://github.com/agraf/linux-2.6/commit/e8fe975b59b24777c6314addc9cea2b959454738
for what I did there, namely kvmppc_check_requests. If your case we obviously
want a return value. I don't mind which patch goes in first, so if you add a
nice and generic version first, I can easily base my mmu notifier patches on
top of yours.
> +
> kvm_guest_enter();
>
> #ifdef CONFIG_PPC_FPU
> @@ -978,6 +1098,12 @@ int kvmppc_handle_exit(struct kvm_run *run, struct
> kvm_vcpu *vcpu,
> }
> }
>
> + if (kvm_check_request(KVM_REQ_WATCHDOG, vcpu) &&
> + (vcpu->arch.tsr & (TSR_ENW | TSR_WIS))) {
> + run->exit_reason = KVM_EXIT_WATCHDOG;
> + r = RESUME_HOST | (r & RESUME_FLAG_NV);
> + }
> +
> return r;
> }
>
> @@ -1011,6 +1137,21 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
> return r;
> }
>
> +int kvmppc_subarch_vcpu_init(struct kvm_vcpu *vcpu)
> +{
> + /* setup watchdog timer once */
> + spin_lock_init(&vcpu->arch.wdt_lock);
> + setup_timer(&vcpu->arch.wdt_timer, kvmppc_watchdog_func,
> + (unsigned long)vcpu);
> +
> + return 0;
> +}
> +
> +void kvmppc_subarch_vcpu_uninit(struct kvm_vcpu *vcpu)
> +{
> + del_timer_sync(&vcpu->arch.wdt_timer);
> +}
> +
> int kvm_arch_vcpu_ioctl_get_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
> {
> int i;
> @@ -1106,7 +1247,13 @@ static int set_sregs_base(struct kvm_vcpu *vcpu,
> }
>
> if (sregs->u.e.update_special & KVM_SREGS_E_UPDATE_TSR) {
> + u32 old_tsr = vcpu->arch.tsr;
> +
> vcpu->arch.tsr = sregs->u.e.tsr;
> +
> + if ((old_tsr ^ vcpu->arch.tsr) & (TSR_ENW | TSR_WIS))
> + arm_next_watchdog(vcpu);
Just curious. Is this potentially racy with the watchdog timer, since we're not
accessing tsr atomically.
This patch already looks a lot nicer than the previous versions!
Scott, for the final patch I'd also like to get your ack. You know your way
around the hardware way better than me :).
Alex
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html