On 07.05.2013, at 11:40, Bharat Bhushan wrote:

> This patch adds the debug stub support on booke/bookehv.
> Now QEMU debug stub can use hw breakpoint, watchpoint and
> software breakpoint to debug guest.
> 
> This is how we save/restore debug register context when switching
> between guest, userspace and kernel user-process:
> 
> When QEMU is running
> -> thread->debug_reg == QEMU debug register context.
> -> Kernel will handle switching the debug register on context switch.
> -> no vcpu_load() called
> 
> QEMU makes ioctls (except RUN)
> -> This will call vcpu_load()
> -> should not change context.
> -> Some ioctls can change vcpu debug register, context saved in 
> vcpu->debug_regs
> 
> QEMU Makes RUN ioctl
> -> Save thread->debug_reg on STACK
> -> Store thread->debug_reg == vcpu->debug_reg 
> -> load thread->debug_reg
> -> RUN VCPU ( So thread points to vcpu context )
> 
> Context switch happens When VCPU running 
> -> makes vcpu_load() should not load any context
> -> kernel loads the vcpu context as thread->debug_regs points to vcpu context.
> 
> On heavyweight_exit
> -> Load the context saved on stack in thread->debug_reg
> 
> Currently we do not support debug resource emulation to guest,
> On debug exception, always exit to user space irrespective of
> user space is expecting the debug exception or not. If this is
> unexpected exception (breakpoint/watchpoint event not set by
> userspace) then let us leave the action on user space. This
> is similar to what it was before, only thing is that now we
> have proper exit state available to user space.
> 
> Signed-off-by: Bharat Bhushan <[email protected]>
> ---
> arch/powerpc/include/asm/kvm_host.h |    3 +
> arch/powerpc/include/uapi/asm/kvm.h |    1 +
> arch/powerpc/kvm/booke.c            |  242 ++++++++++++++++++++++++++++++++---
> arch/powerpc/kvm/booke.h            |    5 +
> 4 files changed, 233 insertions(+), 18 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_host.h 
> b/arch/powerpc/include/asm/kvm_host.h
> index 838a577..1b29945 100644
> --- a/arch/powerpc/include/asm/kvm_host.h
> +++ b/arch/powerpc/include/asm/kvm_host.h
> @@ -524,7 +524,10 @@ struct kvm_vcpu_arch {
>       u32 eptcfg;
>       u32 epr;
>       u32 crit_save;
> +     /* guest debug registers*/
>       struct debug_reg dbg_reg;
> +     /* shadow debug registers */

Please be more verbose here. What exactly does this contain? Why do we need 
shadow and non-shadow registers? The comment as it is reads like

  /* Add one plus one */
  x = 1 + 1;

> +     struct debug_reg shadow_dbg_reg;
> #endif
>       gpa_t paddr_accessed;
>       gva_t vaddr_accessed;
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
> b/arch/powerpc/include/uapi/asm/kvm.h
> index ded0607..f5077c2 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -27,6 +27,7 @@
> #define __KVM_HAVE_PPC_SMT
> #define __KVM_HAVE_IRQCHIP
> #define __KVM_HAVE_IRQ_LINE
> +#define __KVM_HAVE_GUEST_DEBUG
> 
> struct kvm_regs {
>       __u64 pc;
> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c
> index ef99536..6a44ad4 100644
> --- a/arch/powerpc/kvm/booke.c
> +++ b/arch/powerpc/kvm/booke.c
> @@ -133,6 +133,29 @@ static void kvmppc_vcpu_sync_fpu(struct kvm_vcpu *vcpu)
> #endif
> }
> 
> +static void kvmppc_vcpu_sync_debug(struct kvm_vcpu *vcpu)
> +{
> +     /* Synchronize guest's desire to get debug interrupts into shadow MSR */
> +#ifndef CONFIG_KVM_BOOKE_HV
> +     vcpu->arch.shadow_msr &= ~MSR_DE;
> +     vcpu->arch.shadow_msr |= vcpu->arch.shared->msr & MSR_DE;
> +#endif
> +
> +     /* Force enable debug interrupts when user space wants to debug */
> +     if (vcpu->guest_debug) {
> +#ifdef CONFIG_KVM_BOOKE_HV
> +             /*
> +              * Since there is no shadow MSR, sync MSR_DE into the guest
> +              * visible MSR.
> +              */
> +             vcpu->arch.shared->msr |= MSR_DE;
> +#else
> +             vcpu->arch.shadow_msr |= MSR_DE;
> +             vcpu->arch.shared->msr &= ~MSR_DE;
> +#endif
> +     }
> +}
> +
> /*
>  * Helper function for "full" MSR writes.  No need to call this if only
>  * EE/CE/ME/DE/RI are changing.
> @@ -150,6 +173,7 @@ void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
>       kvmppc_mmu_msr_notify(vcpu, old_msr);
>       kvmppc_vcpu_sync_spe(vcpu);
>       kvmppc_vcpu_sync_fpu(vcpu);
> +     kvmppc_vcpu_sync_debug(vcpu);
> }
> 
> static void kvmppc_booke_queue_irqprio(struct kvm_vcpu *vcpu,
> @@ -655,6 +679,7 @@ int kvmppc_core_check_requests(struct kvm_vcpu *vcpu)
> int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
> {
>       int ret, s;
> +     struct debug_reg debug;
> #ifdef CONFIG_PPC_FPU
>       unsigned int fpscr;
>       int fpexc_mode;
> @@ -699,11 +724,27 @@ int kvmppc_vcpu_run(struct kvm_run *kvm_run, struct 
> kvm_vcpu *vcpu)
>       kvmppc_load_guest_fp(vcpu);
> #endif
> 
> +     /*
> +      * Clear current->thread.dbcr0 so that kernel does not
> +      * restore h/w registers on context switch in vcpu running state.
> +      */

Incorrect comment?

> +     /* Save thread->debug context on stack */

/* Switch to guest debug context */

> +     memcpy(&debug, &current->thread.debug, sizeof(struct debug_reg));

debug = current->thread.debug;

> +     /* Load vcpu debug context in thread->debug */
> +     memcpy(&current->thread.debug, &vcpu->arch.shadow_dbg_reg,
> +            sizeof(struct debug_reg));

current->thread.debug = vcpu->arch.shadow_dbg_reg;

> +
> +     switch_booke_debug_regs(&current->thread);
> +
>       ret = __kvmppc_vcpu_run(kvm_run, vcpu);
> 
>       /* No need for kvm_guest_exit. It's done in handle_exit.
>          We also get here with interrupts enabled. */
> 
> +     /* Restore userspace context in thread->dbcr0 from stack*/

/* Switch back to user space debug context */

> +     memcpy(&current->thread.debug, &debug, sizeof(struct debug_reg));

current->thread.debug = debug;

> +     switch_booke_debug_regs(&current->thread);
> +
> #ifdef CONFIG_PPC_FPU
>       kvmppc_save_guest_fp(vcpu);
> 
> @@ -759,6 +800,36 @@ static int emulation_exit(struct kvm_run *run, struct 
> kvm_vcpu *vcpu)
>       }
> }
> 
> +/*
> + * Currently we do not support debug resource emulation to guest,
> + * so always exit to user space irrespective of user space is
> + * expecting the debug exception or not. This is unexpected event
> + * and let us leave the action on user space.

Please rework your wording.

> + */
> +static int kvmppc_handle_debug(struct kvm_run *run, struct kvm_vcpu *vcpu)
> +{
> +     struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +     u32 dbsr = vcpu->arch.dbsr;
> +
> +     run->debug.arch.status = 0;
> +     run->debug.arch.address = vcpu->arch.pc;
> +
> +     if (dbsr & (DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4)) {
> +             run->debug.arch.status |= KVMPPC_DEBUG_BREAKPOINT;
> +     } else {
> +             if (dbsr & (DBSR_DAC1W | DBSR_DAC2W))
> +                     run->debug.arch.status |= KVMPPC_DEBUG_WATCH_WRITE;
> +             else if (dbsr & (DBSR_DAC1R | DBSR_DAC2R))
> +                     run->debug.arch.status |= KVMPPC_DEBUG_WATCH_READ;
> +             if (dbsr & (DBSR_DAC1R | DBSR_DAC1W))
> +                     run->debug.arch.address = dbg_reg->dac1;
> +             else if (dbsr & (DBSR_DAC2R | DBSR_DAC2W))
> +                     run->debug.arch.address = dbg_reg->dac2;
> +     }
> +
> +     return RESUME_HOST;
> +}
> +
> static void kvmppc_fill_pt_regs(struct pt_regs *regs)
> {
>       ulong r1, ip, msr, lr;
> @@ -819,6 +890,11 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu 
> *vcpu,
>       case BOOKE_INTERRUPT_CRITICAL:
>               unknown_exception(&regs);
>               break;
> +     case BOOKE_INTERRUPT_DEBUG:
> +             /* Save DBSR before preemption is enabled */
> +             vcpu->arch.dbsr = mfspr(SPRN_DBSR);
> +             kvmppc_clear_dbsr();
> +             break;
>       }
> }
> 
> @@ -1118,18 +1194,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct 
> kvm_vcpu *vcpu,
>       }
> 
>       case BOOKE_INTERRUPT_DEBUG: {
> -             u32 dbsr;
> -
> -             vcpu->arch.pc = mfspr(SPRN_CSRR0);
> -
> -             /* clear IAC events in DBSR register */
> -             dbsr = mfspr(SPRN_DBSR);
> -             dbsr &= DBSR_IAC1 | DBSR_IAC2 | DBSR_IAC3 | DBSR_IAC4;
> -             mtspr(SPRN_DBSR, dbsr);
> -
> -             run->exit_reason = KVM_EXIT_DEBUG;
> +             r = kvmppc_handle_debug(run, vcpu);
> +             if (r == RESUME_HOST)
> +                     run->exit_reason = KVM_EXIT_DEBUG;
>               kvmppc_account_exit(vcpu, DEBUG_EXITS);
> -             r = RESUME_HOST;
>               break;
>       }
> 
> @@ -1180,7 +1248,7 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)
>       kvmppc_set_msr(vcpu, 0);
> 
> #ifndef CONFIG_KVM_BOOKE_HV
> -     vcpu->arch.shadow_msr = MSR_USER | MSR_DE | MSR_IS | MSR_DS;
> +     vcpu->arch.shadow_msr = MSR_USER | MSR_IS | MSR_DS;
>       vcpu->arch.shadow_pid = 1;
>       vcpu->arch.shared->msr = 0;
> #endif
> @@ -1557,12 +1625,6 @@ int kvm_vcpu_ioctl_set_one_reg(struct kvm_vcpu *vcpu, 
> struct kvm_one_reg *reg)
>       return r;
> }
> 
> -int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> -                                      struct kvm_guest_debug *dbg)
> -{
> -     return -EINVAL;
> -}
> -
> int kvm_arch_vcpu_ioctl_get_fpu(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
> {
>       return -ENOTSUPP;
> @@ -1668,6 +1730,145 @@ void kvmppc_decrementer_func(unsigned long data)
>       kvmppc_set_tsr_bits(vcpu, TSR_DIS);
> }
> 
> +static int kvmppc_booke_set_breakpoint(struct kvm_vcpu *vcpu, uint64_t addr,

s/set/add/

> +                                    int index)
> +{
> +     struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +
> +     switch (index) {
> +     case 0:
> +             dbg_reg->dbcr0 |= DBCR0_IAC1 | DBCR0_IDM;
> +             dbg_reg->iac1 = addr;
> +             break;
> +     case 1:
> +             dbg_reg->dbcr0 |= DBCR0_IAC2 | DBCR0_IDM;
> +             dbg_reg->iac2 = addr;
> +             break;
> +#if CONFIG_PPC_ADV_DEBUG_IACS > 2
> +     case 2:
> +             dbg_reg->dbcr0 |= DBCR0_IAC3 | DBCR0_IDM;
> +             dbg_reg->iac3 = addr;
> +             break;
> +     case 3:
> +             dbg_reg->dbcr0 |= DBCR0_IAC4 | DBCR0_IDM;
> +             dbg_reg->iac4 = addr;
> +             break;
> +#endif
> +     default:
> +             return -EINVAL;
> +     }

    dbg_reg->dbcr0 |= DBCR0_IDM;

> +     return 0;
> +}
> +
> +static int kvmppc_booke_set_watchpoint(struct kvm_vcpu *vcpu, uint64_t addr,

s/set/add/

> +                                    int type, int index)
> +{
> +     struct debug_reg *dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +
> +     switch (index) {
> +     case 0:
> +             if (type & KVMPPC_DEBUG_WATCH_READ)
> +                     dbg_reg->dbcr0 |= DBCR0_DAC1R | DBCR0_IDM;
> +             if (type & KVMPPC_DEBUG_WATCH_WRITE)
> +                     dbg_reg->dbcr0 |= DBCR0_DAC1W | DBCR0_IDM;
> +             dbg_reg->dac1 = addr;
> +             break;
> +     case 1:
> +             if (type & KVMPPC_DEBUG_WATCH_READ)
> +                     dbg_reg->dbcr0 |= DBCR0_DAC2R | DBCR0_IDM;
> +             if (type & KVMPPC_DEBUG_WATCH_WRITE)
> +                     dbg_reg->dbcr0 |= DBCR0_DAC2W | DBCR0_IDM;
> +             dbg_reg->dac2 = addr;
> +             break;
> +     default:
> +             return -EINVAL;
> +     }

    dbg_reg->dbcr0 |= DBCR0_IDM;

> +     return 0;
> +}
> +
> +int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
> +                                      struct kvm_guest_debug *dbg)
> +{
> +     struct debug_reg *dbg_reg;
> +     int n, b = 0, w = 0;
> +
> +     if (!(dbg->control & KVM_GUESTDBG_ENABLE)) {
> +             /* Clear All debug events */
> +             vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> +             vcpu->guest_debug = 0;
> +#ifdef CONFIG_KVM_BOOKE_HV
> +             /*
> +              * When user space is not using the debug resources
> +              * then allow guest to change the MSR.DE.
> +              */
> +             vcpu->arch.shadow_msrp &= ~MSRP_DEP;
> +#endif

guest_may_set_msr_de(vcpu, true);

> +             return 0;
> +     }
> +
> +#ifdef CONFIG_KVM_BOOKE_HV
> +     /*
> +      * When user space is using the debug resource then
> +      * do not allow guest to change the MSR.DE.
> +      */
> +     vcpu->arch.shadow_msrp |= MSRP_DEP;
> +#endif

guest_may_set_msr_de(vcpu, false);

> +     vcpu->guest_debug = dbg->control;
> +     vcpu->arch.shadow_dbg_reg.dbcr0 = 0;
> +     /* Set DBCR0_EDM in guest visible DBCR0 register. */
> +     vcpu->arch.dbg_reg.dbcr0 = DBCR0_EDM;
> +
> +     if (vcpu->guest_debug & KVM_GUESTDBG_SINGLESTEP)
> +             vcpu->arch.shadow_dbg_reg.dbcr0 |= DBCR0_IDM | DBCR0_IC;
> +
> +     if (!(vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP))
> +             return 0;
> +
> +     /* Code below handles only HW breakpoints */
> +     dbg_reg = &(vcpu->arch.shadow_dbg_reg);
> +
> +     /*
> +      * On BOOKE (e500v2); Set DBCR1 and DBCR2 to allow debug events
> +      * to occur when MSR.PR is set.
> +      * On BOOKE-HV (e500mc+); MSR.PR = 0 when guest is running. So we
> +      * should clear DBCR1 and DBCR2. And EPCR.DUVD is used to control
> +      * that debug events will not come in hypervisor (GS = 0).

This is still wrong. We want to trap in PR=1. It's what PR KVM does, it's what 
TCG does. There is no point in making HV KVM behave differently.

> +      */
> +#ifdef CONFIG_KVM_BOOKE_HV
> +     dbg_reg->dbcr1 = 0;
> +     dbg_reg->dbcr2 = 0;
> +#else
> +     dbg_reg->dbcr1 = DBCR1_IAC1US | DBCR1_IAC2US | DBCR1_IAC3US |
> +                       DBCR1_IAC4US;
> +     dbg_reg->dbcr2 = DBCR2_DAC1US | DBCR2_DAC2US;
> +#endif
> +
> +     for (n = 0; n < (KVMPPC_BOOKE_IAC_NUM + KVMPPC_BOOKE_DAC_NUM); n++) {
> +             uint64_t addr = dbg->arch.bp[n].addr;
> +             uint32_t type = dbg->arch.bp[n].type;
> +
> +             if (type == KVMPPC_DEBUG_NONE)
> +                     continue;
> +
> +             if (type & !(KVMPPC_DEBUG_WATCH_READ |
> +                          KVMPPC_DEBUG_WATCH_WRITE |
> +                          KVMPPC_DEBUG_BREAKPOINT))
> +                     return -EINVAL;
> +
> +             if (type & KVMPPC_DEBUG_BREAKPOINT) {
> +                     /* Setting H/W breakpoint */
> +                     if (kvmppc_booke_set_breakpoint(vcpu, addr, b++))

Please pass dbg_reg into the function

> +                             return -EINVAL;
> +             } else {
> +                     /* Setting H/W watchpoint */
> +                     if (kvmppc_booke_set_watchpoint(vcpu, addr, type, w++))

here too

> +                             return -EINVAL;
> +             }
> +     }
> +
> +     return 0;
> +}
> +
> void kvmppc_booke_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
> {
>       vcpu->cpu = smp_processor_id();
> @@ -1678,6 +1879,11 @@ void kvmppc_booke_vcpu_put(struct kvm_vcpu *vcpu)
> {
>       current->thread.kvm_vcpu = NULL;
>       vcpu->cpu = -1;
> +
> +     /* Disable all debug events */
> +     mtspr(SPRN_DBCR0, 0x0);

Why? Wouldn't normal preemption handling take care of this already?

> +     /* Clear pending debug event in DBSR */
> +     kvmppc_clear_dbsr();

Is there a chance we will ever have to do this? On debug exits from guest 
context we already clear dbsr.

My question from the last review round still stands. What if a breakpoint for 
guest address 0xc0001234 is active and on the host 0xc0001234 happens to be at 
kvm_run? Will we get a breakpoint inside of the host context? That would be 
bad. We only want to get debug events when

  e500v2: MSR.PR == 1
  e500mc: MSR.GS == 1


Alex

> }
> 
> int __init kvmppc_booke_init(void)
> diff --git a/arch/powerpc/kvm/booke.h b/arch/powerpc/kvm/booke.h
> index 5fd1ba6..a1ff67d 100644
> --- a/arch/powerpc/kvm/booke.h
> +++ b/arch/powerpc/kvm/booke.h
> @@ -129,4 +129,9 @@ static inline void kvmppc_save_guest_fp(struct kvm_vcpu 
> *vcpu)
>               giveup_fpu(current);
> #endif
> }
> +
> +static inline void kvmppc_clear_dbsr(void)
> +{
> +     mtspr(SPRN_DBSR, mfspr(SPRN_DBSR));
> +}
> #endif /* __KVM_BOOKE_H__ */
> -- 
> 1.5.6.5
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe kvm-ppc" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to