Re: linux-next: manual merge of the kvm-arm tree with Linus' tree

2015-04-08 Thread Christoffer Dall
On Wed, Apr 08, 2015 at 09:15:13AM +0100, Marc Zyngier wrote:
> On Tue, 7 Apr 2015 17:20:15 +0100
> Paolo Bonzini  wrote:
> 
> Hi Paolo,
> 
> > On 18/03/2015 08:55, Christoffer Dall wrote:
> > > Hi Stephen,
> > > 
> > > On Wed, Mar 18, 2015 at 02:41:11PM +1100, Stephen Rothwell wrote:
> > >> Hi all,
> > >>
> > >> Today's linux-next merge of the kvm-arm tree got a conflict in
> > >> virt/kvm/arm/vgic.c between commit ae705930fca6 ("arm/arm64: KVM: Keep
> > >> elrsr/aisr in sync with software model") from Linus' tree and commit
> > >> 71760950bf3d ("arm/arm64: KVM: add a common vgic_queue_irq_to_lr fn")
> > >> from the kvm-arm tree.
> > >>
> > >> I fixed it up (I think - see below) and can carry the fix as necessary
> > >> (no action is required).
> > >>
> > >> -- 
> > >> Cheers,
> > > >> Stephen Rothwell s...@canb.auug.org.au
> > >>
> > >> diff --cc virt/kvm/arm/vgic.c
> > >> index c9f60f524588,ffd937ca5141..
> > >> --- a/virt/kvm/arm/vgic.c
> > >> +++ b/virt/kvm/arm/vgic.c
> > >> @@@ -982,9 -1092,7 +1098,8 @@@ bool vgic_queue_irq(struct kvm_vcpu *vc
> > >>  if (vlr.source == sgi_source_id) {
> > >>  kvm_debug("LR%d piggyback for IRQ%d\n", lr, 
> > >> vlr.irq);
> > >>  BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
> > >> -vlr.state |= LR_STATE_PENDING;
> > >> -vgic_set_lr(vcpu, lr, vlr);
> > >> +vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
> > >>  +   vgic_sync_lr_elrsr(vcpu, lr, vlr);
> > >>  return true;
> > >>  }
> > >>  }
> > >> @@@ -1001,12 -1109,8 +1116,9 @@@
> > >>   
> > >>  vlr.irq = irq;
> > >>  vlr.source = sgi_source_id;
> > >> -vlr.state = LR_STATE_PENDING;
> > >> -if (!vgic_irq_is_edge(vcpu, irq))
> > >> -vlr.state |= LR_EOI_INT;
> > >> - 
> > >> -vgic_set_lr(vcpu, lr, vlr);
> > >> +vlr.state = 0;
> > >> +vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
> > >>  +   vgic_sync_lr_elrsr(vcpu, lr, vlr);
> > >>   
> > >>  return true;
> > >>   }
> > > 
> > > Looks great, thanks!
> > > -Christoffer
> > 
> > Got the same conflict when pulling from the kvm-arm tree, I used
> > a different resolution though:
> > 
> > diff --cc virt/kvm/arm/vgic.c
> > index c9f60f524588,b70174e74868..8d550ff14700
> > --- a/virt/kvm/arm/vgic.c
> > +++ b/virt/kvm/arm/vgic.c
> > @@@ -955,6 -1095,25 +1101,26 @@@ static void vgic_retire_disabled_irqs(s
> > }
> >   }
> >   
> > + static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
> > +int lr_nr, struct vgic_lr vlr)
> > + {
> > +   if (vgic_irq_is_active(vcpu, irq)) {
> > +   vlr.state |= LR_STATE_ACTIVE;
> > +   kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
> > +   vgic_irq_clear_active(vcpu, irq);
> > +   vgic_update_state(vcpu->kvm);
> > +   } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
> > +   vlr.state |= LR_STATE_PENDING;
> > +   kvm_debug("Set pending: 0x%x\n", vlr.state);
> > +   }
> > + 
> > +   if (!vgic_irq_is_edge(vcpu, irq))
> > +   vlr.state |= LR_EOI_INT;
> > + 
> > +   vgic_set_lr(vcpu, lr_nr, vlr);
> > ++  vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
> > + }
> > + 
> >   /*
> >* Queue an interrupt to a CPU virtual interface. Return true on success,
> >* or false if it wasn't possible to queue it.
> > @@@ -982,9 -1141,7 +1148,7 @@@ bool vgic_queue_irq(struct kvm_vcpu *vc
> > if (vlr.source == sgi_source_id) {
> > kvm_debug("LR%d piggyback for IRQ%d\n", lr, 
> > vlr.irq);
> > BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
> > -   vlr.state |= LR_STATE_PENDING;
> > -   vgic_set_lr(vcpu, lr, vlr);
> > -   vgic_sync_lr_elrsr(vcpu, lr, vlr);
> > + 

Re: [PATCH] arm: KVM: force execution of HCPTR access on VM exit

2015-04-08 Thread Christoffer Dall
On Mon, Mar 16, 2015 at 10:59:43AM +0000, Marc Zyngier wrote:
> On VM entry, we disable access to the VFP registers in order to
> perform a lazy save/restore of these registers.
> 
> On VM exit, we restore access, test if we did enable them before,
> and save/restore the guest/host registers if necessary. In this
> sequence, the FPEXC register is always accessed, irrespective
> of the trapping configuration.
> 
> If the guest didn't touch the VFP registers, then the HCPTR access
> has now enabled such access, but we're missing a barrier to ensure
> architectural execution of the new HCPTR configuration. If the HCPTR
> access has been delayed/reordered, the subsequent access to FPEXC
> will cause a trap, which we aren't prepared to handle at all.
> 
> The fix is to introduce a barrier that only takes place if the
> guest hasn't accessed its view of the VFP registers, making
> the access to FPEXC safe.
> 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm/kvm/interrupts.S | 7 +--
>  1 file changed, 5 insertions(+), 2 deletions(-)
> 
> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> index 79caf79..3ac7aca 100644
> --- a/arch/arm/kvm/interrupts.S
> +++ b/arch/arm/kvm/interrupts.S
> @@ -175,10 +175,13 @@ __kvm_vcpu_return:
>  #ifdef CONFIG_VFPv3
>   @ Save floating point registers we if let guest use them.
>   tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
> - bne after_vfp_restore
> + beq 1f
> +
> + isb @ Force execution of HCPTR if we've just reenabled VFP access
> + b   after_vfp_restore
>  
>   @ Switch VFP/NEON hardware state to the host's
> - add r7, vcpu, #VCPU_VFP_GUEST
> +1:   add r7, vcpu, #VCPU_VFP_GUEST
>   store_vfp_state r7
>   add r7, vcpu, #VCPU_VFP_HOST
>   ldr r7, [r7]
> -- 
> 2.1.4
> 

Reviewed-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm/arm64: avoid using kvm_run for in-kernel emulation

2015-04-09 Thread Christoffer Dall
On Sat, Mar 28, 2015 at 01:48:20AM +0000, Andre Przywara wrote:
> Our in-kernel VGIC emulation still uses struct kvm_run briefly before
> writing back the emulation result into the guest register. Using a
> userspace mapped data structure within the kernel sounds dodgy, also
> we do some extra copying at the moment at the end of the VGIC
> emulation code.
> Replace the usage of struct kvm_run in favour of passing separate
> parameters into kvm_handle_mmio_return (and rename the function on
> the way) to optimise the VGIC emulation. The real userland MMIO code
> path does not change much.
> 
> Signed-off-by: Andre Przywara 
> ---
> Hi,
> 
> this is an optimization of the VGIC code totally removing struct
> kvm_run from the VGIC emulation. In my eyes it provides a nice
> cleanup and is a logical consequence of the kvm_io_bus patches (on
> which it goes on top). On the other hand it is optional and I didn't
> want to merge it with the already quite large last patch 11.
> Marc, I leave it up to you whether you take this as part of the
> kvm_io_bus series or not.
> 
> Cheers,
> Andre.
> 
>  arch/arm/include/asm/kvm_mmio.h   |3 +-
>  arch/arm/kvm/arm.c|6 ++--
>  arch/arm/kvm/mmio.c   |   55 
> ++---
>  arch/arm64/include/asm/kvm_mmio.h |3 +-
>  virt/kvm/arm/vgic.c   |8 ++
>  5 files changed, 37 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_mmio.h b/arch/arm/include/asm/kvm_mmio.h
> index d8e90c8..53461a6 100644
> --- a/arch/arm/include/asm/kvm_mmio.h
> +++ b/arch/arm/include/asm/kvm_mmio.h
> @@ -28,7 +28,8 @@ struct kvm_decode {
>   bool sign_extend;
>  };
>  
> -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run);
> +int kvm_writeback_mmio_data(struct kvm_vcpu *vcpu, unsigned int len,
> + void *val, gpa_t phys_addr);
>  int io_mem_abort(struct kvm_vcpu *vcpu, struct kvm_run *run,
>phys_addr_t fault_ipa);
>  
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index e98370c..b837aef 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -506,8 +506,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> struct kvm_run *run)
>   if (ret)
>   return ret;
>  
> - if (run->exit_reason == KVM_EXIT_MMIO) {
> - ret = kvm_handle_mmio_return(vcpu, vcpu->run);
> + if (run->exit_reason == KVM_EXIT_MMIO && !run->mmio.is_write) {
> + ret = kvm_writeback_mmio_data(vcpu, run->mmio.len,
> +   run->mmio.data,
> +   run->mmio.phys_addr);
>   if (ret)
>   return ret;
>   }
> diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c
> index 974b1c6..3c57f96 100644
> --- a/arch/arm/kvm/mmio.c
> +++ b/arch/arm/kvm/mmio.c
> @@ -86,38 +86,36 @@ static unsigned long mmio_read_buf(char *buf, unsigned 
> int len)
>  }
>  
>  /**
> - * kvm_handle_mmio_return -- Handle MMIO loads after user space emulation
> - * @vcpu: The VCPU pointer
> - * @run:  The VCPU run struct containing the mmio data
> + * kvm_writeback_mmio_data -- Handle MMIO loads after user space emulation
> + * @vcpu:The VCPU pointer
> + * @len: The length in Bytes of the MMIO access
> + * @data_ptr:Pointer to the data to be written back into the guest
> + * @phys_addr:   Physical address of the originating MMIO access
>   *
>   * This should only be called after returning from userspace for MMIO load
> - * emulation.
> + * emulation. phys_addr is only used for the tracepoint output.
>   */
> -int kvm_handle_mmio_return(struct kvm_vcpu *vcpu, struct kvm_run *run)
> +int kvm_writeback_mmio_data(struct kvm_vcpu *vcpu, unsigned int len,
> + void *data_ptr, gpa_t phys_addr)
>  {
>   unsigned long data;
> - unsigned int len;
>   int mask;
>  
> - if (!run->mmio.is_write) {
> - len = run->mmio.len;
> - if (len > sizeof(unsigned long))
> - return -EINVAL;
> + if (len > sizeof(unsigned long))
> + return -EINVAL;
>  
> - data = mmio_read_buf(run->mmio.data, len);
> + data = mmio_read_buf(data_ptr, len);
>  
> - if (vcpu->arch.mmio_decode.sign_extend &&
> - len < sizeof(unsigned long)) {
> - mask = 1U << ((len * 8) - 1);
> - data = (data ^ mask) - mask;
> - }
> -
> - trace_kvm_mmio(KVM_TRACE_MMIO_READ, len, run->mmio.phys_addr,
> -data);
> - data = vcpu_data_host_to_guest(vcpu, data, len);
> - *vcpu_reg(vcpu, vcpu->arch.mmio_decode.rt) = data;
> + if (vcpu->arch.mmio_decode.sign_extend &&
> + len < sizeof(unsigned long)) {
> + mask = 1U << ((len * 8) - 1);
> + data = (data ^ mask) - mask;
>   }
>  
> + trace_kvm

Re: Advice on HYP interface for AsyncPF

2015-04-09 Thread Christoffer Dall
On Thu, Apr 09, 2015 at 02:06:47PM +0200, Andrew Jones wrote:
> On Thu, Apr 09, 2015 at 08:57:23AM +0100, Marc Zyngier wrote:
> > On Thu, 9 Apr 2015 02:46:54 +0100
> > Mario Smarduch  wrote:
> > 
> > Hi Mario,
> > 
> > > I'm working with AsyncPF, and currently using
> > > hyp call to communicate guest GFN for host to inject
> > > virtual abort - page not available/page available.
> > > 
> > > Currently only PSCI makes use of that interface,
> > > (handle_hvc()) can we overload interface with additional
> > > hyp calls in this case pass guest gfn? Set arg0
> > > to some range outside of PSCI use.
> > 
> > I can't see a reason why we wouldn't open handle_hvc() to other
> > paravirtualized services. But this has to be done with extreme caution:
> > 
> > - This becomes an ABI between host and guest
> 
> To expand on that, if the benefits don't outweigh the maintenance
> required for that ABI, for life, then it turns into a life-time burden.
> Any guest-host speedups that can be conceived, which require hypercalls,
> should probably be bounced off the hardware people first. Waiting for
> improvements in the virt extensions may be a better choice than
> committing to a PV solution.
> 
> > - We need a discovery protocol
> 
> Hopefully all users of the PSCI hypcall have been using function #0,
> because handle_hvc unfortunately hasn't been checking it.

huh?  I don't understand this, sorry.

> In any case,
> I'm not sure we have much choice but to start enforcing it now. Once we
> do, with something like
> 
> switch(hypcall_nr) {
> case 0: /* handle psci call */
> default: return -KVM_ENOSYS;
> }
> 
> then, I think the guest's discovery protocol can simply be
> 
> if (do_hypercall() == -ENOSYS) {
>/* PV path not supported, fall back to whatever... */
> }
> 
> > - We need to make sure other hypervisors don't reuse the same function
> >   number for other purposes
> 
> I'm not sure what this means. Xen already has several hypercalls defined
> for ARM, the same that they have for x86, which don't match any of the
> KVM hypercalls. Now, KVM for other arches (which is maybe what you meant)
> does define a handful, which we should integrate with, as KVM mixes
> architectures within its hypercall number allocation, see
> include/uapi/linux/kvm_para.h. Just using the common code should make it
> easy to avoid problems. We don't have a problem with the PSCI hypcall, as
> zero isn't allocated. Ideally we would define PSCI properly though,
> e.g. KVM_HC_ARM_PSCI, and still reserve zero in the common header. To do
> that maybe we'll need to keep #0 as an ARM-only alias for the new number
> for compatibility now?
> 
> > 
> > Maybe we should adopt Xen's idea of a hypervisor node in DT where we
> > would describe the various services? How will that work with ACPI?
> 
> I don't think we'll ever have a "virt guest" ACPI table that we can
> use for this stuff, so this won't work for ACPI. But I think the ENOSYS
> probing should be sufficient for this anyway.
> 
We've reserved a Xen table on ACPI, not sure why we can't do the same
for KVM or a generic ARM PV table for that matter... ?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] arm: KVM: force execution of HCPTR access on VM exit

2015-04-09 Thread Christoffer Dall
On Wed, Apr 08, 2015 at 06:16:50PM +0100, Marc Zyngier wrote:
> On 08/04/15 12:40, Christoffer Dall wrote:
> > On Mon, Mar 16, 2015 at 10:59:43AM +0000, Marc Zyngier wrote:
> >> On VM entry, we disable access to the VFP registers in order to
> >> perform a lazy save/restore of these registers.
> >>
> >> On VM exit, we restore access, test if we did enable them before,
> >> and save/restore the guest/host registers if necessary. In this
> >> sequence, the FPEXC register is always accessed, irrespective
> >> of the trapping configuration.
> >>
> >> If the guest didn't touch the VFP registers, then the HCPTR access
> >> has now enabled such access, but we're missing a barrier to ensure
> >> architectural execution of the new HCPTR configuration. If the HCPTR
> >> access has been delayed/reordered, the subsequent access to FPEXC
> >> will cause a trap, which we aren't prepared to handle at all.
> >>
> >> The fix is to introduce a barrier that only takes place if the
> >> guest hasn't accessed its view of the VFP registers, making
> >> the access to FPEXC safe.
> >>
> >> Signed-off-by: Marc Zyngier 
> >> ---
> >>  arch/arm/kvm/interrupts.S | 7 +--
> >>  1 file changed, 5 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> >> index 79caf79..3ac7aca 100644
> >> --- a/arch/arm/kvm/interrupts.S
> >> +++ b/arch/arm/kvm/interrupts.S
> >> @@ -175,10 +175,13 @@ __kvm_vcpu_return:
> >>  #ifdef CONFIG_VFPv3
> >>@ Save floating point registers we if let guest use them.
> >>tst r2, #(HCPTR_TCP(10) | HCPTR_TCP(11))
> >> -  bne after_vfp_restore
> >> +  beq 1f
> >> +
> >> +  isb @ Force execution of HCPTR if we've just reenabled VFP access
> >> +  b   after_vfp_restore
> >>  
> >>@ Switch VFP/NEON hardware state to the host's
> >> -  add r7, vcpu, #VCPU_VFP_GUEST
> >> +1:add r7, vcpu, #VCPU_VFP_GUEST
> >>store_vfp_state r7
> >>add r7, vcpu, #VCPU_VFP_HOST
> >>ldr r7, [r7]
> >> -- 
> >> 2.1.4
> >>
> > 
> > Reviewed-by: Christoffer Dall 
> 
> Thanks (I must admit I completely forgot the existence of this patch...).
> 
> I guess we can queue that for -rc1, with a Cc to stable...
> 
yes, I tested it on TC2 (both A15 and A7) and it works like a charm.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: Advice on HYP interface for AsyncPF

2015-04-09 Thread Christoffer Dall
On Thu, Apr 09, 2015 at 03:59:46PM +0200, Andrew Jones wrote:
> On Thu, Apr 09, 2015 at 03:35:06PM +0200, Christoffer Dall wrote:
> > On Thu, Apr 09, 2015 at 02:06:47PM +0200, Andrew Jones wrote:
> > > On Thu, Apr 09, 2015 at 08:57:23AM +0100, Marc Zyngier wrote:
> > > > On Thu, 9 Apr 2015 02:46:54 +0100
> > > > Mario Smarduch  wrote:
> > > > 
> > > > Hi Mario,
> > > > 
> > > > > I'm working with AsyncPF, and currently using
> > > > > hyp call to communicate guest GFN for host to inject
> > > > > virtual abort - page not available/page available.
> > > > > 
> > > > > Currently only PSCI makes use of that interface,
> > > > > (handle_hvc()) can we overload interface with additional
> > > > > hyp calls in this case pass guest gfn? Set arg0
> > > > > to some range outside of PSCI use.
> > > > 
> > > > I can't see a reason why we wouldn't open handle_hvc() to other
> > > > paravirtualized services. But this has to be done with extreme caution:
> > > > 
> > > > - This becomes an ABI between host and guest
> > > 
> > > To expand on that, if the benefits don't outweigh the maintenance
> > > required for that ABI, for life, then it turns into a life-time burden.
> > > Any guest-host speedups that can be conceived, which require hypercalls,
> > > should probably be bounced off the hardware people first. Waiting for
> > > improvements in the virt extensions may be a better choice than
> > > committing to a PV solution.
> > > 
> > > > - We need a discovery protocol
> > > 
> > > Hopefully all users of the PSCI hypcall have been using function #0,
> > > because handle_hvc unfortunately hasn't been checking it.
> > 
> > huh?  I don't understand this, sorry.
> 
> The hvc immediate used for psci is 0, and that's fine because there's
> only a single hvc function currently defined. If we want to define
> additional functions then we could assign each additional function a
> new immediate. However, as we've never enforced the immediate to be
> zero, then there's no guarantee we won't mess up a guest that expects
> any immediate to work.

you're referring to an implicit definition of an "hvc function"
differentiated by the immediate field - this is what threw me off.

> 
> Of course we can continue allowing any immediate to work too, by
> passing the function number in some register. Xen uses x16? We could
> use reg0 too, and just integrate with the PSCI function space as well.
> 

I think you have to allow any immediate value, since I don't think PSCI
differentiates on that, and we want to implement that spec.  So if you
can't rely on re-using the PSCI function numbers within a different
immediate number space, you might as well use the register that PSCI
uses for differentiating between 'functions'.

> > 
> > > In any case,
> > > I'm not sure we have much choice but to start enforcing it now. Once we
> > > do, with something like
> > > 
> > > switch(hypcall_nr) {
> > > case 0: /* handle psci call */
> > > default: return -KVM_ENOSYS;
> > > }
> > > 
> > > then, I think the guest's discovery protocol can simply be
> > > 
> > > if (do_hypercall() == -ENOSYS) {
> > >/* PV path not supported, fall back to whatever... */
> > > }
> > > 
> > > > - We need to make sure other hypervisors don't reuse the same function
> > > >   number for other purposes
> > > 
> > > I'm not sure what this means. Xen already has several hypercalls defined
> > > for ARM, the same that they have for x86, which don't match any of the
> > > KVM hypercalls. Now, KVM for other arches (which is maybe what you meant)
> > > does define a handful, which we should integrate with, as KVM mixes
> > > architectures within its hypercall number allocation, see
> > > include/uapi/linux/kvm_para.h. Just using the common code should make it
> > > easy to avoid problems. We don't have a problem with the PSCI hypcall, as
> > > zero isn't allocated. Ideally we would define PSCI properly though,
> > > e.g. KVM_HC_ARM_PSCI, and still reserve zero in the common header. To do
> > > that maybe we'll need to keep #0 as an ARM-only alias for the new number
> > > for compatibility now?
> > > 
> > > > 
> > > > Maybe we should adopt Xen

Re: [PATCH v2] KVM: arm/arm64: avoid using kvm_run for in-kernel emulation

2015-04-13 Thread Christoffer Dall
On Fri, Apr 10, 2015 at 03:59:47PM +0100, Andre Przywara wrote:
> Our in-kernel VGIC emulation still uses struct kvm_run briefly before
> writing back the emulation result into the guest register.
> Although this particular case looks safe from an exploitation
> perspective, we can save some unneeded copying at the end of the VGIC
> emulation code.
> Replace the usage of struct kvm_run in favour of passing separate
> parameters in io_mem_abort(). Since the write back is now handled
> for all kvm_io_bus users, we can get rid of it in the VGIC.
> 
> Signed-off-by: Andre Przywara 

Reviewed-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm/arm64: check IRQ number on userland injection

2015-04-13 Thread Christoffer Dall
On Fri, Apr 10, 2015 at 04:17:59PM +0100, Andre Przywara wrote:
> When userland injects a SPI via the KVM_IRQ_LINE ioctl we currently
> only check it against a fixed limit, which historically is set
> to 127. With the new dynamic IRQ allocation the effective limit may
> actually be smaller (64).
> So when now a malicious or buggy userland injects a SPI in that
> range, we spill over on our VGIC bitmaps and bytemaps memory.
> I could trigger a host kernel NULL pointer dereference with current
> mainline by injecting some bogus IRQ number from a hacked kvmtool:
> -
> 
> DEBUG: kvm_vgic_inject_irq(kvm, cpu=0, irq=114, level=1)
> DEBUG: vgic_update_irq_pending(kvm, cpu=0, irq=114, level=1)
> DEBUG: IRQ #114 still in the game, writing to bytemap now...
> Unable to handle kernel NULL pointer dereference at virtual address 
> pgd = ffc07652e000
> [] *pgd=f658b003, *pud=f658b003, *pmd=
> Internal error: Oops: 9606 [#1] PREEMPT SMP
> Modules linked in:
> CPU: 1 PID: 1053 Comm: lkvm-msi-irqinj Not tainted 4.0.0-rc7+ #3027
> Hardware name: FVP Base (DT)
> task: ffc0774e9680 ti: ffc0765a8000 task.ti: ffc0765a8000
> PC is at kvm_vgic_inject_irq+0x234/0x310
> LR is at kvm_vgic_inject_irq+0x30c/0x310
> pc : [] lr : [] pstate: 8145
> .
> 
> So this patch fixes this by checking the SPI number against the
> actual limit. Also we remove the former legacy hard limit of
> 127 in the ioctl code.
> 
> Signed-off-by: Andre Przywara 
> CC:  # 4.0, 3.19, 3.18

Reviewed-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm/arm64: check IRQ number on userland injection

2015-04-13 Thread Christoffer Dall
On Fri, Apr 10, 2015 at 05:52:05PM +0100, Andre Przywara wrote:
> Hi Christopher,
> 
> On 10/04/15 16:29, Christopher Covington wrote:
> > Hi Andre,
> > 
> > On 04/10/2015 11:17 AM, Andre Przywara wrote:
> >> When userland injects a SPI via the KVM_IRQ_LINE ioctl we currently
> >> only check it against a fixed limit, which historically is set
> >> to 127. With the new dynamic IRQ allocation the effective limit may
> >> actually be smaller (64).
> >> So when now a malicious or buggy userland injects a SPI in that
> >> range, we spill over on our VGIC bitmaps and bytemaps memory.
> >> I could trigger a host kernel NULL pointer dereference with current
> >> mainline by injecting some bogus IRQ number from a hacked kvmtool:
> > 
> >> --- a/arch/arm/include/uapi/asm/kvm.h
> >> +++ b/arch/arm/include/uapi/asm/kvm.h
> >> @@ -195,7 +195,11 @@ struct kvm_arch_memory_slot {
> >>  #define KVM_ARM_IRQ_CPU_IRQ   0
> >>  #define KVM_ARM_IRQ_CPU_FIQ   1
> >>  
> >> -/* Highest supported SPI, from VGIC_NR_IRQS */
> >> +/*
> >> + * This used to hold the highest supported SPI, but it is now obsolete
> >> + * and only here to provide source code level compatibility with older
> >> + * userland. The highest SPI number can be set via 
> >> KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
> >> + */
> >>  #define KVM_ARM_IRQ_GIC_MAX   127
> > 
> > If that's the case should it maybe only defined when __KERNEL__ is not 
> > defined?
> 
> Mmmh, I am not sure it's really worth the hassle. Actually it seems like
> that neither kvmtool nor QEMU use this definition, so it's more or less
> orphaned by now. I am confident we can avoid it sneaking in in the
> kernel again.
> 
TBH, I wouldn't object against Marc enclosing the definition in an
#ifdef __KERNEL__.

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm/arm64: check IRQ number on userland injection

2015-04-13 Thread Christoffer Dall
On Mon, Apr 13, 2015 at 11:21:20AM +0100, Marc Zyngier wrote:
> On 13/04/15 11:04, Christoffer Dall wrote:
> > On Fri, Apr 10, 2015 at 05:52:05PM +0100, Andre Przywara wrote:
> >> Hi Christopher,
> >>
> >> On 10/04/15 16:29, Christopher Covington wrote:
> >>> Hi Andre,
> >>>
> >>> On 04/10/2015 11:17 AM, Andre Przywara wrote:
> >>>> When userland injects a SPI via the KVM_IRQ_LINE ioctl we currently
> >>>> only check it against a fixed limit, which historically is set
> >>>> to 127. With the new dynamic IRQ allocation the effective limit may
> >>>> actually be smaller (64).
> >>>> So when now a malicious or buggy userland injects a SPI in that
> >>>> range, we spill over on our VGIC bitmaps and bytemaps memory.
> >>>> I could trigger a host kernel NULL pointer dereference with current
> >>>> mainline by injecting some bogus IRQ number from a hacked kvmtool:
> >>>
> >>>> --- a/arch/arm/include/uapi/asm/kvm.h
> >>>> +++ b/arch/arm/include/uapi/asm/kvm.h
> >>>> @@ -195,7 +195,11 @@ struct kvm_arch_memory_slot {
> >>>>  #define KVM_ARM_IRQ_CPU_IRQ 0
> >>>>  #define KVM_ARM_IRQ_CPU_FIQ 1
> >>>>  
> >>>> -/* Highest supported SPI, from VGIC_NR_IRQS */
> >>>> +/*
> >>>> + * This used to hold the highest supported SPI, but it is now obsolete
> >>>> + * and only here to provide source code level compatibility with older
> >>>> + * userland. The highest SPI number can be set via 
> >>>> KVM_DEV_ARM_VGIC_GRP_NR_IRQS.
> >>>> + */
> >>>>  #define KVM_ARM_IRQ_GIC_MAX 127
> >>>
> >>> If that's the case should it maybe only defined when __KERNEL__ is not 
> >>> defined?
> >>
> >> Mmmh, I am not sure it's really worth the hassle. Actually it seems like
> >> that neither kvmtool nor QEMU use this definition, so it's more or less
> >> orphaned by now. I am confident we can avoid it sneaking in in the
> >> kernel again.
> >>
> > TBH, I wouldn't object against Marc enclosing the definition in an
> > #ifdef __KERNEL__.
> 
> Yeah, I'll fix that up (assuming you mean #ifndef rather than #ifdef).
> 
Yes, Monday morning ;)

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 01/10] KVM: add commentary for kvm_debug_exit_arch struct

2015-04-13 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:07:59PM +0100, Alex Bennée wrote:
> Bring into line with the commentary for the other structures and their
> KVM_EXIT_* cases.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> 
> v2
>   - add comments for other exit types
> 
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 8055706..5eedf84 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -226,6 +226,7 @@ struct kvm_run {
>   __u32 count;
>   __u64 data_offset; /* relative to kvm_run start */
>   } io;
> + /* KVM_EXIT_DEBUG */
>   struct {
>   struct kvm_debug_exit_arch arch;
>   } debug;
> @@ -274,6 +275,7 @@ struct kvm_run {
>   __u32 data;
>   __u8  is_write;
>   } dcr;
> + /* KVM_EXIT_INTERNAL_ERROR */
>   struct {
>   __u32 suberror;
>   /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
> @@ -284,6 +286,7 @@ struct kvm_run {
>   struct {
>   __u64 gprs[32];
>   } osi;
> + /* KVM_EXIT_PAPR_HCALL */
>   struct {
>   __u64 nr;
>   __u64 ret;
> -- 
> 2.3.4
> 

I'm fine with this change as it is, but I think it should update the
documentation of the kvm_run structure along with it, and in there it
seems the debug struct is listed as 'unused'; this should be addressed
in this patch set somewhere if we start using this struct?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 02/10] KVM: define common __KVM_GUESTDBG_USE_SW/HW_BP values

2015-04-13 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:08:00PM +0100, Alex Bennée wrote:
> Currently x86, powerpc and soon arm64 use the same two architecture
> specific bits for guest debug support for software and hardware
> breakpoints. This makes the shared values explicit while leaving the
> gate open for another architecture to use some other value if they
> really really want to.
> 
> Signed-off-by: Alex Bennée 
> 
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
> b/arch/powerpc/include/uapi/asm/kvm.h
> index ab4d473..1731569 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -310,8 +310,8 @@ struct kvm_guest_debug_arch {
>   * and upper 16 bits are architecture specific. Architecture specific defines
>   * that ioctl is for setting hardware breakpoint or software breakpoint.
>   */
> -#define KVM_GUESTDBG_USE_SW_BP   0x0001
> -#define KVM_GUESTDBG_USE_HW_BP   0x0002
> +#define KVM_GUESTDBG_USE_SW_BP   __KVM_GUESTDBG_USE_SW_BP
> +#define KVM_GUESTDBG_USE_HW_BP   __KVM_GUESTDBG_USE_HW_BP
>  
>  /* definition of registers in kvm_run */
>  struct kvm_sync_regs {
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index d7dcef5..1438202 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -250,8 +250,8 @@ struct kvm_debug_exit_arch {
>   __u64 dr7;
>  };
>  
> -#define KVM_GUESTDBG_USE_SW_BP   0x0001
> -#define KVM_GUESTDBG_USE_HW_BP   0x0002
> +#define KVM_GUESTDBG_USE_SW_BP   __KVM_GUESTDBG_USE_SW_BP
> +#define KVM_GUESTDBG_USE_HW_BP   __KVM_GUESTDBG_USE_HW_BP
>  #define KVM_GUESTDBG_INJECT_DB   0x0004
>  #define KVM_GUESTDBG_INJECT_BP   0x0008
>  
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 5eedf84..ce2db14 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -525,8 +525,16 @@ struct kvm_s390_irq {
>  
>  /* for KVM_SET_GUEST_DEBUG */
>  
> -#define KVM_GUESTDBG_ENABLE  0x0001
> -#define KVM_GUESTDBG_SINGLESTEP  0x0002
> +#define KVM_GUESTDBG_ENABLE  (1 << 0)
> +#define KVM_GUESTDBG_SINGLESTEP  (1 << 1)
> +
> +/*
> + * Architecture specific stuff uses the top 16 bits of the field,

can you be more specific than 'stuff' here?  features?

> + * however there is some shared commonality for the common cases

I don't like this sentence; shared commonality is a pleonasm and the use
of however makes it sound like there's some caveat here.

If the top 16 bits are indeed architecture specific, then I think they
should just be defined in their architecture specific headers.  Unless
the idea here is that there's a fixed set of flags that architectures
can choose to support, in which case it should simply be defined in the
common header.


> + */
> +#define __KVM_GUESTDBG_USE_SW_BP (1 << 16)
> +#define __KVM_GUESTDBG_USE_HW_BP (1 << 17)
> +
>  
>  struct kvm_guest_debug {
>   __u32 control;
> -- 
> 2.3.4
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 03/10] KVM: arm: guest debug, define API headers

2015-04-13 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:08:01PM +0100, Alex Bennée wrote:
> This commit defines the API headers for guest debugging. There are two
> architecture specific debug structures:
> 
>   - kvm_guest_debug_arch, allows us to pass in HW debug registers
>   - kvm_debug_exit_arch, signals the exact debug exit and pc
> 
> The type of debugging being used is controlled by the architecture specific
> control bits of the kvm_guest_debug->control flags in the ioctl
> structure.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v2
>- expose hsr and pc directly to user-space
> 
> diff --git a/arch/arm64/include/uapi/asm/kvm.h 
> b/arch/arm64/include/uapi/asm/kvm.h
> index 3ef77a4..6ee70a0 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -100,10 +100,24 @@ struct kvm_sregs {
>  struct kvm_fpu {
>  };
>  
> +/*
> + * See ARM ARM D7.3: Debug Registers

see the ARM ARM for ??

> + *
> + * The control registers are architecturally defined as 32 bits but are
> + * stored as 64 bit values along side the value registers and aligned

do you mean alongside?

> + * with the rest 64 bit registers in the normal CPU context.

rest of the 64 bit

> + */

why do we store them as 64 bit values?  There's nothing preventing us
from defining them as __u32 is there?  Is this to make the ONE_REG
interface accessors more convenient?

> +#define KVM_ARM_NDBG_REGS 16

nit: is NDBG short for something I don't know about or is it
the number of debug registers we are noting here, in which case I think
KVM_ARM_NUM_DBG_REGS is more clear.

>  struct kvm_guest_debug_arch {
> + __u64 dbg_bcr[KVM_ARM_NDBG_REGS];
> + __u64 dbg_bvr[KVM_ARM_NDBG_REGS];
> + __u64 dbg_wcr[KVM_ARM_NDBG_REGS];
> + __u64 dbg_wvr[KVM_ARM_NDBG_REGS];
>  };
>  
>  struct kvm_debug_exit_arch {
> + __u64 pc;
> + __u32 hsr;
>  };
>  
>  struct kvm_sync_regs {
> @@ -207,4 +221,11 @@ struct kvm_arch_memory_slot {
>  
>  #endif
>  
> +/*
> + * Architecture related debug defines - upper 16 bits of
> + * kvm_guest_debug->control
> + */
> +#define KVM_GUESTDBG_USE_SW_BP   __KVM_GUESTDBG_USE_SW_BP
> +#define KVM_GUESTDBG_USE_HW_BP   __KVM_GUESTDBG_USE_HW_BP
> +
>  #endif /* __ARM_KVM_H__ */
> -- 
> 2.3.4
> 

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 04/10] KVM: arm: guest debug, add stub KVM_SET_GUEST_DEBUG ioctl

2015-04-13 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:08:02PM +0100, Alex Bennée wrote:
> This commit adds a stub function to support the KVM_SET_GUEST_DEBUG
> ioctl. Currently any operation flag will return EINVAL. Actual
> functionality will be added with further patches.
> 
> Signed-off-by: Alex Bennée .
> 
> ---
> v2
>   - simplified form of the ioctl (stuff will go into setup_debug)
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index b112efc..06c5064 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2604,7 +2604,7 @@ handled.
>  4.87 KVM_SET_GUEST_DEBUG
>  
>  Capability: KVM_CAP_SET_GUEST_DEBUG
> -Architectures: x86, s390, ppc
> +Architectures: x86, s390, ppc, arm64
>  Type: vcpu ioctl
>  Parameters: struct kvm_guest_debug (in)
>  Returns: 0 on success; -1 on error
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 5560f74..445933d 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -183,6 +183,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
> ext)
>   case KVM_CAP_ARM_PSCI:
>   case KVM_CAP_ARM_PSCI_0_2:
>   case KVM_CAP_READONLY_MEM:
> + case KVM_CAP_SET_GUEST_DEBUG:
>   r = 1;
>   break;

shouldn't you wait with advertising this capability until you've
implemented support for it?


Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC PATCH] arm64: KVM: remove fpsimd save/restore from the world switch

2015-04-13 Thread Christoffer Dall
On Thu, Apr 09, 2015 at 05:53:59PM +0100, Marc Zyngier wrote:
> The world switch spends quite some time dealing with the FP/SIMD
> registers, as the state is quite sizeable (32 128bit registers,
> plus some crumbs on the side). We save/restore them on each
> entry/exit, so that both the host and the guest always see
> the state they expect.
> 
> But let's face it: the host kernel doesn't care. It is the host
> userspace that actually cares about FP. An obvious improvement is
> to remove the save/restore from the world switch, and only perform
> it when we're about to enter/exit the guest (by plugging it into
> vcpu_load/vcpu_put). The effect is pretty spectacular when running
> hackbench (which is the only benchmark worth looking at):
> 
> Without this patch:
> 
>   Running with 50*40 (== 2000) tasks.
>   Time: 36.756
>   Running with 50*40 (== 2000) tasks.
>   Time: 36.679
>   Running with 50*40 (== 2000) tasks.
>   Time: 36.699
> 
> With this patch:
> 
>   Running with 50*40 (== 2000) tasks.
>   Time: 30.947
>   Running with 50*40 (== 2000) tasks.
>   Time: 30.868
>   Running with 50*40 (== 2000) tasks.
>   Time: 30.961
> 
> This is on a HiKey board (8*A53), with a 4 vcpu guest.

cool.  Based on stats from kvm-unit-tests on A57 we should reduce the
overall world-switch cost (in the best cases, caches, etc.) by ~8.5%,
but this is even better and we are doing slightly more work than
context-switching here, so I'm guessing factoring in potential extra
cache misses, it can be this good.

However, on XGene with Ubuntu 14.04 Trusty, I get the following (do not
compare to Marc's results, I may be using different kernel settings and
different payload size):

Without the patch:

Running with 50*40 (== 2000) tasks.
Time: 15.970
Running with 50*40 (== 2000) tasks.
Time: 15.963
Running with 50*40 (== 2000) tasks.
Time: 15.875


With the patch:

Running with 50*40 (== 2000) tasks.
Time: 16.768:
Running with 50*40 (== 2000) tasks.
Time: 14.865
Running with 50*40 (== 2000) tasks.
Time: 14.880

On an HP Moonshot server I ran a number of other benchmarks and got
similarly boring results.

Comments on the patch itself below:

> 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm/include/asm/kvm_host.h   |  3 +++
>  arch/arm/kvm/arm.c|  2 ++
>  arch/arm64/include/asm/kvm_asm.h  |  4 
>  arch/arm64/include/asm/kvm_host.h |  3 +++
>  arch/arm64/kvm/Makefile   |  1 +
>  arch/arm64/kvm/fpsimd.S   | 39 
>  arch/arm64/kvm/handle_fpsimd.c| 42 
> +++
>  arch/arm64/kvm/hyp.S  | 27 -
>  8 files changed, 94 insertions(+), 27 deletions(-)
>  create mode 100644 arch/arm64/kvm/fpsimd.S
>  create mode 100644 arch/arm64/kvm/handle_fpsimd.c
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index d71607c..65cf1d1 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -226,6 +226,9 @@ static inline void vgic_arch_setup(const struct 
> vgic_params *vgic)
>  int kvm_perf_init(void);
>  int kvm_perf_teardown(void);
>  
> +static inline void kvm_fpsimd_flush_hwstate(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_fpsimd_sync_hwstate(struct kvm_vcpu *vcpu) {}
> +
>  void kvm_mmu_wp_memory_region(struct kvm *kvm, int slot);
>  
>  struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 6f53645..ff1213c 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -287,6 +287,7 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
>   vcpu->cpu = cpu;
>   vcpu->arch.host_cpu_context = this_cpu_ptr(kvm_host_cpu_state);
>  
> + kvm_fpsimd_flush_hwstate(vcpu);

not sure about the flush/sync terminology here, because we're not really
flushing a software model to hardware state - we're doing both in every
step.

How about:

kvm_fpsimd_load_vcpu_state()
kvm_fpsimd_put_vcpu_state()

>   kvm_arm_set_running_vcpu(vcpu);
>  }
>  
> @@ -299,6 +300,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>*/
>   vcpu->cpu = -1;
>  
> + kvm_fpsimd_sync_hwstate(vcpu);
>   kvm_arm_set_running_vcpu(NULL);
>  }
>  
> diff --git a/arch/arm64/include/asm/kvm_asm.h 
> b/arch/arm64/include/asm/kvm_asm.h
> index 4f7310f..eafb0c3 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -137,6 +137,10 @@ extern char __restore_vgic_v2_state[];
>  extern char __save_vgic_v3_state[];
>  extern char __restore_vgic_v3_state[];
>  
> +struct kvm_cpu_context;
> +extern void __kvm_save_fpsimd(struct kvm_cpu_context *);
> +extern void __kvm_restore_fpsimd(struct kvm_cpu_context *);
> +
>  #endif
>  
>  #endif /* __ARM_KVM_ASM_H__ */
> diff --git a/arch/arm64/include

Re: [RFC PATCH] arm64: KVM: remove fpsimd save/restore from the world switch

2015-04-13 Thread Christoffer Dall
On Mon, Apr 13, 2015 at 03:12:10PM +0100, Marc Zyngier wrote:
> On 13/04/15 13:57, Christoffer Dall wrote:
> > On Thu, Apr 09, 2015 at 05:53:59PM +0100, Marc Zyngier wrote:
> >> The world switch spends quite some time dealing with the FP/SIMD
> >> registers, as the state is quite sizeable (32 128bit registers,
> >> plus some crumbs on the side). We save/restore them on each
> >> entry/exit, so that both the host and the guest always see
> >> the state they expect.
> >>
> >> But let's face it: the host kernel doesn't care. It is the host
> >> userspace that actually cares about FP. An obvious improvement is
> >> to remove the save/restore from the world switch, and only perform
> >> it when we're about to enter/exit the guest (by plugging it into
> >> vcpu_load/vcpu_put). The effect is pretty spectacular when running
> >> hackbench (which is the only benchmark worth looking at):
> >>
> >> Without this patch:
> >>
> >>Running with 50*40 (== 2000) tasks.
> >>Time: 36.756
> >>Running with 50*40 (== 2000) tasks.
> >>Time: 36.679
> >>Running with 50*40 (== 2000) tasks.
> >>Time: 36.699
> >>
> >> With this patch:
> >>
> >>Running with 50*40 (== 2000) tasks.
> >>Time: 30.947
> >>Running with 50*40 (== 2000) tasks.
> >>Time: 30.868
> >>Running with 50*40 (== 2000) tasks.
> >>Time: 30.961
> >>
> >> This is on a HiKey board (8*A53), with a 4 vcpu guest.
> > 
> > cool.  Based on stats from kvm-unit-tests on A57 we should reduce the
> > overall world-switch cost (in the best cases, caches, etc.) with ~8.5%,
> > but this is even better and we are doing slightly more work than
> > context-switching here, so I'm guessing factoring in potential extra
> > cache misses, it can be this good.
> > 
> > However, on XGene with Ubuntu 14.04 Trusty, I get the following (do not
> > compare to Marc's results, I may be using different kernel settings and
> > different payload size):
> > 
> > Without the patch:
> > 
> > Running with 50*40 (== 2000) tasks.
> > Time: 15.970
> > Running with 50*40 (== 2000) tasks.
> > Time: 15.963
> > Running with 50*40 (== 2000) tasks.
> > Time: 15.875
> > 
> > 
> > With the patch:
> > 
> > Running with 50*40 (== 2000) tasks.
> > Time: 16.768:
> > Running with 50*40 (== 2000) tasks.
> > Time: 14.865
> > Running with 50*40 (== 2000) tasks.
> > Time: 14.880
> > 
> > On an HP Moonshot server I ran a number of other benchmarks and got
> > similarly boring results.
> 
> I did another set of tests, this time involving Seattle, XGene and the
> HiKey board. The result is that you cannot trust HiKey, this is the most
> unpredictable platform I've ever seen (I had some chosen words for it,
> that I don't want to write here).
> 
> So while this patch seems to provide a small improvement in some cases,
> it is definitely not as interesting as my first testing suggested. Too
> good to be true! ;-)
> 
> I'm going to try and revive my lazy-fp patches, and see if there's
> anything to improve here.
> 
> > Comments on the patch itself below:
> > 
> >>
> >> Signed-off-by: Marc Zyngier 
> >> ---
> >>  arch/arm/include/asm/kvm_host.h   |  3 +++
> >>  arch/arm/kvm/arm.c|  2 ++
> >>  arch/arm64/include/asm/kvm_asm.h  |  4 
> >>  arch/arm64/include/asm/kvm_host.h |  3 +++
> >>  arch/arm64/kvm/Makefile   |  1 +
> >>  arch/arm64/kvm/fpsimd.S   | 39 
> >> 
> >>  arch/arm64/kvm/handle_fpsimd.c| 42 
> >> +++
> >>  arch/arm64/kvm/hyp.S  | 27 -
> >>  8 files changed, 94 insertions(+), 27 deletions(-)
> >>  create mode 100644 arch/arm64/kvm/fpsimd.S
> >>  create mode 100644 arch/arm64/kvm/handle_fpsimd.c
> >>
> >> diff --git a/arch/arm/include/asm/kvm_host.h 
> >> b/arch/arm/include/asm/kvm_host.h
> >> index d71607c..65cf1d1 100644
> >> --- a/arch/arm/include/asm/kvm_host.h
> >> +++ b/arch/arm/include/asm/kvm_host.h
> >> @@ -226,6 +226,9 @@ static inline void vgic_arch_setup(const struct 
> >> vgic_params *vgic)
> >>  int kvm_perf_init(void);
> >>  int kvm_perf_teardown(void);
>

Re: [PATCH v2 05/10] KVM: arm: introduce kvm_arch_setup/clear_debug()

2015-04-13 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:08:03PM +0100, Alex Bennée wrote:
> This is a precursor for later patches which will need to do more to
> setup debug state before entering the hyp.S switch code. The existing
> functionality for setting mdcr_el2 has been moved out of hyp.S and now
> uses the value kept in vcpu->arch.mdcr_el2.
> 
> This also moves the conditional setting of the TDA bit from the hyp code
> into the C code.
> 
> Signed-off-by: Alex Bennée 
> 
>  create mode 100644 arch/arm64/kvm/debug.c
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index 41008cd..8c01c97 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -242,5 +242,7 @@ static inline void kvm_arch_hardware_unsetup(void) {}
>  static inline void kvm_arch_sync_events(struct kvm *kvm) {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
> +static inline void kvm_arch_setup_debug(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_arch_clear_debug(struct kvm_vcpu *vcpu) {}
>  
>  #endif /* __ARM_KVM_HOST_H__ */
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 445933d..7ea8b0e 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -523,6 +523,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
> kvm_run *run)
>  
>   kvm_vgic_flush_hwstate(vcpu);
>   kvm_timer_flush_hwstate(vcpu);
> + kvm_arch_setup_debug(vcpu);
>  
>   local_irq_disable();
>  
> @@ -569,6 +570,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
> kvm_run *run)
>* Back from guest
>*/
>  
> + kvm_arch_clear_debug(vcpu);
>   kvm_timer_sync_hwstate(vcpu);
>   kvm_vgic_sync_hwstate(vcpu);
>  
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 8ac3c70..0631840 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -101,6 +101,7 @@ struct kvm_vcpu_arch {
>  
>   /* HYP configuration */
>   u64 hcr_el2;
> + u32 mdcr_el2;
>  
>   /* Exception Information */
>   struct kvm_vcpu_fault_info fault;
> @@ -257,4 +258,7 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) 
> {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  
> +void kvm_arch_setup_debug(struct kvm_vcpu *vcpu);
> +void kvm_arch_clear_debug(struct kvm_vcpu *vcpu);
> +
>  #endif /* __ARM64_KVM_HOST_H__ */
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index f7fa65d..cd06209 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -122,6 +122,7 @@ int main(void)
>DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, 
> arch.fault.hpfar_el2));
>DEFINE(VCPU_DEBUG_FLAGS,   offsetof(struct kvm_vcpu, arch.debug_flags));
>DEFINE(VCPU_HCR_EL2,   offsetof(struct kvm_vcpu, 
> arch.hcr_el2));
> +  DEFINE(VCPU_MDCR_EL2,  offsetof(struct kvm_vcpu, arch.mdcr_el2));
>DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
>DEFINE(VCPU_HOST_CONTEXT,  offsetof(struct kvm_vcpu, 
> arch.host_cpu_context));
>DEFINE(VCPU_TIMER_CNTV_CTL,offsetof(struct kvm_vcpu, 
> arch.timer_cpu.cntv_ctl));
> diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile
> index 4e6e09e..6796d4a 100644
> --- a/arch/arm64/kvm/Makefile
> +++ b/arch/arm64/kvm/Makefile
> @@ -17,7 +17,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o
>  
>  kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o
>  kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o
> -kvm-$(CONFIG_KVM_ARM_HOST) += guest.o reset.o sys_regs.o 
> sys_regs_generic_v8.o
> +kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o 
> sys_regs_generic_v8.o
>  
>  kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic.o
>  kvm-$(CONFIG_KVM_ARM_VGIC) += $(KVM)/arm/vgic-v2.o
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> new file mode 100644
> index 000..8a29d0b
> --- /dev/null
> +++ b/arch/arm64/kvm/debug.c
> @@ -0,0 +1,58 @@
> +/*
> + * Debug and Guest Debug support
> + *
> + * Copyright (C) 2015 - Linaro Ltd
> + * Author: Alex Bennée 
> + *
> + * This program is free software; you can redistribute it and/or modify
> + * it under the terms of the GNU General Public License version 2 as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of

Re: [PATCH v2 05/10] KVM: arm: introduce kvm_arch_setup/clear_debug()

2015-04-13 Thread Christoffer Dall
On Mon, Apr 13, 2015 at 04:36:23PM +0200, Christoffer Dall wrote:

[...]

> > +
> > +/**
> > + * kvm_arch_setup_debug - set-up debug related stuff
> 
> nit: I think you want "set up" when it's a verb.
> 
> > + *
> > + * @vcpu:  the vcpu pointer
> > + *
> > + * This is called before each entry in to the hypervisor to setup any
> 
> s/in to/into/
> s/setup/set up/
> 
> > + * debug related registers. Currently this just ensures we will trap
> > + * access to:
> 
> guest accesses to:
> 
> > + *  - Performance monitors (MDCR_EL2_TPM/MDCR_EL2_TPMCR)
> > + *  - Debug ROM Address (MDCR_EL2_TDRA)
> > + *  - Power down debug registers (MDCR_EL2_TDOSA)
> > + *
> > + * Additionally the hypervisor lazily saves/restores the debug
> > + * register state. If it is not currently doing so (arch.debug_flags)
> > + * then we also need to ensure we trap if the guest messes with them
> > + * so we know we need to save them.
> 
> This paragraph is a little hard to make sense of.  If I understand it
> correctly, the point is that when debugging the guest we need to make
> sure guest accesses to the debug registers traps?  If so, I would
> suggest something like:
> 
>   Additionally, KVM only traps guest accesses to the debug registers if
>   the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
>   flag on vcpu->arch.debug_flags).  Since the guest must not interfere
>   with the hardware state when debugging the guest, we must ensure that
>   trapping is enabled whenever we are debugging the guest.
> 

thinking about this, I don't think we're enforcing this yet, but maybe
that will come in the later patches or I misread the original paragraph.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 02/10] KVM: define common __KVM_GUESTDBG_USE_SW/HW_BP values

2015-04-14 Thread Christoffer Dall
On Mon, Apr 13, 2015 at 03:51:33PM +0100, Alex Bennée wrote:
> 
> Christoffer Dall  writes:
> 
> > On Tue, Mar 31, 2015 at 04:08:00PM +0100, Alex Bennée wrote:
> >> Currently x86, powerpc and soon arm64 use the same two architecture
> >> specific bits for guest debug support for software and hardware
> >> breakpoints. This makes the shared values explicit while leaving the
> >> gate open for another architecture to use some other value if they
> >> really really want to.
> >> 
> >> Signed-off-by: Alex Bennée 
> >> 
> >> diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
> >> b/arch/powerpc/include/uapi/asm/kvm.h
> >> index ab4d473..1731569 100644
> >> --- a/arch/powerpc/include/uapi/asm/kvm.h
> >> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> >> @@ -310,8 +310,8 @@ struct kvm_guest_debug_arch {
> >>   * and upper 16 bits are architecture specific. Architecture specific 
> >> defines
> >>   * that ioctl is for setting hardware breakpoint or software breakpoint.
> >>   */
> >> -#define KVM_GUESTDBG_USE_SW_BP0x0001
> >> -#define KVM_GUESTDBG_USE_HW_BP0x0002
> >> +#define KVM_GUESTDBG_USE_SW_BP__KVM_GUESTDBG_USE_SW_BP
> >> +#define KVM_GUESTDBG_USE_HW_BP__KVM_GUESTDBG_USE_HW_BP
> >>  
> >>  /* definition of registers in kvm_run */
> >>  struct kvm_sync_regs {
> >> diff --git a/arch/x86/include/uapi/asm/kvm.h 
> >> b/arch/x86/include/uapi/asm/kvm.h
> >> index d7dcef5..1438202 100644
> >> --- a/arch/x86/include/uapi/asm/kvm.h
> >> +++ b/arch/x86/include/uapi/asm/kvm.h
> >> @@ -250,8 +250,8 @@ struct kvm_debug_exit_arch {
> >>__u64 dr7;
> >>  };
> >>  
> >> -#define KVM_GUESTDBG_USE_SW_BP0x0001
> >> -#define KVM_GUESTDBG_USE_HW_BP0x0002
> >> +#define KVM_GUESTDBG_USE_SW_BP__KVM_GUESTDBG_USE_SW_BP
> >> +#define KVM_GUESTDBG_USE_HW_BP__KVM_GUESTDBG_USE_HW_BP
> >>  #define KVM_GUESTDBG_INJECT_DB0x0004
> >>  #define KVM_GUESTDBG_INJECT_BP0x0008
> >>  
> >> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> >> index 5eedf84..ce2db14 100644
> >> --- a/include/uapi/linux/kvm.h
> >> +++ b/include/uapi/linux/kvm.h
> >> @@ -525,8 +525,16 @@ struct kvm_s390_irq {
> >>  
> >>  /* for KVM_SET_GUEST_DEBUG */
> >>  
> >> -#define KVM_GUESTDBG_ENABLE   0x0001
> >> -#define KVM_GUESTDBG_SINGLESTEP   0x0002
> >> +#define KVM_GUESTDBG_ENABLE   (1 << 0)
> >> +#define KVM_GUESTDBG_SINGLESTEP   (1 << 1)
> >> +
> >> +/*
> >> + * Architecture specific stuff uses the top 16 bits of the field,
> >
> > can you be more specific than 'stuff' here?  features?
> >
> >> + * however there is some shared commonality for the common cases
> >
> > I don't like this sentence; shared commonality is a pleonasm and the use
> > of however makes it sounds like there's some caveat here.
> 
> OK I can see that - after I looked it up ;-)
> 
> > If the top 16 bits are indeed arhictecture specific, then I think they
> > should just be defined in their architecture specific headers.  Unless
> > the idea here is that there's a fixed set of of flags that architectures
> > can choose to support, in which case it should simply be defined in the
> > common header.
> 
> Well an architecture might not support some features and want to use
> those bits for something else? I didn't want to force the bottom two
> of the architecture specific bits to wasted if the features don't exist.
> 
In that case I think the definition is local to each architecture and
should indeed just be duplicated.  The __ definitions complicate more
than they help as they are exported to userspace etc.  The KVM
maintainers may have a different view on this though.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 06/10] KVM: arm64: guest debug, add SW break point support

2015-04-14 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:08:04PM +0100, Alex Bennée wrote:
> This adds support for SW breakpoints inserted by userspace.
> 
> We do this by trapping all BKPT exceptions in the
> hypervisor (MDCR_EL2_TDE).

you mean trapping all exceptions in the guest to the hypervisor?

> The kvm_debug_exit_arch carries the address
> of the exception.

why?  can userspace not simply read out the PC using GET_ONE_REG?

> If user-space doesn't know of the breakpoint then we
> have a guest inserted breakpoint and the hypervisor needs to start again
> and deliver the exception to guest.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v2
>   - update to use new exit struct
>   - tweak for C setup
>   - do our setup in debug_setup/clear code
>   - fixed up comments
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index 06c5064..17d4f9c 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2626,7 +2626,7 @@ when running. Common control bits are:
>  The top 16 bits of the control field are architecture specific control
>  flags which can include the following:
>  
> -  - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86]
> +  - KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
>- KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
>- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
>- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 7ea8b0e..d3bc8dc 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -304,7 +304,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>   kvm_arm_set_running_vcpu(NULL);
>  }
>  
> -#define KVM_GUESTDBG_VALID (KVM_GUESTDBG_ENABLE)
> +#define KVM_GUESTDBG_VALID (KVM_GUESTDBG_ENABLE|KVM_GUESTDBG_USE_SW_BP)

nit: spaces around the operator

>  
>  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>   struct kvm_guest_debug *dbg)
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 8a29d0b..cff0475 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -45,11 +45,18 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>   vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | MDCR_EL2_TPMCR);
>   vcpu->arch.mdcr_el2 |= (MDCR_EL2_TDRA | MDCR_EL2_TDOSA);
>  
> + /* Trap debug register access? */

other patch

>   if (!vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY)
>   vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
>   else
>   vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA;
>  
> + /* Trap breakpoints? */
> + if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
> + vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
> + else
> + vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDE;

so now you're trapping all debug exceptions, right?

what happens if the guest is using the hardware to debug stuff and
generates other kinds of debug exceptions, like a hardware breakpoint,
will we not see an unhandled exception and the guest being forcefully
killed?

> +
>  }
>  
>  void kvm_arch_clear_debug(struct kvm_vcpu *vcpu)
> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> index 524fa25..ed1bbb4 100644
> --- a/arch/arm64/kvm/handle_exit.c
> +++ b/arch/arm64/kvm/handle_exit.c
> @@ -82,6 +82,37 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct 
> kvm_run *run)
>   return 1;
>  }
>  
> +/**
> + * kvm_handle_debug_exception - handle a debug exception instruction

handle a software breakpoint exception

> + *
> + * @vcpu:the vcpu pointer
> + * @run: access to the kvm_run structure for results
> + *
> + * We route all debug exceptions through the same handler as we

all debug exceptions?  software breakpoints and all?  then why the above
short text?

> + * just need to report the PC and the HSR values to userspace.
> + * Userspace may decide to re-inject the exception and deliver it to
> + * the guest if it wasn't for the host to deal with.

now I'm confused - does userspace set up the guest to receive an
exception or does it tell KVM to emulate an exception for the guest or
do we execute the breakpoint without trapping the debug exception?

> + */
> +static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run *run)
> +{
> + u32 hsr = kvm_vcpu_get_hsr(vcpu);
> +
> + run->exit_reason = KVM_EXIT_DEBUG;
> + run->debug.arch.hsr = hsr;
> +
> + switch (hsr >> ESR_ELx_EC_SHIFT) {
> + case ESR_ELx_EC_BKPT32:
> + case ESR_ELx_EC_BRK64:
> + run->debug.arch.pc = *vcpu_pc(vcpu);
> + break;
> + default:
> + kvm_err("%s: un-handled case hsr: %#08x\n",
> + __func__, (unsigned int) hsr);

this should never happen right?

> + break;
> + }
> + return 0;
> +}
> +
>  static exit_handle_fn arm_exit_handlers[] = {
>   [ESR_ELx_EC_WFx]= kvm_han

Re: [PATCH v2 07/10] KVM: arm64: guest debug, add support for single-step

2015-04-14 Thread Christoffer Dall
Hi Alex,

On Tue, Mar 31, 2015 at 04:08:05PM +0100, Alex Bennée wrote:
> This adds support for single-stepping the guest. As userspace can and
> will manipulate guest registers before restarting any tweaking of the
> registers has to occur just before control is passed back to the guest.

this sentence is hard to read.  Do you mean:

(a) As userspace can and will manipulate guest registers, we must ensure
that any tweaking of the registers before restarting the guest happens
immediately before...

or

(b) As userspace manipulates guest registers before restarting the
guest, we must ensure that any tweaking of the registers happens
immediately before...

> Furthermore while guest debugging is in effect we need to squash the

Furthermore, while guest debugging is in effect,
(commas)

> ability of the guest to single-step itself as we have no easy way of
> re-entering the guest after the exception has been delivered to the
> hypervisor.

I'm not sure I understand this last part of the sentence.  Is the point
that if we trap on a guest single-step exception we cannot easily inject
such an exception back into the guest and therefore we trap the guest if
it tries to set itself up for single-stepping?

What is our recourse then?  To just ignore the single-step setting of
the guest and execute it as normal (while single-stepping the guest from
the outside)?

> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v2
>   - Move pstate/mdscr manipulation into C
>   - don't export guest_debug to assembly
>   - add accessor for saved_debug regs
>   - tweak save/restore of mdscr_el1
> 
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index d3bc8dc..c1ed8cb 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -304,7 +304,21 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>   kvm_arm_set_running_vcpu(NULL);
>  }
>  
> -#define KVM_GUESTDBG_VALID (KVM_GUESTDBG_ENABLE|KVM_GUESTDBG_USE_SW_BP)
> +#define KVM_GUESTDBG_VALID (KVM_GUESTDBG_ENABLE |\
> + KVM_GUESTDBG_USE_SW_BP | \
> + KVM_GUESTDBG_SINGLESTEP)
> +
> +/**
> + * kvm_arch_vcpu_ioctl_set_guest_debug - Setup guest debugging
> + * @kvm: pointer to the KVM struct
> + * @kvm_guest_debug: the ioctl data buffer
> + *
> + * This sets up the VM for guest debugging. Care has to be taken when
> + * manipulating guest registers as these will be set/cleared by the
> + * hyper-visor controller, typically before each kvm_run event. As a

which guest registers are set/cleared by userspace exactly?

s/by the hyper-visor controller/by userspace/

> + * result modification of the guest registers needs to take place

As a result, (comma)

s/needs to/must/

> + * after they have been restored in the hyp.S trampoline code.

trampoline code?  The trampoline code we are referring to is in
hyp-init.S.  Do you mean in EL2?  Then just say in hyp.S or say in EL2
or in hyp mode.

> + */
>  
>  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>   struct kvm_guest_debug *dbg)
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 0631840..6a33647 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -121,6 +121,13 @@ struct kvm_vcpu_arch {
>* here.
>*/
>  
> + /* Registers pre any guest debug manipulations */

I couldn't find 'pre' as an independent word in any English
dictionaries.  I'm also not entirely sure what you mean?  Who modifies
this when, and why do we need to store this?

> + struct {
> + u32 pstate_ss_bit;
> + u32 mdscr_el1_bits;
> +
> + } debug_saved_regs;

If I understood this state correctly (see below), then guest_debug_state
is probably a better name for this struct.

> +
>   /* Don't run the guest */
>   bool pause;
>  
> @@ -143,6 +150,7 @@ struct kvm_vcpu_arch {
>  
>  #define vcpu_gp_regs(v)  (&(v)->arch.ctxt.gp_regs)
>  #define vcpu_sys_reg(v,r)((v)->arch.ctxt.sys_regs[(r)])
> +#define vcpu_debug_saved_reg(v, r) ((v)->arch.debug_saved_regs.r)

hmm, not sure this is warranted if the 'saved_regs' is not the current
state of the VM, which is sort of what the vcpu_gp_regs() and friends
hint at.  Maybe I'm just not getting exactly what piece of state it is.

>  /*
>   * CP14 and CP15 live in the same array, as they are backed by the
>   * same system registers.
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index cff0475..b32362c 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -19,8 +19,16 @@
>  
>  #include 
>  
> +#include 
> +#include 
>  #include 
>  #include 
> +#include 
> +
> +/* These are the bits of MDSCR_EL1 we may mess with */

we may mess with?  Can you be more specific?

> +#define MDSCR_EL1_DEBUG_BITS (DBG_MDSCR_SS | \
> + DBG_MDSCR_KDE | \
> + DBG_MDSCR_MDE)
>  
>  /**
>   * kvm_arch_se

Re: [PATCH v2 08/10] KVM: arm64: guest debug, HW assisted debug support

2015-04-14 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:08:06PM +0100, Alex Bennée wrote:
> This adds support for userspace to control the HW debug registers for
> guest debug. We'll only copy the $ARCH defined number across as that is
> all that hyp.S will use anyway. 

I don't really understand what this sentence means?

> I've moved some helper functions into
> the hw_breakpoint.h header for re-use.
> 
> As with single step we need to tweak the guest registers to enable the
> exceptions so we need to save and restore those bits.
> 
> Two new capabilities have been added to the KVM_EXTENSION ioctl to allow
> userspace to query the number of hardware break and watch points
> available on the host hardware.
> 
> As QEMU tests for watchpoints based on the address and not the PC we
> also need to export the value of far_el2 to userspace.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v2
>- switched to C setup
>- replace host debug registers directly into context
>- minor tweak to api docs
>- setup right register for debug
>- add FAR_EL2 to debug exit structure
>- add support for trapping debug register access
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index 17d4f9c..ac34093 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2627,7 +2627,7 @@ The top 16 bits of the control field are architecture 
> specific control
>  flags which can include the following:
>  
>- KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
> -  - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
> +  - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64]
>- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
>- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
>- KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
> @@ -2642,6 +2642,10 @@ updated to the correct (supplied) values.
>  The second part of the structure is architecture specific and
>  typically contains a set of debug registers.
>  
> +For arm64 the number of debug registers is implementation defined and
> +can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
> +KVM_CAP_GUEST_DEBUG_HW_WPS capabilities.
> +

can you document their behavior more specifically?  I assume they both
return 0 if HW assisted debugging is not supported and return the number
of implemented hardware registers otherwise?

How does this work on big.LITTLE systems where cores may have a different
number of implemented registers?

>  When debug events exit the main run loop with the reason
>  KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
>  structure containing architecture specific debug information.
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index c1ed8cb..a286026 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -306,6 +306,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  
>  #define KVM_GUESTDBG_VALID (KVM_GUESTDBG_ENABLE |\
>   KVM_GUESTDBG_USE_SW_BP | \
> + KVM_GUESTDBG_USE_HW_BP | \
>   KVM_GUESTDBG_SINGLESTEP)
>  
>  /**
> @@ -328,6 +329,26 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
> *vcpu,
>   return -EINVAL;
>  
>   vcpu->guest_debug = dbg->control;
> +
> + /* Hardware assisted Break and Watch points */
> + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> + int nb = get_num_brps();
> + int nw = get_num_wrps();
> +
> + /* Copy across up to IMPDEF debug registers to our
> +  * shadow copy in the vcpu structure. The debug code
> +  * will then set them up before we re-enter the guest.
> +  */
> + memcpy(vcpu->arch.guest_debug_regs.dbg_bcr,
> + dbg->arch.dbg_bcr, sizeof(__u64)*nb);
> + memcpy(vcpu->arch.guest_debug_regs.dbg_bvr,
> + dbg->arch.dbg_bvr, sizeof(__u64)*nb);
> + memcpy(vcpu->arch.guest_debug_regs.dbg_wcr,
> + dbg->arch.dbg_wcr, sizeof(__u64)*nw);
> + memcpy(vcpu->arch.guest_debug_regs.dbg_wvr,
> + dbg->arch.dbg_wvr, sizeof(__u64)*nw);
> + }
> +
>   } else {
>   /* If not enabled clear all flags */
>   vcpu->guest_debug = 0;
> diff --git a/arch/arm64/include/asm/hw_breakpoint.h 
> b/arch/arm64/include/asm/hw_breakpoint.h
> index 52b484b..c450552 100644
> --- a/arch/arm64/include/asm/hw_breakpoint.h
> +++ b/arch/arm64/include/asm/hw_breakpoint.h
> @@ -130,6 +130,18 @@ static inline void ptrace_hw_copy_thread(struct 
> task_struct *task)
>  }
>  #endif
>  
> +/* Determine number of BRP registers available. */
> +static inline int get_num_brps(vo

Re: [PATCH v2 08/10] KVM: arm64: guest debug, HW assisted debug support

2015-04-14 Thread Christoffer Dall
On Fri, Apr 10, 2015 at 02:25:21PM +0200, Andrew Jones wrote:

[...]

> > --- a/arch/arm64/kvm/sys_regs.c
> > +++ b/arch/arm64/kvm/sys_regs.c
> > @@ -196,16 +196,49 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu 
> > *vcpu,
> >   * - If the dirty bit is set, save guest registers, restore host
> >   *   registers and clear the dirty bit. This ensure that the host can
> >   *   now use the debug registers.
> > + *
> > + * We also use this mechanism to set-up the debug registers for guest
> setup

since I'm in this mood:

setup: noun or adjective
set-up: noun derived from the phrasal verb, example "Run! It's a set-up."
set up: verb

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 09/10] KVM: arm64: trap nested debug register access

2015-04-14 Thread Christoffer Dall
On Mon, Apr 13, 2015 at 08:59:21AM +0100, Alex Bennée wrote:

[...]

> >> +  /* MDSCR_EL1 */
> >> +  if (r->reg == MDSCR_EL1) {
> >> +  if (p->is_write)
> >> +  vcpu_debug_saved_reg(vcpu, mdscr_el1) =
> >> +  *vcpu_reg(vcpu, p->Rt);
> >> +  else
> >> +  *vcpu_reg(vcpu, p->Rt) =
> >> +  vcpu_debug_saved_reg(vcpu, mdscr_el1);
> >
> > With this lines wrapping, {}'s might be nice.
> 
> My natural inclination is to wrap in {}'s but I know the kernel is a fan
> of the single-statement if forms.
> 
It's accepted to use braces for multi-line single statements - and I
prefer it too :)

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 09/10] KVM: arm64: trap nested debug register access

2015-04-14 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:08:07PM +0100, Alex Bennée wrote:
> When we are using the hardware registers for guest debug we need to deal
> with the guests access to them. There is already a mechanism for dealing
> with these accesses so we build on top of that.
> 
>   - mdscr_el1_bits is renamed as we save the whole register
>   - any access to mdscr_el1 is now stored in the mirror location
>   - if we are using HW assisted debug we do the same with DBG[WB][CV]R
> 
> There is one register (MDCCINT_EL1) which guest debug doesn't care about
> so this behaves as before.
> 
> Signed-off-by: Alex Bennée 
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 2c359c9..3d32d45 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -122,10 +122,13 @@ struct kvm_vcpu_arch {
>* here.
>*/
>  
> - /* Registers pre any guest debug manipulations */
> + /* Registers before any guest debug manipulations. These
> +  * shadow registers are updated by the kvm_handle_sys_reg
> +  * trap handler if the guest accesses or updates them
> +  */
>   struct {
>   u32 pstate_ss_bit;
> - u32 mdscr_el1_bits;
> + u32 mdscr_el1;
>  
>   struct kvm_guest_debug_arch debug_regs;
>   } debug_saved_regs;
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 3b368f3..638c111 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -55,8 +55,6 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>   vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | MDCR_EL2_TPMCR);
>   vcpu->arch.mdcr_el2 |= (MDCR_EL2_TDRA | MDCR_EL2_TDOSA);
>  
> - trace_kvm_arch_setup_debug_reg32("MDCR_EL2", vcpu->arch.mdcr_el2);
> -
>   /*
>* If we are not treating debug registers are dirty we need
>* to trap if the guest starts accessing them.
> @@ -71,8 +69,10 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>   /* Save pstate/mdscr */
>   vcpu_debug_saved_reg(vcpu, pstate_ss_bit) =
>   *vcpu_cpsr(vcpu) & DBG_SPSR_SS;
> - vcpu_debug_saved_reg(vcpu, mdscr_el1_bits) =
> - vcpu_sys_reg(vcpu, MDSCR_EL1) & MDSCR_EL1_DEBUG_BITS;
> +
> + vcpu_debug_saved_reg(vcpu, mdscr_el1) =
> + vcpu_sys_reg(vcpu, MDSCR_EL1);
> +

you can avoid this churn in the patches by following Drew's advice to a
previous patch.

>   /*
>* Single Step (ARM ARM D2.12.3 The software step state
>* machine)
> @@ -161,9 +161,8 @@ void kvm_arch_clear_debug(struct kvm_vcpu *vcpu)
>   *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
>   *vcpu_cpsr(vcpu) |= vcpu_debug_saved_reg(vcpu, pstate_ss_bit);
>  
> - vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~MDSCR_EL1_DEBUG_BITS;
> - vcpu_sys_reg(vcpu, MDSCR_EL1) |=
> - vcpu_debug_saved_reg(vcpu, mdscr_el1_bits);
> + vcpu_sys_reg(vcpu, MDSCR_EL1) =
> + vcpu_debug_saved_reg(vcpu, mdscr_el1);
>  
>   /*
>* If we were using HW debug we need to restore the
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index be9b188..d43d7d1 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -208,39 +208,61 @@ static bool trap_debug_regs(struct kvm_vcpu *vcpu,
>   const struct sys_reg_params *p,
>   const struct sys_reg_desc *r)
>  {
> - if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
> - struct kvm_guest_debug_arch *saved;
> - __u64 *val;
> -
> - saved = &vcpu_debug_saved_reg(vcpu, debug_regs);
> -
> - if (r->reg >= DBGBCR0_EL1 && r->reg <= DBGBCR15_EL1)
> - val = &saved->dbg_bcr[r->reg - DBGBCR0_EL1];
> - else if (r->reg >= DBGBVR0_EL1 && r->reg <= DBGBVR15_EL1)
> - val = &saved->dbg_bvr[r->reg - DBGBVR0_EL1];
> - else if (r->reg >= DBGWCR0_EL1 && r->reg <= DBGWCR15_EL1)
> - val = &saved->dbg_wcr[r->reg - DBGWCR0_EL1];
> - else if (r->reg >= DBGWVR0_EL1 && r->reg <= DBGWVR15_EL1)
> - val = &saved->dbg_wvr[r->reg - DBGWVR0_EL1];
> - else {
> - kvm_err("Bad register index %d\n", r->reg);
> - return false;
> + if (vcpu->guest_debug) {
> +
> + /* MDSCR_EL1 */
> + if (r->reg == MDSCR_EL1) {
> + if (p->is_write)
> + vcpu_debug_saved_reg(vcpu, mdscr_el1) =
> + *vcpu_reg(vcpu, p->Rt);
> + else
> + *vcpu_reg(vcpu, p->Rt) =
> + vcpu_debug_saved_reg(vcpu, mdscr_el1);
> +
> + return t

Re: [PATCH v2 10/10] KVM: arm64: add trace points for guest_debug debug

2015-04-14 Thread Christoffer Dall
On Tue, Mar 31, 2015 at 04:08:08PM +0100, Alex Bennée wrote:
> This includes trace points for:
>   kvm_arch_setup_guest_debug
>   kvm_arch_clear_guest_debug
>   kvm_handle_guest_debug
> 
> I've also added some generic register setting trace events so I can
> watch the register values being built up over time. The local
> dump_dbg_regs() function dumps all the HW BKPT and WPT registers.
> 
> I've also added a #define trace_dreg to shorten some lines.
> 
> Signed-off-by: Alex Bennée 
> 
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 638c111..7c96288 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -25,12 +25,37 @@
>  #include 
>  #include 
>  
> +#include "trace.h"
> +
> +#define trace_dreg(name, value) trace_kvm_arch_setup_debug_reg32(name, value)
> +
>  /* These are the bits of MDSCR_EL1 we may mess with */
>  #define MDSCR_EL1_DEBUG_BITS (DBG_MDSCR_SS | \
>   DBG_MDSCR_KDE | \
>   DBG_MDSCR_MDE)
>  
>  /**
> + * dump_dbg_regs - simple debug helper
> + *
> + * This provides a simple helper to dump the HW debug registers
> + */
> +static void dump_dbg_regs(struct kvm_vcpu *vcpu, int nb, int nw)
> +{
> + int i;
> +
> + for (i = 0; i < nb; i++) {
> + trace_printk("bkpt%d: 0x%08x:0x%llx\n", i,
> + (u32) vcpu_sys_reg(vcpu, DBGBCR0_EL1 + i),
> + vcpu_sys_reg(vcpu, DBGBVR0_EL1 + i));
> + }
> + for (i = 0; i < nb; i++) {
> + trace_printk("wtpt%d: 0x%08x:0x%llx\n", i,
> + (u32) vcpu_sys_reg(vcpu, DBGWCR0_EL1 + i),
> + vcpu_sys_reg(vcpu, DBGWVR0_EL1 + i));
> + }
> +}
> +
> +/**
>   * kvm_arch_setup_debug - set-up debug related stuff
>   *
>   * @vcpu:the vcpu pointer
> @@ -52,9 +77,13 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>  {
>   bool trap_debug = false;
>  
> + trace_kvm_arch_setup_debug(vcpu->guest_debug);
> +
>   vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM | MDCR_EL2_TPMCR);
>   vcpu->arch.mdcr_el2 |= (MDCR_EL2_TDRA | MDCR_EL2_TDOSA);
>  
> + trace_kvm_arch_setup_debug_reg32("MDCR_EL2", vcpu->arch.mdcr_el2);
> +
>   /*
>* If we are not treating debug registers are dirty we need
>* to trap if the guest starts accessing them.
> @@ -66,6 +95,8 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>   if (vcpu->guest_debug) {
>   vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
>  
> + trace_dreg("MDCR_EL2", vcpu->arch.mdcr_el2);
> +
>   /* Save pstate/mdscr */
>   vcpu_debug_saved_reg(vcpu, pstate_ss_bit) =
>   *vcpu_cpsr(vcpu) & DBG_SPSR_SS;
> @@ -73,6 +104,11 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>   vcpu_debug_saved_reg(vcpu, mdscr_el1) =
>   vcpu_sys_reg(vcpu, MDSCR_EL1);
>  
> + trace_dreg("Save: PSTATE.SS",
> + vcpu_debug_saved_reg(vcpu, pstate_ss_bit));
> + trace_dreg("Save: MDSCR",
> + vcpu_debug_saved_reg(vcpu, mdscr_el1));
> +
>   /*
>* Single Step (ARM ARM D2.12.3 The software step state
>* machine)
> @@ -88,6 +124,8 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>   *vcpu_cpsr(vcpu) &= ~DBG_SPSR_SS;
>   vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
>   }
> + trace_dreg("SPSR_EL2", *vcpu_cpsr(vcpu));
> + trace_dreg("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
>  
>   /*
>* HW Break/Watch points
> @@ -136,6 +174,9 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>  &host->dbg_wvr,
>  sizeof(__u64)*nw);
>  
> + if (trace_kvm_arch_setup_debug_reg32_enabled())
> + dump_dbg_regs(vcpu, nb, nw);
> +
>   /* Make sure hyp.S copies them in/out */
>   vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
>   /* Also track guest changes */
> @@ -147,15 +188,24 @@ void kvm_arch_setup_debug(struct kvm_vcpu *vcpu)
>   vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDE;
>   }
>  
> + trace_kvm_arch_setup_debug_reg32("MDCR_EL2", vcpu->arch.mdcr_el2);
> + trace_kvm_arch_setup_debug_reg32("MDSCR_EL1",
> + vcpu_sys_reg(vcpu, MDSCR_EL1));
> +
> +
>   /* Trap debug register access? */
>   if (trap_debug)
>   vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
>   else
>   vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA;
> +
> + trace_kvm_arch_setup_debug_reg32("MDCR_EL2", vcpu->arch.mdcr_el2);
>  }
>  
>  void kvm_arch_clear_debug(struct kvm_vcpu *vcpu)
>  {
> + trace_kvm_arch_clear_debug(vcpu->guest_debug);
> +
>   if (vcpu->guest_debug) {
>   /* Restore pstate/mdscr bits we may have messed with 

Re: [PATCH] KVM: arm: irqfd: fix value returned by kvm_irq_map_gsi

2015-04-14 Thread Christoffer Dall
On Mon, Apr 13, 2015 at 03:01:59PM +0200, Eric Auger wrote:
> irqfd/arm currently does not support routing. kvm_irq_map_gsi is
> supposed to return all the routing entries associated with the
> provided gsi and return the number of those entries. We should
> return 0 at this point.
> 
> Signed-off-by: Eric Auger 
> ---
>  virt/kvm/arm/vgic.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> index 9ad074e..9b4f7d4 100644
> --- a/virt/kvm/arm/vgic.c
> +++ b/virt/kvm/arm/vgic.c
> @@ -1947,7 +1947,7 @@ int kvm_irq_map_gsi(struct kvm *kvm,
>   struct kvm_kernel_irq_routing_entry *entries,
>   int gsi)
>  {
> - return gsi;
> + return 0;
>  }
>  
>  int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin)
> -- 

Acked-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Linaro-uefi] UEFI on KVM fails to start on juno on cortex-a57 cluster

2015-04-14 Thread Christoffer Dall
On Mon, Apr 13, 2015 at 11:04:00AM +0200, Ard Biesheuvel wrote:
> On 27 March 2015 at 01:02, Ard Biesheuvel  wrote:
> > On 26 March 2015 at 09:09, Riku Voipio  wrote:
> >> On 25 March 2015 at 21:32, Ard Biesheuvel  
> >> wrote:
> >>> On 25 March 2015 at 17:14, Ard Biesheuvel  
> >>> wrote:
>  On 25 March 2015 at 17:14, Ard Biesheuvel  
>  wrote:
> > On 25 March 2015 at 07:59, Riku Voipio  wrote:
> >> Hi,
> >>
> >> It appears on juno, I can start kvm with UEFI only on cortex-a53 cores:
> >>
> >>
> >> taskset -c 0 qemu-system-aarch64 -m 1024 -cpu host -M virt -bios
> >> QEMU_EFI.fd -enable-kvm -nographic
> >> -> works:
> >> UEFI Interactive Shell v2.0
> >> taskset -c 1 qemu-system-aarch64 -m 1024 -cpu host -M virt -bios
> >> QEMU_EFI.fd -enable-kvm -nographic
> >> -> hangs at cpu spinning 100%
> >> ...
> >>
> >
> > I can reproduce the hang, both with your UEFI binary and my own release 
> > build.
> > The debug build works fine, unfortunately...
> >
> 
>  Tianocore built from master as of today, that is.
> 
> >>>
> >>> OK, it appears that we were missing some cache maintenance. It is not
> >>> obvious how that should affect A57 only, but with these patches, I can
> >>> now reliably run the release version on my Seattle A57
> >>
> >>> https://git.linaro.org/people/ard.biesheuvel/uefi-next.git/shortlog/refs/heads/qemu-xen-cache-maintenance
> >>
> >> Thanks. Do you know when there would be a new build on releases or 
> >> snapshots?
> >>
> >
> > Let me check with Leif. We have another candidate patch now that he
> > could perhaps apply and kick off a build?
> >
> 
> I now have independent confirmation (from Laszlo Ersek) that the cache
> maintenance patches I am proposing fix the issue on Seattle.

Which patches are those?  For UEFI?

> Hopefully this means Juno is fixed as well.
> 
> I am trying to get a snapshot out asap, today or tomorrow perhaps?

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: linux-next: manual merge of the kvm-arm tree with Linus' tree

2015-04-16 Thread Christoffer Dall
Hi Paolo and Marc,

On Tue, Apr 07, 2015 at 06:20:15PM +0200, Paolo Bonzini wrote:
> 
> 
> On 18/03/2015 08:55, Christoffer Dall wrote:
> > Hi Stephen,
> > 
> > On Wed, Mar 18, 2015 at 02:41:11PM +1100, Stephen Rothwell wrote:
> >> Hi all,
> >>
> >> Today's linux-next merge of the kvm-arm tree got a conflict in
> >> virt/kvm/arm/vgic.c between commit ae705930fca6 ("arm/arm64: KVM: Keep
> >> elrsr/aisr in sync with software model") from Linus' tree and commit
> >> 71760950bf3d ("arm/arm64: KVM: add a common vgic_queue_irq_to_lr fn")
> >> from the kvm-arm tree.
> >>
> >> I fixed it up (I think - see below) and can carry the fix as necessary
> >> (no action is required).
> >>
> >> -- 
> >> Cheers,
> >> Stephen Rothwells...@canb.auug.org.au
> >>
> >> diff --cc virt/kvm/arm/vgic.c
> >> index c9f60f524588,ffd937ca5141..
> >> --- a/virt/kvm/arm/vgic.c
> >> +++ b/virt/kvm/arm/vgic.c
> >> @@@ -982,9 -1092,7 +1098,8 @@@ bool vgic_queue_irq(struct kvm_vcpu *vc
> >>if (vlr.source == sgi_source_id) {
> >>kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
> >>BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
> >> -  vlr.state |= LR_STATE_PENDING;
> >> -  vgic_set_lr(vcpu, lr, vlr);
> >> +  vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
> >>  + vgic_sync_lr_elrsr(vcpu, lr, vlr);
> >>return true;
> >>}
> >>}
> >> @@@ -1001,12 -1109,8 +1116,9 @@@
> >>   
> >>vlr.irq = irq;
> >>vlr.source = sgi_source_id;
> >> -  vlr.state = LR_STATE_PENDING;
> >> -  if (!vgic_irq_is_edge(vcpu, irq))
> >> -  vlr.state |= LR_EOI_INT;
> >> - 
> >> -  vgic_set_lr(vcpu, lr, vlr);
> >> +  vlr.state = 0;
> >> +  vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
> >>  + vgic_sync_lr_elrsr(vcpu, lr, vlr);
> >>   
> >>return true;
> >>   }
> > 
> > Looks great, thanks!
> > -Christoffer
> 
> Got the same conflict when pulling from the kvm-arm tree, I used
> a different resolution though:
> 
> diff --cc virt/kvm/arm/vgic.c
> index c9f60f524588,b70174e74868..8d550ff14700
> --- a/virt/kvm/arm/vgic.c
> +++ b/virt/kvm/arm/vgic.c
> @@@ -955,6 -1095,25 +1101,26 @@@ static void vgic_retire_disabled_irqs(s
>   }
>   }
>   
> + static void vgic_queue_irq_to_lr(struct kvm_vcpu *vcpu, int irq,
> +  int lr_nr, struct vgic_lr vlr)
> + {
> + if (vgic_irq_is_active(vcpu, irq)) {
> + vlr.state |= LR_STATE_ACTIVE;
> + kvm_debug("Set active, clear distributor: 0x%x\n", vlr.state);
> + vgic_irq_clear_active(vcpu, irq);
> + vgic_update_state(vcpu->kvm);
> + } else if (vgic_dist_irq_is_pending(vcpu, irq)) {
> + vlr.state |= LR_STATE_PENDING;
> + kvm_debug("Set pending: 0x%x\n", vlr.state);
> + }
> + 
> + if (!vgic_irq_is_edge(vcpu, irq))
> + vlr.state |= LR_EOI_INT;
> + 
> + vgic_set_lr(vcpu, lr_nr, vlr);
> ++vgic_sync_lr_elrsr(vcpu, lr_nr, vlr);
> + }
> + 
>   /*
>* Queue an interrupt to a CPU virtual interface. Return true on success,
>* or false if it wasn't possible to queue it.
> @@@ -982,9 -1141,7 +1148,7 @@@ bool vgic_queue_irq(struct kvm_vcpu *vc
> if (vlr.source == sgi_source_id) {
> kvm_debug("LR%d piggyback for IRQ%d\n", lr, vlr.irq);
> BUG_ON(!test_bit(lr, vgic_cpu->lr_used));
> -   vlr.state |= LR_STATE_PENDING;
> -   vgic_set_lr(vcpu, lr, vlr);
> -   vgic_sync_lr_elrsr(vcpu, lr, vlr);
> +   vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
> return true;
> }
> }
> @@@ -1001,12 -1158,8 +1165,8 @@@
>   
> vlr.irq = irq;
> vlr.source = sgi_source_id;
> -   vlr.state = LR_STATE_PENDING;
> -   if (!vgic_irq_is_edge(vcpu, irq))
> -   vlr.state |= LR_EOI_INT;
> - 
> -   vgic_set_lr(vcpu, lr, vlr);
> -   vgic_sync_lr_elrsr(vcpu, lr, vlr);
> +   vlr.state = 0;
> +   vgic_queue_irq_to_lr(vcpu, irq, lr, vlr);
>   
> return true;
>   }
> 
> 
> Christoffer, this is the same logic as Stephen's resolution, but
> can you confirm that it makes sense "semantically" as well?
> 
> (Stephen, you'll still get the conflicts in linux-next for a
> couple of days as I finish local testing of KVM changes for 4.1).
> 
As it turns out, it was not the same logic as Stephen's resolution.
Stephen's resolution is buggy, because vlr is passed by value to
vgic_queue_irq_to_lr() and therefore the call to sync the elrsr does not
have any effect.

Unfortunately, it seems Paolo's more correct fix did not end up in
Linus' tree, so I guess I should just send a patch?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: linux-next: manual merge of the kvm-arm tree with Linus' tree

2015-04-16 Thread Christoffer Dall
On Thu, Apr 16, 2015 at 09:39:06PM +0200, Paolo Bonzini wrote:
> 
> 
> On 16/04/2015 21:10, Christoffer Dall wrote:
> >> > 
> > As it turns out, it was not the same logic as Stephen's resolution.
> > Stephen's resolution is buggy, because vlr is passed by value to
> > vgic_queue_irq_to_lr() and therefore the call to sync the elrsr does not
> > have any effect.
> > 
> > Unfortunately, it seems Paolo's more correct fix did not end up in
> > Linus' tree, so I guess I should just send a patch?
> 
> Uhm, sure it did :)
> 
> http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/virt/kvm/arm/vgic.c#n1121
> 

I'm an idiot; I looked at 4.0 instead of master, sorry for the noise.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: ARM: PCI devices emulation broken with KVM due to cache issue

2015-04-17 Thread Christoffer Dall
On Thu, Apr 16, 2015 at 06:02:27PM +0200, Paolo Bonzini wrote:
> 
> 
> On 16/04/2015 17:54, Jérémy Fanguède wrote:
> > The guest kernel driver of the lsi device fails to enable it correctly
> > with a cache error:
> > [...]
> > sym53c8xx :00:01.0: enabling device (0100 -> 0103)
> > sym0: <895a> rev 0x0 at pci :00:01.0 irq 54
> > sym0: No NVRAM, ID 7, Fast-40, LVD, parity checking
> > CACHE TEST FAILED: chip wrote 2, host read 1.
> > sym0: CACHE INCORRECTLY CONFIGURED.
> > sym0: giving up ...
> > [...]
> 
> Note that this is just a failure in the driver self-test.  It has
> nothing to do with the processor cache (though there are other problems
> with PCI and the processor cache in KVM mode).
> 
> Do not use this QEMU device.  The emulation is incomplete and it's slow.
>  Use virtio-scsi or megasas instead.  Still, I'm not sure that would
> work with KVM; as far as I know, most work on the ARM PCI host bridge
> was done using UEFI firmware.
> 
I've been running a number of tests lately using virtio-net-pci and
virtio-blk-pci on XGene and it works like a charm.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: ARM: PCI devices emulation broken with KVM due to cache issue

2015-04-17 Thread Christoffer Dall
On Fri, Apr 17, 2015 at 03:48:35PM +0200, Jérémy Fanguède wrote:
> On Fri, Apr 17, 2015 at 11:29 AM, Christoffer Dall
>  wrote:
> > On Thu, Apr 16, 2015 at 06:02:27PM +0200, Paolo Bonzini wrote:
> >>
> >>
> >> On 16/04/2015 17:54, Jérémy Fanguède wrote:
> >> > The guest kernel driver of the lsi device fails to enable it correctly
> >> > with a cache error:
> >> > [...]
> >> > sym53c8xx :00:01.0: enabling device (0100 -> 0103)
> >> > sym0: <895a> rev 0x0 at pci :00:01.0 irq 54
> >> > sym0: No NVRAM, ID 7, Fast-40, LVD, parity checking
> >> > CACHE TEST FAILED: chip wrote 2, host read 1.
> >> > sym0: CACHE INCORRECTLY CONFIGURED.
> >> > sym0: giving up ...
> >> > [...]
> >>
> >> Note that this is just a failure in the driver self-test.  It has
> >> nothing to do with the processor cache (though there are other problems
> >> with PCI and the processor cache in KVM mode).
> >>
> >> Do not use this QEMU device.  The emulation is incomplete and it's slow.
> >>  Use virtio-scsi or megasas instead.  Still, I'm not sure that would
> >> work with KVM; as far as I know, most work on the ARM PCI host bridge
> >> was done using UEFI firmware.
> >>
> > I've been running a number of tests lately using virtio-net-pci and
> > virtio-blk-pci on XGene and it works like a charm.
> 
> It should be noted that all these devices work fine on x86 with KVM
> enabled.
> 
> Of course, not all PCI devices are affected, e.g. virtio-* devices
> work fine for me also. However, should this failure be considered
> normal with KVM on ARM, while it works on x86?
> 
> For instance, let's take the use case where there is no virtio
> alternative: USB devices (passthrough or emulated) attached to a PCI
> USB controller, how is this supposed to work with KVM on ARM?
> The only way to make usb-ehci work that I am aware of is
> with this cache hack.

As Paolo said, we know there are cache issues on KVM/ARM and we are
working through them.  Patches are as always welcome ;)

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC PATCH 1/3] KVM: promote KVM_MEMSLOT_INCOHERENT to uapi

2015-04-20 Thread Christoffer Dall
On Wed, Mar 18, 2015 at 03:10:31PM -0400, Andrew Jones wrote:
> Also rename to KVM_MEM_UNCACHED.
> 
> Signed-off-by: Andrew Jones 
> ---
>  Documentation/virtual/kvm/api.txt | 16 ++--
>  arch/arm/include/uapi/asm/kvm.h   |  1 +
>  arch/arm/kvm/arm.c|  1 +
>  arch/arm/kvm/mmu.c|  4 ++--
>  arch/arm64/include/uapi/asm/kvm.h |  1 +
>  include/linux/kvm_host.h  |  1 -
>  include/uapi/linux/kvm.h  |  2 ++
>  virt/kvm/kvm_main.c   |  7 ++-
>  8 files changed, 23 insertions(+), 10 deletions(-)
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index 0007fef4ed814..a5a51403a7937 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -902,6 +902,7 @@ struct kvm_userspace_memory_region {
>  /* for kvm_memory_region::flags */
>  #define KVM_MEM_LOG_DIRTY_PAGES  (1UL << 0)
>  #define KVM_MEM_READONLY (1UL << 1)
> +#define KVM_MEM_UNCACHED (1UL << 2)
>  
>  This ioctl allows the user to create or modify a guest physical memory
>  slot.  When changing an existing slot, it may be moved in the guest
> @@ -917,12 +918,15 @@ It is recommended that the lower 21 bits of 
> guest_phys_addr and userspace_addr
>  be identical.  This allows large pages in the guest to be backed by large
>  pages in the host.
>  
> -The flags field supports two flags: KVM_MEM_LOG_DIRTY_PAGES and
> -KVM_MEM_READONLY.  The former can be set to instruct KVM to keep track of
> -writes to memory within the slot.  See KVM_GET_DIRTY_LOG ioctl to know how to
> -use it.  The latter can be set, if KVM_CAP_READONLY_MEM capability allows it,
> -to make a new slot read-only.  In this case, writes to this memory will be
> -posted to userspace as KVM_EXIT_MMIO exits.
> +The flags field supports three flags: KVM_MEM_LOG_DIRTY_PAGES,
> +KVM_MEM_READONLY, and KVM_MEM_UNCACHED.  The first can be set to instruct
> +KVM to keep track of writes to memory within the slot.  See KVM_GET_DIRTY_LOG
> +ioctl to know how to use it.  The second can be set, if KVM_CAP_READONLY_MEM
> +capability allows it, to make a new slot read-only.  In this case, writes to
> +this memory will be posted to userspace as KVM_EXIT_MMIO exits. The third can
> +be set, if the KVM_CAP_UNCACHED_MEM capability allows it. This remaps the
> +memory as uncached, i.e. userspace will always directly read/write RAM for
> +this memory region.

I would repeat the definition of the flag in the text here instead of
referring to the 'first', 'second', and 'third' here; it's too hard to
follow.

Also, which combination of these flags are allowed?  Can they all be set
in combination or are they mutually exclusive or a bit of everything?

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2 2/3] arm_gicv2m: Add GICv2m widget to support MSIs

2015-04-27 Thread Christoffer Dall
The ARM GICv2m widget is a little device that handles MSI interrupt
writes to a trigger register and ties them to a range of interrupt lines
wired to the GIC.  It has a few status/id registers and the interrupt wires,
and that's about it.

A board instantiates the device by setting the base SPI number and
number of SPIs for the frame.  The base-spi parameter is indexed in the SPI
number space only, so base-spi == 0, means IRQ number 32.  When a device
(the PCI host controller) writes to the trigger register, the payload is
the GIC IRQ number, so we have to subtract 32 from that and then index
into our frame of SPIs.

When instantiating a GICv2m device, tell PCI that we have instantiated
something that can deal with MSIs.  We rely on the board actually wiring
up the GICv2m to the PCI host controller.

Signed-off-by: Christoffer Dall 
---
Changes since v1:
 - Check that writes to MSI_SETSPI are within the lower boundary as well
 - Move gicv2m to common-obj in Makefile
 - Separate switch case and comment for impdef regs
 - Clearly document what is emulated
 - Allow 16 bit lower accesses to MSI_SETSPI regs
 - Fix commit grammar error
 - Remove stray pixman commit

 hw/intc/Makefile.objs |   1 +
 hw/intc/arm_gicv2m.c  | 190 ++
 2 files changed, 191 insertions(+)
 create mode 100644 hw/intc/arm_gicv2m.c

diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 843864a..092d8a8 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -11,6 +11,7 @@ common-obj-$(CONFIG_SLAVIO) += slavio_intctl.o
 common-obj-$(CONFIG_IOAPIC) += ioapic_common.o
 common-obj-$(CONFIG_ARM_GIC) += arm_gic_common.o
 common-obj-$(CONFIG_ARM_GIC) += arm_gic.o
+common-obj-$(CONFIG_ARM_GIC) += arm_gicv2m.o
 common-obj-$(CONFIG_OPENPIC) += openpic.o
 
 obj-$(CONFIG_APIC) += apic.o apic_common.o
diff --git a/hw/intc/arm_gicv2m.c b/hw/intc/arm_gicv2m.c
new file mode 100644
index 000..e62dac8
--- /dev/null
+++ b/hw/intc/arm_gicv2m.c
@@ -0,0 +1,190 @@
+/*
+ *  GICv2m extension for MSI/MSI-x support with a GICv2-based system
+ *
+ * Copyright (C) 2015 Linaro, All rights reserved.
+ *
+ * Author: Christoffer Dall 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This file implements an emulated GICv2m widget as described in the ARM
+ * Server Base System Architecture (SBSA) specification Version 2.2
+ * (ARM-DEN-0029 v2.2) pages 35-39 without any optional implementation defined
+ * identification registers and with a single non-secure MSI register frame.
+ */
+
+#include "hw/sysbus.h"
+#include "hw/pci/msi.h"
+
+#define TYPE_ARM_GICV2M "gicv2m"
+#define ARM_GICV2M(obj) OBJECT_CHECK(ARMGICv2mState, (obj), TYPE_ARM_GICV2M)
+
+#define GICV2M_NUM_SPI_MAX 128
+
+#define V2M_MSI_TYPER   0x008
+#define V2M_MSI_SETSPI_NS   0x040
+#define V2M_MSI_IIDR0xFCC
+#define V2M_IIDR0   0xFD0
+#define V2M_IIDR11  0xFFC
+
+#define PRODUCT_ID_QEMU 0x51 /* ASCII code Q */
+
+typedef struct ARMGICv2mState {
+SysBusDevice parent_obj;
+
+MemoryRegion iomem;
+qemu_irq spi[GICV2M_NUM_SPI_MAX];
+
+uint32_t base_spi;
+uint32_t num_spi;
+} ARMGICv2mState;
+
+static void gicv2m_set_irq(void *opaque, int irq)
+{
+ARMGICv2mState *s = (ARMGICv2mState *)opaque;
+
+qemu_irq_pulse(s->spi[irq]);
+}
+
+static uint64_t gicv2m_read(void *opaque, hwaddr offset,
+unsigned size)
+{
+ARMGICv2mState *s = (ARMGICv2mState *)opaque;
+uint32_t val;
+
+if (size != 4) {
+qemu_log_mask(LOG_GUEST_ERROR, "gicv2m_read: bad size %u\n", size);
+return 0;
+}
+
+switch (offset) {
+case V2M_MSI_TYPER:
+val = (s->base_spi + 32) << 16;
+val |= s->num_spi;
+return val;
+case V2M_MSI_IIDR:
+/* We don't have any valid implementor so we leave that field as zero
+ * and we return 0 in the arch revision as per the spec.
+ */
+return (PRODUCT_ID_QEMU << 20);
+case V2M_IIDR0 ... V2M_IIDR11:
+/* We do not implement any optional identification registers and the
+ * mandatory MSI_PIDR2 register reads as 0x0, so we capture all
+ * implementation defined registers here.
+ */
+return 0;
+default

[PATCH v2 1/3] target-arm: Add GIC phandle to VirtBoardInfo

2015-04-27 Thread Christoffer Dall
Instead of passing the GIC phandle around between functions, add it to
the VirtBoardInfo just like we do for the clock_phandle.  We are about
to add the v2m phandle as well, and it's easier not having to pass
around a bunch of phandles, return multiple values from functions, etc.

Reviewed-by: Peter Maydell 
Signed-off-by: Christoffer Dall 
---
Changes since v1:
 - Added reviewed-by tag

 hw/arm/virt.c | 26 +++---
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 565f573..887bcef 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -87,6 +87,7 @@ typedef struct VirtBoardInfo {
 void *fdt;
 int fdt_size;
 uint32_t clock_phandle;
+uint32_t gic_phandle;
 } VirtBoardInfo;
 
 typedef struct {
@@ -322,12 +323,11 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
 }
 }
 
-static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
+static void fdt_add_gic_node(VirtBoardInfo *vbi)
 {
-uint32_t gic_phandle;
 
-gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
-qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", gic_phandle);
+vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
+qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
 
 qemu_fdt_add_subnode(vbi->fdt, "/intc");
 /* 'cortex-a15-gic' means 'GIC v2' */
@@ -340,12 +340,10 @@ static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
  2, vbi->memmap[VIRT_GIC_DIST].size,
  2, vbi->memmap[VIRT_GIC_CPU].base,
  2, vbi->memmap[VIRT_GIC_CPU].size);
-qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
-
-return gic_phandle;
+qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
 }
 
-static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
 {
 /* We create a standalone GIC v2 */
 DeviceState *gicdev;
@@ -392,7 +390,7 @@ static uint32_t create_gic(const VirtBoardInfo *vbi, 
qemu_irq *pic)
 pic[i] = qdev_get_gpio_in(gicdev, i);
 }
 
-return fdt_add_gic_node(vbi);
+fdt_add_gic_node(vbi);
 }
 
 static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
@@ -639,8 +637,7 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, 
uint32_t gic_phandle,
0x7   /* PCI irq */);
 }
 
-static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
-uint32_t gic_phandle)
+static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic)
 {
 hwaddr base = vbi->memmap[VIRT_PCIE].base;
 hwaddr size = vbi->memmap[VIRT_PCIE].size;
@@ -712,7 +709,7 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq 
*pic,
  2, base_mmio, 2, size_mmio);
 
 qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
-create_pcie_irq_map(vbi, gic_phandle, irq, nodename);
+create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);
 
 g_free(nodename);
 }
@@ -734,7 +731,6 @@ static void machvirt_init(MachineState *machine)
 MemoryRegion *ram = g_new(MemoryRegion, 1);
 const char *cpu_model = machine->cpu_model;
 VirtBoardInfo *vbi;
-uint32_t gic_phandle;
 char **cpustr;
 
 if (!cpu_model) {
@@ -811,13 +807,13 @@ static void machvirt_init(MachineState *machine)
 
 create_flash(vbi);
 
-gic_phandle = create_gic(vbi, pic);
+create_gic(vbi, pic);
 
 create_uart(vbi, pic);
 
 create_rtc(vbi, pic);
 
-create_pcie(vbi, pic, gic_phandle);
+create_pcie(vbi, pic);
 
 /* Create mmio transports, so the user can create virtio backends
  * (which will be automatically plugged in to the transports). If
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2 0/3] Add support for GICv2m and MSIs to arm-virt

2015-04-27 Thread Christoffer Dall
Now that we have a generic PCIe host controller in the virt board, it
would be nice to be able to use MSIs so that we can eventually enable
VHOST with KVM.

With these patches you can use MSIs with TCG and with KVM, but you still
need some fixes for the mapping of the IRQ index to the GSI number for
IRQFD to work.  A separate series that enables IRQFD and vhost
is available: "ARM adaptations for vhost irqfd setup"
(https://lists.gnu.org/archive/html/qemu-devel/2015-04/msg01054.html)

Tested with KVM on XGene and with TCG by configuring a virtio-pci
network adapter for the guest and verifying MSIs going through as
expected.

See the individual patches for changelogs.

Christoffer Dall (3):
  target-arm: Add GIC phandle to VirtBoardInfo
  arm_gicv2m: Add GICv2m widget to support MSIs
  target-arm: Add the GICv2m to the virt board

 hw/arm/virt.c |  67 ++
 hw/intc/Makefile.objs |   1 +
 hw/intc/arm_gicv2m.c  | 190 ++
 3 files changed, 244 insertions(+), 14 deletions(-)
 create mode 100644 hw/intc/arm_gicv2m.c

-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2 3/3] target-arm: Add the GICv2m to the virt board

2015-04-27 Thread Christoffer Dall
Add a GICv2m device to the virt board to enable MSIs on the generic PCI
host controller.  We allocate 64 SPIs in the IRQ space for now (this can
be increased/decreased later) and map the GICv2m right after the GIC in
the memory map.

Signed-off-by: Christoffer Dall 
---
Changes since v1:
 - Remove stray merge conflict line
 - Reworded commit message.

 hw/arm/virt.c | 45 -
 1 file changed, 44 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 887bcef..b67a2c9 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -45,6 +45,7 @@
 #include "hw/pci-host/gpex.h"
 
 #define NUM_VIRTIO_TRANSPORTS 32
+#define NUM_GICV2M_SPIS 64
 
 /* Number of external interrupt lines to configure the GIC with */
 #define NUM_IRQS 128
@@ -71,6 +72,7 @@ enum {
 VIRT_RTC,
 VIRT_FW_CFG,
 VIRT_PCIE,
+VIRT_GIC_V2M,
 };
 
 typedef struct MemMapEntry {
@@ -88,6 +90,7 @@ typedef struct VirtBoardInfo {
 int fdt_size;
 uint32_t clock_phandle;
 uint32_t gic_phandle;
+uint32_t v2m_phandle;
 } VirtBoardInfo;
 
 typedef struct {
@@ -127,6 +130,7 @@ static const MemMapEntry a15memmap[] = {
 /* GIC distributor and CPU interfaces sit inside the CPU peripheral space 
*/
 [VIRT_GIC_DIST] =   { 0x0800, 0x0001 },
 [VIRT_GIC_CPU] ={ 0x0801, 0x0001 },
+[VIRT_GIC_V2M] ={ 0x0802, 0x1000 },
 [VIRT_UART] =   { 0x0900, 0x1000 },
 [VIRT_RTC] ={ 0x0901, 0x1000 },
 [VIRT_FW_CFG] = { 0x0902, 0x000a },
@@ -148,6 +152,7 @@ static const int a15irqmap[] = {
 [VIRT_RTC] = 2,
 [VIRT_PCIE] = 3, /* ... to 6 */
 [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
+[VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
 };
 
 static VirtBoardInfo machines[] = {
@@ -323,9 +328,21 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
 }
 }
 
-static void fdt_add_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi)
 {
+vbi->v2m_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
+qemu_fdt_add_subnode(vbi->fdt, "/intc/v2m");
+qemu_fdt_setprop_string(vbi->fdt, "/intc/v2m", "compatible",
+"arm,gic-v2m-frame");
+qemu_fdt_setprop(vbi->fdt, "/intc/v2m", "msi-controller", NULL, 0);
+qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/v2m", "reg",
+ 2, vbi->memmap[VIRT_GIC_V2M].base,
+ 2, vbi->memmap[VIRT_GIC_V2M].size);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->v2m_phandle);
+}
 
+static void fdt_add_gic_node(VirtBoardInfo *vbi)
+{
 vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
 qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
 
@@ -340,9 +357,31 @@ static void fdt_add_gic_node(VirtBoardInfo *vbi)
  2, vbi->memmap[VIRT_GIC_DIST].size,
  2, vbi->memmap[VIRT_GIC_CPU].base,
  2, vbi->memmap[VIRT_GIC_CPU].size);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#address-cells", 0x2);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#size-cells", 0x2);
+qemu_fdt_setprop(vbi->fdt, "/intc", "ranges", NULL, 0);
 qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
 }
 
+static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
+{
+int i;
+int irq = vbi->irqmap[VIRT_GIC_V2M];
+DeviceState *dev;
+
+dev = qdev_create(NULL, "gicv2m");
+sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_V2M].base);
+qdev_prop_set_uint32(dev, "base-spi", irq);
+qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS);
+qdev_init_nofail(dev);
+
+for (i = 0; i < NUM_GICV2M_SPIS; i++) {
+sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
+}
+
+fdt_add_v2m_gic_node(vbi);
+}
+
 static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
 {
 /* We create a standalone GIC v2 */
@@ -391,6 +430,8 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
 }
 
 fdt_add_gic_node(vbi);
+
+create_v2m(vbi, pic);
 }
 
 static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
@@ -700,6 +741,8 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq 
*pic)
 qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
nr_pcie_buses - 1);
 
+qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent", vbi->v2m_phandle);
+
 qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
  2, base_ecam, 2, size_ecam);
 qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 06/10] KVM: arm64: guest debug, add SW break point support

2015-04-27 Thread Christoffer Dall
On Thu, Apr 23, 2015 at 03:26:53PM +0100, Alex Bennée wrote:
> 
> Christoffer Dall  writes:
> 
> > On Tue, Mar 31, 2015 at 04:08:04PM +0100, Alex Bennée wrote:
> >> This adds support for SW breakpoints inserted by userspace.
> >> 
> >> We do this by trapping all BKPT exceptions in the
> >> hypervisor (MDCR_EL2_TDE).
> >
> > you mean trapping all exceptions in the guest to the hypervisor?
> >
> >> The kvm_debug_exit_arch carries the address
> >> of the exception.
> >
> > why?  can userspace not simply read out the PC using GET_ONE_REG?
> 
> Yes, I have re-worded and removed PC from the debug information.
> 
> 
> >>  
> >> +  /* Trap breakpoints? */
> >> +  if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
> >> +  vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
> >> +  else
> >> +  vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDE;
> >
> > so now you're trapping all debug exceptions, right?
> >
> > what happens if the guest is using the hardware to debug debug stuff and
> > generates other kinds of debug exceptions, like a hardware breakpoint,
> > will we not see an unhandled exception and the guest being forcefully
> > killed?
> 
> Yes until the later patches which stop the guest using HW debug
> registers while we are using them.
> 
> >
> >> +
> >>  }
> >>  
> >>  void kvm_arch_clear_debug(struct kvm_vcpu *vcpu)
> >> diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c
> >> index 524fa25..ed1bbb4 100644
> >> --- a/arch/arm64/kvm/handle_exit.c
> >> +++ b/arch/arm64/kvm/handle_exit.c
> >> @@ -82,6 +82,37 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct 
> >> kvm_run *run)
> >>return 1;
> >>  }
> >>  
> >> +/**
> >> + * kvm_handle_debug_exception - handle a debug exception instruction
> >
> > handle a software breadkpoint exception
> >
> >> + *
> >> + * @vcpu: the vcpu pointer
> >> + * @run:  access to the kvm_run structure for results
> >> + *
> >> + * We route all debug exceptions through the same handler as we
> >
> > all debug exceptions?  software breakpoints and all?  then why the above
> > shot text?
> >

I think the issue here was "debug exception instruction" making me think
this is just for software breakpoints...

Not sure what I meant by 'shot text' - probably 'short text'

> >> + * just need to report the PC and the HSR values to userspace.
> >> + * Userspace may decide to re-inject the exception and deliver it to
> >> + * the guest if it wasn't for the host to deal with.
> >
> > now I'm confused - does userspace setup the guest to receive an
> > exception or does it tell KVM to emulate an exception for the guest or
> > do we execute the breakpoint without trapping the debug exception?
> 
> I've made it all go through userspace as we may have to translate the
> hypervisor visible exception code to what the guest was expecting to see.
> 

ok, so I think you should re-phrase it to something like:

"Userspace may decide that this exception is caused by the guest using
debugging itself, and may in that case emulate the guest debug exception
in userspace before resuming KVM."

But does that really work?  Given that we don't support KVM-TCG
migration, this sounds a little strange.  Did we test it?

> >
> >> + */
> >> +static int kvm_handle_guest_debug(struct kvm_vcpu *vcpu, struct kvm_run 
> >> *run)
> >> +{
> >> +  u32 hsr = kvm_vcpu_get_hsr(vcpu);
> >> +
> >> +  run->exit_reason = KVM_EXIT_DEBUG;
> >> +  run->debug.arch.hsr = hsr;
> >> +
> >> +  switch (hsr >> ESR_ELx_EC_SHIFT) {
> >> +  case ESR_ELx_EC_BKPT32:
> >> +  case ESR_ELx_EC_BRK64:
> >> +  run->debug.arch.pc = *vcpu_pc(vcpu);
> >> +  break;
> >> +  default:
> >> +  kvm_err("%s: un-handled case hsr: %#08x\n",
> >> +  __func__, (unsigned int) hsr);
> >
> > this should never happen right?
> 
> At the moment it could, at the end of the patch series we should cover
> all the cases so it would indicate a bug. I've made it return an error
> code so it fails hard as suggested by David.
> 
hmm, ok, so I'm not so worried about that kind of bisectability
(although it would be nice to keep that working too), but reading
patches that way is a bit annoying for reviewers, so I recommend you
deal with the patch ordering in some way that makes it more obvious what
happens as reviewers read the patches, one at a time.

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 06/10] KVM: arm64: guest debug, add SW break point support

2015-04-28 Thread Christoffer Dall
On Tue, Apr 28, 2015 at 10:34:12AM +0100, Peter Maydell wrote:
> On 28 April 2015 at 09:42, Alex Bennée  wrote:
> > Peter Maydell  writes:
> >> Does the kernel already have a conveniently implemented "inject
> >> exception into guest" lump of code? If so it might be less effort
> >> to do it that way round, maybe.
> >
> > So you pointed out we can't just re-inject the exceptions we get as we
> > need to map from things like ESR_ELx_EC_WATCHPT_LOW to
> > ESR_ELx_EC_WATCHPT_CUR before re-injection.
> >
> > Of course if it is as simple as modifying the ESR_EL1 register and
> > returning +ve in the handle_exit path then I can do that but I assumed
> > if any other wrangling needs doing it should be done in userspace.
> 
> Well, somebody's got to do it, and it's the same amount of work
> either way (fiddling with ESR, making sure we direct the guest
> to the right exception vector entry point, maybe a few other
> things).
> 
We already have code in the kernel to inject data/instruction aborts,
but not sure how much benefit there is in re-using that.  It's up to you
really, but I think the kernel code should be clear about what the
intention is so that we don't end up in a situation where: (1) The
intended behavior is unclear/vague, and (2) it doesn't actually work in
practice so nobody can follow the code.

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 06/10] KVM: arm64: guest debug, add SW break point support

2015-04-29 Thread Christoffer Dall
On Tue, Apr 28, 2015 at 03:37:01PM +0100, Alex Bennée wrote:
> 
> Christoffer Dall  writes:
> 
> > On Tue, Apr 28, 2015 at 10:34:12AM +0100, Peter Maydell wrote:
> >> On 28 April 2015 at 09:42, Alex Bennée  wrote:
> >> > Peter Maydell  writes:
> >> >> Does the kernel already have a conveniently implemented "inject
> >> >> exception into guest" lump of code? If so it might be less effort
> >> >> to do it that way round, maybe.
> >> >
> >> > So you pointed out we can't just re-inject the exceptions we get as we
> >> > need to map from things like ESR_ELx_EC_WATCHPT_LOW to
> >> > ESR_ELx_EC_WATCHPT_CUR before re-injection.
> >> >
> >> > Of course if it is as simple as modifying the ESR_EL1 register and
> >> > returning +ve in the handle_exit path then I can do that but I assumed
> >> > if any other wrangling needs doing it should be done in userspace.
> >> 
> >> Well, somebody's got to do it, and it's the same amount of work
> >> either way (fiddling with ESR, making sure we direct the guest
> >> to the right exception vector entry point, maybe a few other
> >> things).
> >> 
> > We already have code in the kernel to inject data/instruction aborts,
> > but not sure how much benefit there is in re-using that.  It's up to you
> > really, but I think the kernel code should be clear about what the
> > intention is so that we don't end up in a situation where: (1) The
> > intended behavior is unclear/vague, and (2) it doesn't actually work in
> > practice so nobody can follow the code.
> 
> Certainly there are some cases where the kernel doesn't have all the
> information. For example it doesn't know if the soft break was inserted
> by the guest or the host. That to me favours the "let userspace deal
> with the ugly" approach.
> 
Not sure I follow.

If it's an exception for the guest, then that must be because the guest
put in the breakpoint instruction, right?

However, that's a separate discussion from that of *how* userspace or
the kernel then injects an exception to the guest.

By using some QEMU TCG functionality or by QEMU calling back into KVM
and asking it to inject an exception for it.

What am I missing?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 06/10] KVM: arm64: guest debug, add SW break point support

2015-04-29 Thread Christoffer Dall
On Wed, Apr 29, 2015 at 10:18:18AM +0100, Alex Bennée wrote:
> 
> Christoffer Dall  writes:
> 
> > On Tue, Apr 28, 2015 at 03:37:01PM +0100, Alex Bennée wrote:
> >> 
> >> Christoffer Dall  writes:
> >> 
> >> > On Tue, Apr 28, 2015 at 10:34:12AM +0100, Peter Maydell wrote:
> >> >> On 28 April 2015 at 09:42, Alex Bennée  wrote:
> >> >> > Peter Maydell  writes:
> >> >> >> Does the kernel already have a conveniently implemented "inject
> >> >> >> exception into guest" lump of code? If so it might be less effort
> >> >> >> to do it that way round, maybe.
> >> >> >
> >> >> > So you pointed out we can't just re-inject the exceptions we get as we
> >> >> > need to map from things like ESR_ELx_EC_WATCHPT_LOW to
> >> >> > ESR_ELx_EC_WATCHPT_CUR before re-injection.
> >> >> >
> >> >> > Of course if it is as simple as modifying the ESR_EL1 register and
> >> >> > returning +ve in the handle_exit path then I can do that but I assumed
> >> >> > if any other wrangling needs doing it should be done in userspace.
> >> >> 
> >> >> Well, somebody's got to do it, and it's the same amount of work
> >> >> either way (fiddling with ESR, making sure we direct the guest
> >> >> to the right exception vector entry point, maybe a few other
> >> >> things).
> >> >> 
> >> > We already have code in the kernel to inject data/instruction aborts,
> >> > but not sure how much benefit there is in re-using that.  It's up to you
> >> > really, but I think the kernel code should be clear about what the
> >> > intention is so that we don't end up in a situation where: (1) The
> >> > intended behavior is unclear/vague, and (2) it doesn't actually work in
> >> > practice so nobody can follow the code.
> >> 
> >> Certainly there are some cases where the kernel doesn't have all the
> >> information. For example it doesn't know if the soft break was inserted
> >> by the guest or the host. That to me favours the "let userspace deal
> >> with the ugly" approach.
> >> 
> > Not sure I follow.
> >
> > If it's an exception for the guest, then that must be because the guest
> > put in the breakpoint instruction, right?
> 
> No the host can add breakpoint instructions as well. They both generate
> the same (redirected) exception to the hypervisor which then has to
> figure out who planted the breakpoint and where the eventual exception
> will be handled.

I understand this; let's just rewind here.

If you've concluded that the exception is for the guest, then the guest
must have placed the breakpoint instruction there, correct?  Otherwise,
the exception is for the hypervisor and the discussion about how to
inject an exception for the guest is invalid.

Or are you talking about the corner case where the host uses a soft
breakpoint to get a breakpoint on an instruction which is also a
breakpoint in the guest?

> 
> > However, that's a separate discussion from that of *how* userspace or
> > the kernel then injects an exception to the guest.
> >
> > By using some QEMU TCG functionality or by QEMU calling back into KVM
> > and asking it to inject an exception for it.
> 
> I don't know if there is explicit TCG functionality to use but QEMU can
> set the registers and PC up for exception entry and re-enter KVM.
> 

I also understand this.  I think Peter's point was exactly that if we
have existing code somewhere which we can reuse, then we should consider
reusing it.

Again, I don't care particularly which way, I just want the expected
working behavior to be clearly defined.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 06/10] KVM: arm64: guest debug, add SW break point support

2015-04-29 Thread Christoffer Dall
On Wed, Apr 29, 2015 at 5:08 PM, Alex Bennée  wrote:
>
> Christoffer Dall  writes:
>
>> On Wed, Apr 29, 2015 at 10:18:18AM +0100, Alex Bennée wrote:
>>>
>>> Christoffer Dall  writes:
>>>
>>> > On Tue, Apr 28, 2015 at 03:37:01PM +0100, Alex Bennée wrote:
>>> >>
>>> >> Christoffer Dall  writes:
>>> >>
>>> >> > On Tue, Apr 28, 2015 at 10:34:12AM +0100, Peter Maydell wrote:
>>> >> >> On 28 April 2015 at 09:42, Alex Bennée  wrote:
>>> >> >> > Peter Maydell  writes:
>>> >> >> >> Does the kernel already have a conveniently implemented "inject
>>> >> >> >> exception into guest" lump of code? If so it might be less effort
>>> >> >> >> to do it that way round, maybe.
>>> >> >> >
> 
>>> >>
>>> >> Certainly there are some cases where the kernel doesn't have all the
>>> >> information. For example it doesn't know if the soft break was inserted
>>> >> by the guest or the host. That to me favours the "let userspace deal
>>> >> with the ugly" approach.
>>> >>
>>> > Not sure I follow.
>>> >
>>> > If it's an exception for the guest, then that must be because the guest
>>> > put in the breakpoint instruction, right?
>>>
>>> No the host can add breakpoint instructions as well. They both generate
>>> the same (redirected) exception to the hypervisor which then has to
>>> figure out who planted the breakpoint and where the eventual exception
>>> will be handled.
>>
>> I understand this; let's just rewind here.
>>
>> If you've concluded that the exception is for the guest, then the guest
>> must have placed the breakpoint instruction there, correct?  Otherwise,
>> the exception is for the hypervisor and the discussion about how to
>> inject an exception for the guest is invalid.
>
> But only userspace has enough information to make that conclusion (after
> searching the list of breakpoints it added to the code). So from
> userspace we can:
>
>   - re-enter KVM telling it to re-route the exception it just delivered
> to userspace somehow
>
>   or
>
>   - make the changes to deliver the exception in userspace and re-enter
> KVM as normal.
>

ok, we agree and are talking about the same thing.  good.

> It seems to me if we have already exited into userspace it may as well
> clean up if it has all the information it needs?
>

depends on the complexity and size of the code really, imho.

>> Or are you talking about the corner case where the host uses a soft
>> breakpoint to get a breakpoint on an instruction which is also a
>> breakpoint in the guest?
>
> I think in this case host debugging just wins.
>
ok
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] KVM: arm64: add active register handling to GICv3 emulation as well

2015-05-05 Thread Christoffer Dall
On Thu, Apr 23, 2015 at 08:01:53PM +0100, Andre Przywara wrote:
> Commit 47a98b15ba7c ("arm/arm64: KVM: support for un-queuing active
> IRQs") introduced handling of the GICD_I[SC]ACTIVER registers,
> but only for the GICv2 emulation. For the sake of completeness and
> as this is a pre-requisite for save/restore of the GICv3 distributor
> state, we should also emulate their handling in the distributor and
> redistributor frames of an emulated GICv3.
> 
> Signed-off-by: Andre Przywara 

Acked-by: Christoffer Dall 

Applied to queue, thanks!
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 2/2] KVM: arm/mips/x86/power use __kvm_guest_{enter|exit}

2015-05-05 Thread Christoffer Dall
On Thu, Apr 30, 2015 at 01:43:31PM +0200, Christian Borntraeger wrote:
> Use __kvm_guest_{enter|exit} instead of kvm_guest_{enter|exit}
> where interrupts are disabled.
> 
> Signed-off-by: Christian Borntraeger 

For the ARM part:

Acked-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: some questions about system calls in KVM/ARM

2015-05-05 Thread Christoffer Dall
On Sun, May 03, 2015 at 11:37:29AM +0800, 孙晓阳 wrote:
> Hello,
> I am intersted in KVM/ARM and I am trying to do some work based on KVM/ARM.
> But here is some questions :
> 1. if I set HCR.TGE=1, does the system calls will traps into hyp mode and
> be handled by hyp_svc( arch/arm/kvm/interrupts.S)?

no, hyp_svc is only handling hypervisor calls from hyp mode itself.  All
traps to Hyp mode are handled in hyp_hvc.  See "Use of offset 0x14 in
the Hyp vector table" in Section B1.8.1 in the ARMv7 ARM (DDI 0406C.c).

> 2. How can I get Guest OS's system call's number and address while issuing
> swi instruction in Guest OS usr mode and trapped into Hyp mode?

You have to know the ABI of the guest OS and decode whatever
register/immediate is used by it.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] kvm: remove one useless check extension

2015-05-05 Thread Christoffer Dall
On Mon, May 04, 2015 at 11:24:22AM +0200, Paolo Bonzini wrote:
> 
> 
> On 04/05/2015 04:48, Tiejun Chen wrote:
> > We already check KVM_CAP_IRQFD in generic once enable CONFIG_HAVE_KVM_IRQFD,
> > 
> > kvm_vm_ioctl_check_extension_generic()
> > |
> > + switch (arg) {
> > +   ...
> > +   #ifdef CONFIG_HAVE_KVM_IRQFD
> > +   case KVM_CAP_IRQFD:
> > +   #endif
> > +   ...
> > +   return 1;
> > +   ...
> > + }
> > |
> > + kvm_vm_ioctl_check_extension()
> > 
> > So its not necessary to check this in arch again, and also fix one typo,
> > s/emlation/emulation.
> > 
> > Signed-off-by: Tiejun Chen 
> > ---
> >  arch/arm/kvm/arm.c   | 1 -
> >  include/uapi/linux/kvm.h | 2 +-
> >  2 files changed, 1 insertion(+), 2 deletions(-)
> > 
> > diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> > index d9631ec..483b5d1 100644
> > --- a/arch/arm/kvm/arm.c
> > +++ b/arch/arm/kvm/arm.c
> > @@ -171,7 +171,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long 
> > ext)
> > int r;
> > switch (ext) {
> > case KVM_CAP_IRQCHIP:
> > -   case KVM_CAP_IRQFD:
> > case KVM_CAP_IOEVENTFD:
> > case KVM_CAP_DEVICE_CTRL:
> > case KVM_CAP_USER_MEMORY:
> > diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> > index 4b60056..2fa7302 100644
> > --- a/include/uapi/linux/kvm.h
> > +++ b/include/uapi/linux/kvm.h
> > @@ -894,7 +894,7 @@ struct kvm_xen_hvm_config {
> >   *
> >   * KVM_IRQFD_FLAG_RESAMPLE indicates resamplefd is valid and specifies
> >   * the irqfd to operate in resampling mode for level triggered interrupt
> > - * emlation.  See Documentation/virtual/kvm/api.txt.
> > + * emulation.  See Documentation/virtual/kvm/api.txt.
> >   */
> >  #define KVM_IRQFD_FLAG_RESAMPLE (1 << 1)
> >  
> > 
> 
> Acked-by: Paolo Bonzini 
> 
> Christoffer/Marc, please apply to the ARM tree in order to avoid
> pointless conflicts in kvm_vm_ioctl_check_extension.
> 
Applied, thanks.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC] ARM/ARM64: KVM: Implement KVM_FLUSH_DCACHE_GPA ioctl

2015-05-06 Thread Christoffer Dall
Hi Jérémy,

On Tue, May 05, 2015 at 11:13:11AM +0200, Jérémy Fanguède wrote:
> To maintain cache coherency on ARM, we may need a mechanism to flush
> the data cache.

In addition to generally just making this functionality available (see
below), do you have an actual use case in mind for this?  To solve the
VGA issue, for example, we already have a patch series from Drew trying
to address this.  Does that not work for you?

There was a long discussion about this here:
https://lists.cs.columbia.edu/pipermail/kvmarm/2015-February/013593.html

Drew then created a patch set, here:
https://lists.nongnu.org/archive/html/qemu-devel/2015-03/msg01254.html

and replied to himself, here:
https://www.marc.info/?l=android-virt&m=142670523929132&w=3

Which basically says that he doesn't like having to do flushes all over
QEMU (IIUC), so he sent this version instead:
https://lists.cs.columbia.edu/pipermail/kvmarm/2015-March/014027.html

Which he now said he'd respin.

Meanwhile there was also the idea of simply convincing the guest to
access devices as coherent through added DT properties etc., but I'm not
sure where that landed.

I think it would be better to agree to the overall approach before
sending out further patches.

On this particular patch, it does feel like sidestepping general kernel
functionality for arm64 through KVM, which I think is a bit weird.  I'd
like to hear Will and Catalin's thoughts on this before we'd go ahead
with something like this.

> 
> This patch implements KVM_FLUSH_DCACHE_GPA vm ioctl which flushes the
> data cache at a specified address range. The input argument is a
> struct kvm_mem_addr containing the guest physical address and the
> length.
> 
> Signed-off-by: Jérémy Fanguède 
> ---
>  arch/arm/kvm/arm.c   | 13 +
>  include/uapi/linux/kvm.h |  6 ++
>  2 files changed, 19 insertions(+)
> 
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index d9631ec..8638fd2 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -886,6 +886,19 @@ long kvm_arch_vm_ioctl(struct file *filp,
>  
>   return 0;
>   }
> + case KVM_FLUSH_DCACHE_GPA: {
> + struct kvm_mem_addr mem_addr;
> + hva_t hva;
> + gpa_t gpa;
> +
> + if (copy_from_user(&mem_addr, argp, sizeof(mem_addr)))
> + return -EFAULT;
> +
> + gpa = mem_addr.addr;
> + hva = gfn_to_hva(kvm, gpa_to_gfn(gpa)) | (gpa & ~PAGE_MASK);
> + kvm_flush_dcache_to_poc((void *)hva, mem_addr.len);

In case you end up moving further with this, see the following commit:

0d3e4d4 (arm/arm64: KVM: Use kernel mapping to perform invalidation on
page fault, 2015-01-05)

I think you end up with the same problem here.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v2 2/4] KVM: arm: vgic: fix state machine for forwarded IRQ

2015-05-06 Thread Christoffer Dall
On Wed, Feb 11, 2015 at 09:20:55AM +0100, Eric Auger wrote:
> Fix multiple injection of level sensitive forwarded IRQs.
> With current code, the second injection fails since the state bitmaps
> are not reset (process_maintenance is not called anymore).
> 
> New implementation follows those principles:
> - A forwarded IRQ only can be sampled when it is pending

why?

> - when queueing the IRQ (programming the LR), the pending state is removed
>   as for edge sensitive IRQs
> - an injection of a forwarded IRQ is considered always valid since
>   coming from the HW and level always is 1.
> 
> Signed-off-by: Eric Auger 
> 
> ---
> 
> v1 -> v2:
> - integration in new vgic_can_sample_irq
> - remove the pending state when programming the LR
> ---
>  virt/kvm/arm/vgic.c | 16 
>  1 file changed, 12 insertions(+), 4 deletions(-)
> 
> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> index cd00cf2..433ecba 100644
> --- a/virt/kvm/arm/vgic.c
> +++ b/virt/kvm/arm/vgic.c
> @@ -361,7 +361,10 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, 
> int irq)
>  
>  static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
>  {
> - return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
> + bool is_forwarded =  (vgic_get_phys_irq(vcpu, irq) >= 0);

can you create a wrapper function for is_forwarded?

> +
> + return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq) ||
> + (is_forwarded && vgic_dist_irq_is_pending(vcpu, irq));
>  }
>  
>  static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
> @@ -1296,6 +1299,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 
> sgi_source_id, int irq)
>   struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
>   struct vgic_lr vlr;
>   int lr;
> + bool is_forwarded =  (vgic_get_phys_irq(vcpu, irq) >= 0);
>  
>   /* Sanitize the input... */
>   BUG_ON(sgi_source_id & ~7);
> @@ -1331,7 +1335,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 
> sgi_source_id, int irq)
>   vlr.irq = irq;
>   vlr.source = sgi_source_id;
>   vlr.state = LR_STATE_PENDING;
> - if (!vgic_irq_is_edge(vcpu, irq))
> + if (!vgic_irq_is_edge(vcpu, irq) && !is_forwarded)
>   vlr.state |= LR_EOI_INT;
>  
>   vgic_set_lr(vcpu, lr, vlr);
> @@ -1372,11 +1376,12 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, int 
> irq)
>  
>  static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
>  {
> + bool is_forwarded = (vgic_get_phys_irq(vcpu, irq) >= 0);
>   if (!vgic_can_sample_irq(vcpu, irq))
>   return true; /* level interrupt, already queued */
>  
>   if (vgic_queue_irq(vcpu, 0, irq)) {
> - if (vgic_irq_is_edge(vcpu, irq)) {
> + if (vgic_irq_is_edge(vcpu, irq) || is_forwarded) {
>   vgic_dist_irq_clear_pending(vcpu, irq);
>   vgic_cpu_irq_clear(vcpu, irq);
>   } else {
> @@ -1626,14 +1631,17 @@ static int vgic_update_irq_pending(struct kvm *kvm, 
> int cpuid,
>   int edge_triggered, level_triggered;
>   int enabled;
>   bool ret = true;
> + bool is_forwarded;
>  
>   spin_lock(&dist->lock);
>  
>   vcpu = kvm_get_vcpu(kvm, cpuid);
> + is_forwarded = (vgic_get_phys_irq(vcpu, irq_num) >= 0);
> +
>   edge_triggered = vgic_irq_is_edge(vcpu, irq_num);
>   level_triggered = !edge_triggered;
>  
> - if (!vgic_validate_injection(vcpu, irq_num, level)) {
> + if (!vgic_validate_injection(vcpu, irq_num, level) && !is_forwarded) {

why is it again that we don't trust validate for forwarded irqs?  Should
it not be checked inside validate?  Otherwise, this seems to deserve a
comment.

>   ret = false;
>   goto out;
>   }

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v2 0/4] chip/vgic adaptations for forwarded irq

2015-05-06 Thread Christoffer Dall
Hi Eric,

On Wed, Feb 11, 2015 at 09:20:53AM +0100, Eric Auger wrote:
> This series proposes some fixes that appeared to be necessary
> to integrate IRQ forwarding in KVM/VFIO.
> 
> - deactivation of the forwarded IRQ in irq_disabled case
> - a specific handling of forwarded IRQ into the VGIC state machine.
> - deactivation of physical IRQ and unforwarding on vgic destruction
> - rb_tree lock in vgic.c
> 
> Integrated pieces can be found at
> ssh://git.linaro.org/people/eric.auger/linux.git
> on branch irqfd_integ_v9
> 
What are the dependencies for this at this point?

I assume it relies at least on the split EOI/priority drop changes?

Are you going to respin this when there are newer versions of the
dependencies out, or what are the plans?

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v2 0/4] chip/vgic adaptations for forwarded irq

2015-05-07 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:32:53PM +0200, Eric Auger wrote:
> On 05/06/2015 04:27 PM, Christoffer Dall wrote:
> > Hi Eric,
> > 
> > On Wed, Feb 11, 2015 at 09:20:53AM +0100, Eric Auger wrote:
> >> This series proposes some fixes that appeared to be necessary
> >> to integrate IRQ forwarding in KVM/VFIO.
> >>
> >> - deactivation of the forwarded IRQ in irq_disabled case
> >> - a specific handling of forwarded IRQ into the VGIC state machine.
> >> - deactivation of physical IRQ and unforwarding on vgic destruction
> >> - rb_tree lock in vgic.c
> >>
> >> Integrated pieces can be found at
> >> ssh://git.linaro.org/people/eric.auger/linux.git
> >> on branch irqfd_integ_v9
> >>
> > What are the dependencies for this at this point?
> > 
> > I assume it relies at least on the split EOI/priority drop changes?
> 
> Yes it now only depends on split EOI/priority drop changes since
> "genirq: Saving/restoring the irqchip state of an irq line" now is
> upstreamed.
> 
> > 
> > Are you going to respin this when there are newer versions of the
> > dependencies out, or what are the plans?
> 
> Yes I will respin according to new versions. I am currently using a
> rebased version of Marc's original RFC "ARM: Forwarding physical
> interrupts to a guest VM" (http://lwn.net/Articles/603514/) which is a
> superset of [PATCH] genirq: Add support for priority-drop/deactivate
> interrupt controllers.
> 

ok, once there's movement on the dependency and you respin, I'll review
the rest of this in detail.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC v2 2/4] KVM: arm: vgic: fix state machine for forwarded IRQ

2015-05-07 Thread Christoffer Dall
On Thu, May 07, 2015 at 09:48:25AM +0200, Eric Auger wrote:
> Hi Christoffer,
> 
> On 05/06/2015 04:26 PM, Christoffer Dall wrote:
> > On Wed, Feb 11, 2015 at 09:20:55AM +0100, Eric Auger wrote:
> >> Fix multiple injection of level sensitive forwarded IRQs.
> >> With current code, the second injection fails since the state bitmaps
> >> are not reset (process_maintenance is not called anymore).
> >>
> >> New implementation follows those principles:
> >> - A forwarded IRQ only can be sampled when it is pending
> > 
> > why?
> For forwarded IRQ there is no modeled queued state (same as edge). The
> pending state is reset as soon as the vIRQ gets queued, in
> vgic_queue_hwirq (also same as edge). This modeling makes sure the vIRQ
> is injected once. I did not model the pending state since the above
> modeling looks simple and modeling the queued state did not work
> properly: I observed new forwarded IRQ could hit before the LR was seen
> cleaned. So overall, to me, current model looks closer to edge sensitive
> IRQs and looks simple & reliable compared to attempting to model any
> queued state.
> 

hmm, reading this, I'm remembering that the rationale was that the
pending state is maintained in the hardware so we never need to resample
any software state.  If the interrupt hits again (injected from VFIO for
example) it must not have been pending, otherwise we have a bug.

Is this the right way to look at it?

I think this needs to be documented somewhere in the code.


> > 
> >> - when queueing the IRQ (programming the LR), the pending state is removed
> >>   as for edge sensitive IRQs
> >> - an injection of a forwarded IRQ is considered always valid since
> >>   coming from the HW and level always is 1.
> >>
> >> Signed-off-by: Eric Auger 
> >>
> >> ---
> >>
> >> v1 -> v2:
> >> - integration in new vgic_can_sample_irq
> >> - remove the pending state when programming the LR
> >> ---
> >>  virt/kvm/arm/vgic.c | 16 
> >>  1 file changed, 12 insertions(+), 4 deletions(-)
> >>
> >> diff --git a/virt/kvm/arm/vgic.c b/virt/kvm/arm/vgic.c
> >> index cd00cf2..433ecba 100644
> >> --- a/virt/kvm/arm/vgic.c
> >> +++ b/virt/kvm/arm/vgic.c
> >> @@ -361,7 +361,10 @@ static void vgic_cpu_irq_clear(struct kvm_vcpu *vcpu, 
> >> int irq)
> >>  
> >>  static bool vgic_can_sample_irq(struct kvm_vcpu *vcpu, int irq)
> >>  {
> >> -  return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq);
> >> +  bool is_forwarded =  (vgic_get_phys_irq(vcpu, irq) >= 0);
> > 
> > can you create a wrapper function for is_forwarded?
> yes sure
> > 
> >> +
> >> +  return vgic_irq_is_edge(vcpu, irq) || !vgic_irq_is_queued(vcpu, irq) ||
> >> +  (is_forwarded && vgic_dist_irq_is_pending(vcpu, irq));
> >>  }
> >>  
> >>  static u32 mmio_data_read(struct kvm_exit_mmio *mmio, u32 mask)
> >> @@ -1296,6 +1299,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 
> >> sgi_source_id, int irq)
> >>struct vgic_dist *dist = &vcpu->kvm->arch.vgic;
> >>struct vgic_lr vlr;
> >>int lr;
> >> +  bool is_forwarded =  (vgic_get_phys_irq(vcpu, irq) >= 0);
> >>  
> >>/* Sanitize the input... */
> >>BUG_ON(sgi_source_id & ~7);
> >> @@ -1331,7 +1335,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 
> >> sgi_source_id, int irq)
> >>vlr.irq = irq;
> >>vlr.source = sgi_source_id;
> >>vlr.state = LR_STATE_PENDING;
> >> -  if (!vgic_irq_is_edge(vcpu, irq))
> >> +  if (!vgic_irq_is_edge(vcpu, irq) && !is_forwarded)
> >>vlr.state |= LR_EOI_INT;
> >>  
> >>vgic_set_lr(vcpu, lr, vlr);
> >> @@ -1372,11 +1376,12 @@ static bool vgic_queue_sgi(struct kvm_vcpu *vcpu, 
> >> int irq)
> >>  
> >>  static bool vgic_queue_hwirq(struct kvm_vcpu *vcpu, int irq)
> >>  {
> >> +  bool is_forwarded = (vgic_get_phys_irq(vcpu, irq) >= 0);
> >>if (!vgic_can_sample_irq(vcpu, irq))
> >>return true; /* level interrupt, already queued */
> >>  
> >>if (vgic_queue_irq(vcpu, 0, irq)) {
> >> -  if (vgic_irq_is_edge(vcpu, irq)) {
> >> +  if (vgic_irq_is_edge(vcpu, irq) || is_forwarded) {
> >>vgic_dist_irq_clear_pending(vcpu, irq);
> >>vgic_cpu_irq_clear(vcpu, irq);
>

Re: [RFC] ARM/ARM64: KVM: Implement KVM_FLUSH_DCACHE_GPA ioctl

2015-05-07 Thread Christoffer Dall
On Thu, May 07, 2015 at 12:50:50PM +0200, Jérémy Fanguède wrote:
> On Wed, May 6, 2015 at 4:12 PM, Christoffer Dall
>  wrote:
> > Hi Jérémy,
> >
> > On Tue, May 05, 2015 at 11:13:11AM +0200, Jérémy Fanguède wrote:
> >> To maintain cache coherency on ARM, we may need a mechanism to flush
> >> the data cache.
> >
> > In addition to generally just making this functionality available (see
> > below), do you have an actual use case in mind for this?  To solve the
> > VGA issue, for example, we already have a patch series from Drew trying
> > to address this.  Does that not work for you?
> >
> > There was a long discussion about this here:
> > https://lists.cs.columbia.edu/pipermail/kvmarm/2015-February/013593.html
> >
> > Drew then created a patch set, here:
> > https://lists.nongnu.org/archive/html/qemu-devel/2015-03/msg01254.html
> >
> > and replied to himself, here:
> > https://www.marc.info/?l=android-virt&m=142670523929132&w=3
> >
> > Which basically says that he doesn't like having to do flushes all over
> > QEMU (IIUC), so he sent this version instead:
> > https://lists.cs.columbia.edu/pipermail/kvmarm/2015-March/014027.html
> >
> > Which he now said he'd respin.
> 
> In fact, I used this ioctl in pairs with this QEMU patch series:
> https://lists.nongnu.org/archive/html/qemu-devel/2015-05/msg00407.html
> My current work doesn't do anything about vga ram, so vga issue
> probably still persists, but it solves others issues with some
> emulated devices (mainly PCI) which were failing before and now work
> fine with this patch.

Why does Drew's approach not work and your approach works here?  What is
the case that we haven't thought about yet?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC] ARM/ARM64: KVM: Implement KVM_FLUSH_DCACHE_GPA ioctl

2015-05-07 Thread Christoffer Dall
On Thu, May 7, 2015 at 4:50 PM, Jérémy Fanguède
 wrote:
> On Thu, May 7, 2015 at 1:20 PM, Christoffer Dall
>  wrote:
>> On Thu, May 07, 2015 at 12:50:50PM +0200, Jérémy Fanguède wrote:
>>> On Wed, May 6, 2015 at 4:12 PM, Christoffer Dall
>>>  wrote:
>>> > Hi Jérémy,
>>> >
>>> > On Tue, May 05, 2015 at 11:13:11AM +0200, Jérémy Fanguède wrote:
>>> >> To maintain cache coherency on ARM, we may need a mechanism to flush
>>> >> the data cache.
>>> >
>>> > In addition to generally just making this functionality available (see
>>> > below), do you have an actual use case in mind for this?  To solve the
>>> > VGA issue, for example, we already have a patch series from Drew trying
>>> > to address this.  Does that not work for you?
>>> >
>>> > There was a long discussion about this here:
>>> > https://lists.cs.columbia.edu/pipermail/kvmarm/2015-February/013593.html
>>> >
>>> > Drew then created a patch set, here:
>>> > https://lists.nongnu.org/archive/html/qemu-devel/2015-03/msg01254.html
>>> >
>>> > and replied to himself, here:
>>> > https://www.marc.info/?l=android-virt&m=142670523929132&w=3
>>> >
>>> > Which basically says that he doesn't like having to do flushes all over
>>> > QEMU (IIUC), so he sent this version instead:
>>> > https://lists.cs.columbia.edu/pipermail/kvmarm/2015-March/014027.html
>>> >
>>> > Which he now said he'd respin.
>>>
>>> In fact, I used this ioctl in pairs with this QEMU patch series:
>>> https://lists.nongnu.org/archive/html/qemu-devel/2015-05/msg00407.html
>>> My current work doesn't do anything about vga ram, so vga issue
>>> probably still persists, but it solves others issues with some
>>> emulated devices (mainly PCI) which were failing before and now work
>>> fine with this patch.
>>
>> Why does Drew's approach not work and your approach works here?  What is
>> the case that we haven't thought about yet?
>
> The first patch from Andrew, (which is for arm64 only) doesn't permit
> me to make some emulated PCI devices working with virt, probably
> because some flushes/cleans are missing.
> As for the second patch, it focuses, for now, only on VGA ram. I
> quickly tried to extend the KVM_MEM_UNCACHED flag to the whole guest
> memory, but then the guest fails to boot; but even if it was working,
> does it make sense to map as uncached all the ram of the guest? Since
> we can not guess which region of the guest memory will be accessed.
>
> Simple PCI devices like e1000 or usb-ehci (with usb-kbd for instance)
> are not usable, with or without these patches, but if I flush a
> precise memory range, on reads and writes performed by emulated
> devices on guest ram, (It's exactly what the QEMU patch series that I
> sent does, with this ioctl), emulated PCI devices are now working.

I understand all this.  What I'd like for us to find out is why we are
having coherency issues.  We knew that for the VGA adapter, the guest
maps the memory as uncached (because that's how the real hardware
works), and QEMU maps the memory as cached (because it's just normal
memory), and unsurprisingly the two views of that memory are not
coherent.

What are the cases you are seeing with e1000 or usb-ehci?

Hint: We can make a lot of things work by just sticking cache flushes
all over, but it's not a good engineering approach.

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 01/12] KVM: add comments for kvm_debug_exit_arch struct

2015-05-08 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:23:16PM +0100, Alex Bennée wrote:
> Bring into line with the comments for the other structures and their
> KVM_EXIT_* cases. Also update api.txt to reflect use in kvm_run
> documentation.
> 
> Signed-off-by: Alex Bennée 
> Reviewed-by: David Hildenbrand 
> Reviewed-by: Andrew Jones 
> 
> ---
> 
> v2
>   - add comments for other exit types
> v3
>   - s/commentary/comments/
>   - add rb tags
>   - update api.txt kvm_run to include KVM_EXIT_DEBUG desc
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index 9fa2bf8..cb90025 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -3070,11 +3070,13 @@ data_offset describes where the data is located 
> (KVM_EXIT_IO_OUT) or
>  where kvm expects application code to place the data for the next
>  KVM_RUN invocation (KVM_EXIT_IO_IN).  Data format is a packed array.
>  
> + /* KVM_EXIT_DEBUG */
>   struct {
>   struct kvm_debug_exit_arch arch;
>   } debug;
>  
> -Unused.
> +If the exit_reason in KVM_EXIT_DEBUG, then a vcpu is processing a debug event

s/in/is/

> +for which architecture dependant information is returned.

s/dependant/dependent/  (but maybe architecture specific is better)

>  
>   /* KVM_EXIT_MMIO */
>   struct {
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 4b60056..70ac641 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -237,6 +237,7 @@ struct kvm_run {
>   __u32 count;
>   __u64 data_offset; /* relative to kvm_run start */
>   } io;
> + /* KVM_EXIT_DEBUG */
>   struct {
>   struct kvm_debug_exit_arch arch;
>   } debug;
> @@ -285,6 +286,7 @@ struct kvm_run {
>   __u32 data;
>   __u8  is_write;
>   } dcr;
> + /* KVM_EXIT_INTERNAL_ERROR */
>   struct {
>   __u32 suberror;
>   /* Available with KVM_CAP_INTERNAL_ERROR_DATA: */
> @@ -295,6 +297,7 @@ struct kvm_run {
>   struct {
>   __u64 gprs[32];
>   } osi;
> + /* KVM_EXIT_PAPR_HCALL */
>   struct {
>   __u64 nr;
>   __u64 ret;
> -- 
> 2.3.5
> 

otherwise:

Acked-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 02/12] KVM: define common __KVM_GUESTDBG_USE_SW/HW_BP values

2015-05-08 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:23:17PM +0100, Alex Bennée wrote:
> Currently x86, powerpc and soon arm64 use the same two architecture
> specific bits for guest debug support for software and hardware
> breakpoints. This makes the shared values explicit while leaving the
> gate open for another architecture to use some other value if they
> really really want to.
> 
> Signed-off-by: Alex Bennée 
> Reviewed-by: Andrew Jones 
> 
> diff --git a/arch/powerpc/include/uapi/asm/kvm.h 
> b/arch/powerpc/include/uapi/asm/kvm.h
> index ab4d473..1731569 100644
> --- a/arch/powerpc/include/uapi/asm/kvm.h
> +++ b/arch/powerpc/include/uapi/asm/kvm.h
> @@ -310,8 +310,8 @@ struct kvm_guest_debug_arch {
>   * and upper 16 bits are architecture specific. Architecture specific defines
>   * that ioctl is for setting hardware breakpoint or software breakpoint.
>   */
> -#define KVM_GUESTDBG_USE_SW_BP   0x0001
> -#define KVM_GUESTDBG_USE_HW_BP   0x0002
> +#define KVM_GUESTDBG_USE_SW_BP   __KVM_GUESTDBG_USE_SW_BP
> +#define KVM_GUESTDBG_USE_HW_BP   __KVM_GUESTDBG_USE_HW_BP
>  
>  /* definition of registers in kvm_run */
>  struct kvm_sync_regs {
> diff --git a/arch/x86/include/uapi/asm/kvm.h b/arch/x86/include/uapi/asm/kvm.h
> index d7dcef5..1438202 100644
> --- a/arch/x86/include/uapi/asm/kvm.h
> +++ b/arch/x86/include/uapi/asm/kvm.h
> @@ -250,8 +250,8 @@ struct kvm_debug_exit_arch {
>   __u64 dr7;
>  };
>  
> -#define KVM_GUESTDBG_USE_SW_BP   0x0001
> -#define KVM_GUESTDBG_USE_HW_BP   0x0002
> +#define KVM_GUESTDBG_USE_SW_BP   __KVM_GUESTDBG_USE_SW_BP
> +#define KVM_GUESTDBG_USE_HW_BP   __KVM_GUESTDBG_USE_HW_BP
>  #define KVM_GUESTDBG_INJECT_DB   0x0004
>  #define KVM_GUESTDBG_INJECT_BP   0x0008
>  
> diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
> index 70ac641..3b6252e 100644
> --- a/include/uapi/linux/kvm.h
> +++ b/include/uapi/linux/kvm.h
> @@ -570,8 +570,16 @@ struct kvm_s390_irq_state {
>  
>  /* for KVM_SET_GUEST_DEBUG */
>  
> -#define KVM_GUESTDBG_ENABLE  0x0001
> -#define KVM_GUESTDBG_SINGLESTEP  0x0002
> +#define KVM_GUESTDBG_ENABLE  (1 << 0)
> +#define KVM_GUESTDBG_SINGLESTEP  (1 << 1)
> +
> +/*
> + * Architecture specific stuff uses the top 16 bits of the field,

s/stuff//

> + * however there is some shared commonality for the common cases
> + */
> +#define __KVM_GUESTDBG_USE_SW_BP (1 << 16)
> +#define __KVM_GUESTDBG_USE_HW_BP (1 << 17)
> +
>  
>  struct kvm_guest_debug {
>   __u32 control;

We sort of left this discussion hanging with me expressing slight
concern about the usefulness of these defines.

Paolo, what are your thoughts?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 03/12] KVM: arm64: guest debug, define API headers

2015-05-08 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:23:18PM +0100, Alex Bennée wrote:
> This commit defines the API headers for guest debugging. There are two
> architecture specific debug structures:
> 
>   - kvm_guest_debug_arch, allows us to pass in HW debug registers
>   - kvm_debug_exit_arch, signals exception and possible faulting address
> 
> The type of debugging being used is controlled by the architecture
> specific control bits of the kvm_guest_debug->control flags in the ioctl
> structure.
> 
> Signed-off-by: Alex Bennée 
> Reviewed-by: David Hildenbrand 
> Reviewed-by: Andrew Jones 
> 
> ---
> v2
>- expose hsr and pc directly to user-space
> v3
>- s/control/controlled/ in commit message
>- add v8 to ARM ARM comment (ARM Architecture Reference Manual)
>- add rb tag
>- rm pc, add far
>- re-word comments on alignment
>- rename KVM_ARM_NDBG_REGS -> KVM_ARM_MAX_DBG_REGS
> 
> diff --git a/arch/arm64/include/uapi/asm/kvm.h 
> b/arch/arm64/include/uapi/asm/kvm.h
> index d268320..04957d7 100644
> --- a/arch/arm64/include/uapi/asm/kvm.h
> +++ b/arch/arm64/include/uapi/asm/kvm.h
> @@ -100,10 +100,28 @@ struct kvm_sregs {
>  struct kvm_fpu {
>  };
>  
> +/*
> + * See v8 ARM ARM D7.3: Debug Registers
> + *
> + * The control registers are architecturally defined as 32 bits but are
> + * stored as 64 bit values alongside the value registers. This is done
> + * to keep the copying of these values into the vcpu context simple as
> + * everything is 64 bit aligned (see DBGBCR0_EL1 onwards in kvm_asm.h).
> + *
> + * The architectural limit is 16 debug registers of each type although
> + * in practice there are usually less (see ID_AA64DFR0_EL1).
> + */
> +#define KVM_ARM_MAX_DBG_REGS 16
>  struct kvm_guest_debug_arch {
> + __u64 dbg_bcr[KVM_ARM_MAX_DBG_REGS];
> + __u64 dbg_bvr[KVM_ARM_MAX_DBG_REGS];
> + __u64 dbg_wcr[KVM_ARM_MAX_DBG_REGS];
> + __u64 dbg_wvr[KVM_ARM_MAX_DBG_REGS];
>  };
>  
>  struct kvm_debug_exit_arch {
> + __u32 hsr;
> + __u64 far;
>  };
>  
>  struct kvm_sync_regs {
> @@ -216,4 +234,11 @@ struct kvm_arch_memory_slot {
>  
>  #endif
>  
> +/*
> + * Architecture related debug defines - upper 16 bits of

"Architecture specific debug control flags" seems more accurate.

> + * kvm_guest_debug->control
> + */
> +#define KVM_GUESTDBG_USE_SW_BP   __KVM_GUESTDBG_USE_SW_BP
> +#define KVM_GUESTDBG_USE_HW_BP   __KVM_GUESTDBG_USE_HW_BP
> +
>  #endif /* __ARM_KVM_H__ */
> -- 
> 2.3.5
> 

Otherwise:
Acked-by: Christoffer Dall 

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 05/12] KVM: arm: introduce kvm_arm_init/setup/clear_debug

2015-05-08 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:23:20PM +0100, Alex Bennée wrote:
> This is a precursor for later patches which will need to do more to
> setup debug state before entering the hyp.S switch code. The existing
> functionality for setting mdcr_el2 has been moved out of hyp.S and now
> uses the value kept in vcpu->arch.mdcr_el2.
> 
> As the assembler used to previously mask and preserve MDCR_EL2.HPMN I've
> had to add a mechanism to save the value of mdcr_el2 as a per-cpu
> variable during the initialisation code. The kernel never sets this
> number so we are assuming the bootcode has set up the correct value
> here.
> 
> This also moves the conditional setting of the TDA bit from the hyp code
> into the C code which is currently used for the lazy debug register
> context switch code.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v3
>   - rename fns from arch->arm
>   - preserve MDCR_EL2.HPMN setting
>   - re-word some of the comments
>   - fix some minor grammar nits
>   - merge setting of mdcr_el2
>   - introduce trap_debug flag
>   - move setup/clear within the irq lock section
> 
>  create mode 100644 arch/arm64/kvm/debug.c
> 
> diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h
> index d71607c..746c0c69 100644
> --- a/arch/arm/include/asm/kvm_host.h
> +++ b/arch/arm/include/asm/kvm_host.h
> @@ -236,4 +236,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) 
> {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  
> +static inline void kvm_arm_init_debug(void) {}
> +static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {}
> +static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {}
> +
>  #endif /* __ARM_KVM_HOST_H__ */
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 52a1d4d38..4a274e1 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -570,6 +570,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
> kvm_run *run)
>   continue;
>   }
>  
> + kvm_arm_setup_debug(vcpu);
> +
>   /**
>* Enter the guest
>*/
> @@ -582,7 +584,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, 
> struct kvm_run *run)
>   vcpu->mode = OUTSIDE_GUEST_MODE;
>   kvm_guest_exit();
>   trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
> - /*
> +
> + kvm_arm_clear_debug(vcpu);
> +
> +/*
>* We may have taken a host interrupt in HYP mode (ie
>* while executing the guest). This interrupt is still
>* pending, as we haven't serviced it yet!
> @@ -930,6 +935,8 @@ static void cpu_init_hyp_mode(void *dummy)
>   vector_ptr = (unsigned long)__kvm_hyp_vector;
>  
>   __cpu_init_hyp_mode(boot_pgd_ptr, pgd_ptr, hyp_stack_ptr, vector_ptr);
> +
> + kvm_arm_init_debug();
>  }
>  
>  static int hyp_init_cpu_notify(struct notifier_block *self,
> diff --git a/arch/arm64/include/asm/kvm_asm.h 
> b/arch/arm64/include/asm/kvm_asm.h
> index 4f7310f..d6b507e 100644
> --- a/arch/arm64/include/asm/kvm_asm.h
> +++ b/arch/arm64/include/asm/kvm_asm.h
> @@ -137,6 +137,8 @@ extern char __restore_vgic_v2_state[];
>  extern char __save_vgic_v3_state[];
>  extern char __restore_vgic_v3_state[];
>  
> +extern u32 __kvm_get_mdcr_el2(void);
> +
>  #endif
>  
>  #endif /* __ARM_KVM_ASM_H__ */
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index f0f58c9..7cb99b5 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -103,6 +103,7 @@ struct kvm_vcpu_arch {
>  
>   /* HYP configuration */
>   u64 hcr_el2;
> + u32 mdcr_el2;
>  
>   /* Exception Information */
>   struct kvm_vcpu_fault_info fault;
> @@ -250,4 +251,8 @@ static inline void kvm_arch_sync_events(struct kvm *kvm) 
> {}
>  static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
>  static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}
>  
> +void kvm_arm_init_debug(void);
> +void kvm_arm_setup_debug(struct kvm_vcpu *vcpu);
> +void kvm_arm_clear_debug(struct kvm_vcpu *vcpu);
> +
>  #endif /* __ARM64_KVM_HOST_H__ */
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index da675cc..dfb25a2 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -117,6 +117,7 @@ int main(void)
>DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, 
> arch.fault.hpfar_el2));
>DEFINE(VCPU_DEBUG_FLAGS,   offsetof(struct kvm_vcpu, arch.debug_flags));
>DEFINE(VCPU_HCR_EL2,   offsetof(struct kvm_vcpu, 
> arch.hcr_el2));
> +  DEFINE(VCPU_MDCR_EL2,  offsetof(struct kvm_vcpu, arch.mdcr_el2));
>DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, 

Re: [PATCH v3 06/12] KVM: arm64: guest debug, add SW break point support

2015-05-08 Thread Christoffer Dall
_32,
> @@ -96,6 +130,8 @@ static exit_handle_fn arm_exit_handlers[] = {
>   [ESR_ELx_EC_SYS64]  = kvm_handle_sys_reg,
>   [ESR_ELx_EC_IABT_LOW]   = kvm_handle_guest_abort,
>   [ESR_ELx_EC_DABT_LOW]   = kvm_handle_guest_abort,
> + [ESR_ELx_EC_BKPT32] = kvm_handle_guest_debug,
> + [ESR_ELx_EC_BRK64]  = kvm_handle_guest_debug,
>  };
>  
>  static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu)
> -- 
> 2.3.5
> 

Besides the nit:

Reviewed-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 07/12] KVM: arm64: guest debug, add support for single-step

2015-05-08 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:23:22PM +0100, Alex Bennée wrote:
> This adds support for single-stepping the guest. To do this we need to
> manipulate the guests PSTATE.SS and MDSCR_EL1.SS bits which we do in the
> kvm_arm_setup/clear_debug() so we don't affect the apparent state of the
> guest. Additionally while the host is debugging the guest we suppress
> the ability of the guest to single-step itself.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v2
>   - Move pstate/mdscr manipulation into C
>   - don't export guest_debug to assembly
>   - add accessor for saved_debug regs
>   - tweak save/restore of mdscr_el1
> v3
>   - don't save PC in debug information struct
>   - rename debug_saved_regs->guest_debug_state
>   - save whole value, only use bits in restore
>   - add save/restore_guest-debug_regs helper functions
>   - simplify commit message for clarity
>   - rm vcpu_debug_saved_reg access fn
> 
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 064c105..9b3ed6d 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -302,7 +302,9 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>   kvm_arm_set_running_vcpu(NULL);
>  }
>  
> -#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE | 
> KVM_GUESTDBG_USE_SW_BP)
> +#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE |\
> + KVM_GUESTDBG_USE_SW_BP | \
> + KVM_GUESTDBG_SINGLESTEP)
>  
>  /**
>   * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index 7cb99b5..b60fa7a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -123,6 +123,12 @@ struct kvm_vcpu_arch {
>* here.
>*/
>  
> + /* Guest registers we preserve during guest debugging */
> + struct {
> + u32 pstate;

This could do with a comment: /* preserve SPSR_DEBUG_MASK bits */

> + u32 mdscr_el1;
> + } guest_debug_state;
> +
>   /* Don't run the guest */
>   bool pause;
>  
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 5bee676..19346e8 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -19,11 +19,42 @@
>  
>  #include 
>  
> +#include 
> +#include 
>  #include 
> +#include 
> +
> +/* These are the bits of MDSCR_EL1 we may manipulate */
> +#define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \
> + DBG_MDSCR_KDE | \
> + DBG_MDSCR_MDE)
> +
> +#define SPSR_DEBUG_MASK DBG_SPSR_SS
>  
>  static DEFINE_PER_CPU(u32, mdcr_el2);
>  
>  /**
> + * save/restore_guest_debug_regs
> + *
> + * For some debug operations we need to tweak some guest registers. As
> + * a result we need to save the state of those registers before we
> + * make those modifications.
> + */
> +static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
> +{
> + vcpu->arch.guest_debug_state.pstate = *vcpu_cpsr(vcpu);
> + vcpu->arch.guest_debug_state.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
> +}
> +
> +static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
> +{
> + *vcpu_cpsr(vcpu) |=
> + (vcpu->arch.guest_debug_state.pstate & SPSR_DEBUG_MASK);
> + vcpu_sys_reg(vcpu, MDSCR_EL1) |=
> + (vcpu->arch.guest_debug_state.mdscr_el1 & MDSCR_EL1_DEBUG_MASK);

This doesn't look right.  Don't you need to also clear the values if
they were set by us for single-stepping the guest?  At least for the
MDSCR_EL1.SS bit.

What if we're single-stepping through guest code that modifies the SS bits
of these registers for the guest state?  Is that possible and do we capture
this somehow?

> +}
> +
> +/**
>   * kvm_arm_init_debug - grab what we need for debug
>   *
>   * Currently the sole task of this function is to retrieve the initial
> @@ -38,7 +69,6 @@ void kvm_arm_init_debug(void)
>   __this_cpu_write(mdcr_el2, kvm_call_hyp(__kvm_get_mdcr_el2));
>  }
>  
> -
>  /**
>   * kvm_arm_setup_debug - set up debug related stuff
>   *
> @@ -75,15 +105,37 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
>   else
>   vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA;
>  
> - /* Trap breakpoints? */
> - if (vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
> + /* Is Guest debugging in effect? */
> + if (vcpu->guest_debug) {

you could have just checked the field like this in the original patch,
but ok.

>   vcpu->arch.mdcr_el2 |= MDCR_EL2_TDE;
> - else
> - vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDE;


>  
> + /* Save guest debug state */
> + save_guest_debug_regs(vcpu);
> +
> + /*
> +  * Single Step (ARM ARM D2.12.3 The software step state
> +  * machine)
> +  *
> +  * If we are doing Single Step we need to manipulate
> +  * MDSCR_EL1.SS and PSTATE.SS. If not we need to
> +  * suppress the guests ability to tri

Re: [PATCH v3 04/12] KVM: arm: guest debug, add stub KVM_SET_GUEST_DEBUG ioctl

2015-05-08 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:23:19PM +0100, Alex Bennée wrote:
> This commit adds a stub function to support the KVM_SET_GUEST_DEBUG
> ioctl. Any unsupported flag will return -EINVAL. For now, only
> KVM_GUESTDBG_ENABLE is supported, although it won't have any effects.
> 
> Signed-off-by: Alex Bennée .
> 
> ---
> v2
>   - simplified form of the ioctl (stuff will go into setup_debug)
> v3
>  - KVM_GUESTDBG_VALID->KVM_GUESTDBG_VALID_MASK
>  - move mask check to the top of function
>  - add ioctl doc header
>  - split capability into separate patch
>  - tweaked commit wording w.r.t return of -EINVAL
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index cb90025..4b0132f 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2645,7 +2645,7 @@ handled.
>  4.87 KVM_SET_GUEST_DEBUG
>  
>  Capability: KVM_CAP_SET_GUEST_DEBUG
> -Architectures: x86, s390, ppc
> +Architectures: x86, s390, ppc, arm64
>  Type: vcpu ioctl
>  Parameters: struct kvm_guest_debug (in)
>  Returns: 0 on success; -1 on error
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index d9631ec..52a1d4d38 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -302,10 +302,31 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>   kvm_arm_set_running_vcpu(NULL);
>  }
>  
> +#define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE)
> +
> +/**
> + * kvm_arch_vcpu_ioctl_set_guest_debug - set up guest debugging
> + * @kvm: pointer to the KVM struct
> + * @kvm_guest_debug: the ioctl data buffer
> + *
> + * This sets up and enables the VM for guest debugging. Userspace
> + * passes in a control flag to enable different debug types and
> + * potentially other architecture specific information in the rest of
> + * the structure.
> + */
>  int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu,
>   struct kvm_guest_debug *dbg)
>  {
> - return -EINVAL;
> + if (dbg->control & ~KVM_GUESTDBG_VALID_MASK)
> + return -EINVAL;
> +
> + if (dbg->control & KVM_GUESTDBG_ENABLE) {
> + vcpu->guest_debug = dbg->control;
> + } else {
> + /* If not enabled clear all flags */
> + vcpu->guest_debug = 0;
> + }
> + return 0;
>  }
>  

Reviewed-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 08/12] KVM: arm64: re-factor hyp.S debug register code

2015-05-08 Thread Christoffer Dall
On Thu, May 07, 2015 at 10:07:11AM +0100, Alex Bennée wrote:
> This is a pre-cursor to sharing the code with the guest debug support.
> This replaces the big macro that fishes data out of a fixed location
> with a more general helper macro to restore a set of debug registers. It
> uses macro substitution so it can be re-used for debug control and value
> registers. It does however rely on the debug registers being 64 bit
> aligned (as they happen to be in the hyp ABI).


> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v3:
>   - return to the patch series
>   - add save and restore targets
>   - change register use and document
> 
> diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c
> index dfb25a2..ce7b7dd 100644
> --- a/arch/arm64/kernel/asm-offsets.c
> +++ b/arch/arm64/kernel/asm-offsets.c
> @@ -116,6 +116,10 @@ int main(void)
>DEFINE(VCPU_FAR_EL2,   offsetof(struct kvm_vcpu, 
> arch.fault.far_el2));
>DEFINE(VCPU_HPFAR_EL2, offsetof(struct kvm_vcpu, 
> arch.fault.hpfar_el2));
>DEFINE(VCPU_DEBUG_FLAGS,   offsetof(struct kvm_vcpu, arch.debug_flags));
> +  DEFINE(DEBUG_BCR,  offsetof(struct kvm_guest_debug_arch, dbg_bcr));
> +  DEFINE(DEBUG_BVR,  offsetof(struct kvm_guest_debug_arch, dbg_bvr));
> +  DEFINE(DEBUG_WCR,  offsetof(struct kvm_guest_debug_arch, dbg_wcr));
> +  DEFINE(DEBUG_WVR,  offsetof(struct kvm_guest_debug_arch, dbg_wvr));
>DEFINE(VCPU_HCR_EL2,   offsetof(struct kvm_vcpu, 
> arch.hcr_el2));
>DEFINE(VCPU_MDCR_EL2,  offsetof(struct kvm_vcpu, arch.mdcr_el2));
>DEFINE(VCPU_IRQ_LINES, offsetof(struct kvm_vcpu, arch.irq_lines));
> diff --git a/arch/arm64/kvm/hyp.S b/arch/arm64/kvm/hyp.S
> index 15159aa..dd51fb1 100644
> --- a/arch/arm64/kvm/hyp.S
> +++ b/arch/arm64/kvm/hyp.S
> @@ -228,199 +228,52 @@
>   stp x24, x25, [x3, #160]
>  .endm
>  
> -.macro save_debug
> - // x2: base address for cpu context
> - // x3: tmp register
> -
> - mrs x26, id_aa64dfr0_el1
> - ubfxx24, x26, #12, #4   // Extract BRPs
> - ubfxx25, x26, #20, #4   // Extract WRPs
> - mov w26, #15
> - sub w24, w26, w24   // How many BPs to skip
> - sub w25, w26, w25   // How many WPs to skip
> -
> - add x3, x2, #CPU_SYSREG_OFFSET(DBGBCR0_EL1)
> -
> - adr x26, 1f
> - add x26, x26, x24, lsl #2
> - br  x26
> -1:
> - mrs x20, dbgbcr15_el1
> - mrs x19, dbgbcr14_el1
> - mrs x18, dbgbcr13_el1
> - mrs x17, dbgbcr12_el1
> - mrs x16, dbgbcr11_el1
> - mrs x15, dbgbcr10_el1
> - mrs x14, dbgbcr9_el1
> - mrs x13, dbgbcr8_el1
> - mrs x12, dbgbcr7_el1
> - mrs x11, dbgbcr6_el1
> - mrs x10, dbgbcr5_el1
> - mrs x9, dbgbcr4_el1
> - mrs x8, dbgbcr3_el1
> - mrs x7, dbgbcr2_el1
> - mrs x6, dbgbcr1_el1
> - mrs x5, dbgbcr0_el1
> -
> - adr x26, 1f
> - add x26, x26, x24, lsl #2
> - br  x26
> -
> -1:
> - str x20, [x3, #(15 * 8)]
> - str x19, [x3, #(14 * 8)]
> - str x18, [x3, #(13 * 8)]
> - str x17, [x3, #(12 * 8)]
> - str x16, [x3, #(11 * 8)]
> - str x15, [x3, #(10 * 8)]
> - str x14, [x3, #(9 * 8)]
> - str x13, [x3, #(8 * 8)]
> - str x12, [x3, #(7 * 8)]
> - str x11, [x3, #(6 * 8)]
> - str x10, [x3, #(5 * 8)]
> - str x9, [x3, #(4 * 8)]
> - str x8, [x3, #(3 * 8)]
> - str x7, [x3, #(2 * 8)]
> - str x6, [x3, #(1 * 8)]
> - str x5, [x3, #(0 * 8)]
> -
> - add x3, x2, #CPU_SYSREG_OFFSET(DBGBVR0_EL1)
> -
> - adr x26, 1f
> - add x26, x26, x24, lsl #2
> - br  x26
> +.macro save_debug_registers type
> + // x4: pointer to register set
> + // x5: number of registers to copy

looking at the caller, you're actually passing the number of registers
to skip?

> + // x6..x22 trashed
> +
> + adr x22, 1f
> + add x22, x22, x5, lsl #2
> + br  x22
>  1:
> - mrs x20, dbgbvr15_el1
> - mrs x19, dbgbvr14_el1
> - mrs x18, dbgbvr13_el1
> - mrs x17, dbgbvr12_el1
> - mrs x16, dbgbvr11_el1
> - mrs x15, dbgbvr10_el1
> - mrs x14, dbgbvr9_el1
> - mrs x13, dbgbvr8_el1
> - mrs x12, dbgbvr7_el1
> - mrs x11, dbgbvr6_el1
> - mrs x10, dbgbvr5_el1
> - mrs x9, dbgbvr4_el1
> - mrs x8, dbgbvr3_el1
> - mrs x7, dbgbvr2_el1
> - mrs x6, dbgbvr1_el1
> - mrs x5, dbgbvr0_el1
> -
> - adr x26, 1f
> - add x26, x26, x24, lsl #2
> - br  x26
> -
> -1:
> - str x20, [x3, #(15 * 8)]
> - str x19, [x3, #(14 * 8)]
> - str x18, [x3, #(13 * 8)]
> - str x17, [x3, #(12 * 8)]
> - str x16, [x3, #(11 * 8)]
> - str x15, [x3, #(10 * 8)]
> - str x14, [x3, #(9 * 

Re: [PATCH v3 09/12] KVM: arm64: guest debug, HW assisted debug support

2015-05-08 Thread Christoffer Dall
On Thu, May 07, 2015 at 10:07:12AM +0100, Alex Bennée wrote:
> This adds support for userspace to control the HW debug registers for
> guest debug. In the debug ioctl we copy the IMPDEF defined number of
> registers into a new register set called host_debug_state. There is now
> a new vcpu parameter called debug_ptr which selects which register set
> is to be copied into the real registers when world switch occurs.
> 
> I've moved some helper functions into the hw_breakpoint.h header for
> re-use.
> 
> As with single step we need to tweak the guest registers to enable the
> exceptions so we need to save and restore those bits.
> 
> Two new capabilities have been added to the KVM_EXTENSION ioctl to allow
> userspace to query the number of hardware break and watch points
> available on the host hardware.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v2
>- switched to C setup
>- replace host debug registers directly into context
>- minor tweak to api docs
>- setup right register for debug
>- add FAR_EL2 to debug exit structure
>- add support for trapping debug register access
> v3
>- remove stray trace statement
>- fix spacing around operators (various)
>- clean-up usage of trap_debug
>- introduce debug_ptr, replace excessive memcpy stuff
>- don't use memcpy in ioctl, just assign
>- update cap ioctl documentation
>- reword a number comments
>- rename host_debug_state->external_debug_state
> 
> diff --git a/Documentation/virtual/kvm/api.txt 
> b/Documentation/virtual/kvm/api.txt
> index 5ef937c..419f7a8 100644
> --- a/Documentation/virtual/kvm/api.txt
> +++ b/Documentation/virtual/kvm/api.txt
> @@ -2668,7 +2668,7 @@ The top 16 bits of the control field are architecture 
> specific control
>  flags which can include the following:
>  
>- KVM_GUESTDBG_USE_SW_BP: using software breakpoints [x86, arm64]
> -  - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390]
> +  - KVM_GUESTDBG_USE_HW_BP: using hardware breakpoints [x86, s390, arm64]
>- KVM_GUESTDBG_INJECT_DB: inject DB type exception [x86]
>- KVM_GUESTDBG_INJECT_BP: inject BP type exception [x86]
>- KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
> @@ -2683,6 +2683,11 @@ updated to the correct (supplied) values.
>  The second part of the structure is architecture specific and
>  typically contains a set of debug registers.
>  
> +For arm64 the number of debug registers is implementation defined and
> +can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and
> +KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which returns a +ve number

s/returns/return/
s/+ve/positive/

> +indicating the number of supported registers.
> +
>  When debug events exit the main run loop with the reason
>  KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
>  structure containing architecture specific debug information.
> diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
> index 9b3ed6d..2920185 100644
> --- a/arch/arm/kvm/arm.c
> +++ b/arch/arm/kvm/arm.c
> @@ -279,6 +279,10 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
>   /* Set up the timer */
>   kvm_timer_vcpu_init(vcpu);
>  
> + /* Set the debug registers to be the guests */
> + vcpu->arch.debug_ptr = (struct kvm_guest_debug_arch *)
> + &vcpu_sys_reg(vcpu, DBGBCR0_EL1);
> +

yikes, I don't like this cast; how bad is it to get rid of the debug
registers in the sys_regs array?

Also, pretty sure this is part of the breakage for the 32-bit build...

>   return 0;
>  }
>  
> @@ -304,6 +308,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
>  
>  #define KVM_GUESTDBG_VALID_MASK (KVM_GUESTDBG_ENABLE |\
>   KVM_GUESTDBG_USE_SW_BP | \
> + KVM_GUESTDBG_USE_HW_BP | \
>   KVM_GUESTDBG_SINGLESTEP)
>  
>  /**
> @@ -324,6 +329,12 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu 
> *vcpu,
>  
>   if (dbg->control & KVM_GUESTDBG_ENABLE) {
>   vcpu->guest_debug = dbg->control;
> +
> + /* Hardware assisted Break and Watch points */
> + if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {

is this only breakpoints or breakpoints and watch points?

> + vcpu->arch.external_debug_state = dbg->arch;
> + }
> +
>   } else {
>   /* If not enabled clear all flags */
>   vcpu->guest_debug = 0;
> diff --git a/arch/arm64/include/asm/hw_breakpoint.h 
> b/arch/arm64/include/asm/hw_breakpoint.h
> index 52b484b..c450552 100644
> --- a/arch/arm64/include/asm/hw_breakpoint.h
> +++ b/arch/arm64/include/asm/hw_breakpoint.h
> @@ -130,6 +130,18 @@ static inline void ptrace_hw_copy_thread(struct 
> task_struct *task)
>  }
>  #endif
>  
> +/* Determine number of BRP registers available. */
> +static inline int get_num_brps(void)
> +{
> + return ((read_cpuid(ID_AA64DFR0_EL1) >> 12) & 0xf) + 1;
> +}
> +
>

Re: [PATCH v3 00/12] KVM Guest Debug support for arm64

2015-05-08 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:23:15PM +0100, Alex Bennée wrote:
> Hi,
> 
> Here is V3 of the KVM Guest Debug support for arm64.
> 
> This sees the return of hyp.S re-factoring code which has been
> expanded to handle both the save and restore legs. The HW debug patch
> then adds a simple indirection to enable switching between the guest
> context debug registers and the active debugging context.
> 
> The API has been further simplified to remove the PC (as that is
> already available by the GET_ONE_REG ioctl). The responsibility for
> handling re-injection is now explicitly that of userspace.
> 
> The setup/clear debug code has gained an init function to be called at
> start-up and save useful values (currently only mdcr_el2.HPMN).
> 
> For full details see the changelog on each of the patches.
> 
> As before there are a few checkpatch violations for white space. Some
> in existing code (asm-offsets) and a couple in the handle_exit code
> where adding a whole extra tab seemed excessive.
> 
> Reviewed-by tags have been added to the earlier patches as
> appropriate.
> 
> GIT Repos:
> 
> The patches for this series are based off v4.1-rc1 and can be found
> at:
> 
> https://git.linaro.org/people/alex.bennee/linux.git
> branch: guest-debug/4.1-rc1-v3
> 

This doesn't seem to build for 32-bit ARM :(

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 10/12] KVM: arm64: trap nested debug register access

2015-05-08 Thread Christoffer Dall
On Thu, May 07, 2015 at 10:07:13AM +0100, Alex Bennée wrote:
> When we are using the hardware registers for guest debug we need to deal
> with the guests access to them. There is already a mechanism for dealing
> with these accesses so we build on top of that.
> 
>   - any access to mdscr_el1 is now stored in the mirror location
>   - access to DBG[WB][CV]R continues to go to guest's context
> 
> There is one register (MDCCINT_EL1) which guest debug doesn't care about
> so this behaves as before.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v3
>   - re-factor for better flow and fall through.
>   - much simpler with debug_ptr (use the guest area as before)
>   - tweak shadow fn to avoid multi-line if
> 
> diff --git a/arch/arm64/include/asm/kvm_host.h 
> b/arch/arm64/include/asm/kvm_host.h
> index a44fb32..7aa3b3a 100644
> --- a/arch/arm64/include/asm/kvm_host.h
> +++ b/arch/arm64/include/asm/kvm_host.h
> @@ -132,7 +132,13 @@ struct kvm_vcpu_arch {
>* here.
>*/
>  
> - /* Guest registers we preserve during guest debugging */
> + /*
> +  * Guest registers we preserve during guest debugging.
> +  *
> +  * These shadow registers are updated by the kvm_handle_sys_reg
> +  * trap handler if the guest accesses or updates them while we
> +  * are using guest debug.
> +  */
>   struct {
>   u32 pstate;
>   u32 mdscr_el1;
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index 1ab63dd..dc8bca8 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -50,8 +50,7 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
>  {
>   *vcpu_cpsr(vcpu) |=
>   (vcpu->arch.guest_debug_state.pstate & SPSR_DEBUG_MASK);
> - vcpu_sys_reg(vcpu, MDSCR_EL1) |=
> - (vcpu->arch.guest_debug_state.mdscr_el1 & MDSCR_EL1_DEBUG_MASK);
> + vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_state.mdscr_el1;
>  }
>  
>  /**
> diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c
> index c370b40..95f422f 100644
> --- a/arch/arm64/kvm/sys_regs.c
> +++ b/arch/arm64/kvm/sys_regs.c
> @@ -196,11 +196,40 @@ static bool trap_dbgauthstatus_el1(struct kvm_vcpu 
> *vcpu,
>   * - If the dirty bit is set, save guest registers, restore host
>   *   registers and clear the dirty bit. This ensure that the host can
>   *   now use the debug registers.
> + *
> + * We also use this mechanism to set-up the debug registers for guest

s/set-up/set up/

> + * debugging. If this is the case we want to ensure the guest sees

If this is the case, (comma)

> + * the right versions of the registers - even if they are not going to
> + * be effective while guest debug is using HW debug.
> + *
>   */
> +
> +static bool shadow_debug_reg(struct kvm_vcpu *vcpu,
> +  const struct sys_reg_params *p,
> +  const struct sys_reg_desc *r)
> +{
> + /* MDSCR_EL1 */
> + if (r->reg == MDSCR_EL1) {
> + u32 *shadow_mdscr_el1 = &vcpu->arch.guest_debug_state.mdscr_el1;
> +
> + if (p->is_write)
> + *shadow_mdscr_el1 = *vcpu_reg(vcpu, p->Rt);
> + else
> + *vcpu_reg(vcpu, p->Rt) = *shadow_mdscr_el1;
> +
> + return true;
> + }
> +
> + return false;
> +}
> +
>  static bool trap_debug_regs(struct kvm_vcpu *vcpu,
>   const struct sys_reg_params *p,
>   const struct sys_reg_desc *r)
>  {
> + if (vcpu->guest_debug && shadow_debug_reg(vcpu, p, r))
> + return true;
> +

so do we also have an MDSCR_EL1 in sys_regs and one in guest_debug_state
now?

If yes, what are the differences between the two?

>   if (p->is_write) {
>   vcpu_sys_reg(vcpu, r->reg) = *vcpu_reg(vcpu, p->Rt);
>   vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
> -- 
> 2.3.5
> 
Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 11/12] KVM: arm64: enable KVM_CAP_SET_GUEST_DEBUG

2015-05-08 Thread Christoffer Dall
On Thu, May 07, 2015 at 10:07:14AM +0100, Alex Bennée wrote:
> Finally advertise the KVM capability for SET_GUEST_DEBUG. Once arm
> support is added this check can be moved to the common
> kvm_vm_ioctl_check_extension() code.
> 
> Signed-off-by: Alex Bennée 
> 
Acked-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 12/12] KVM: arm64: add trace points for guest_debug debug

2015-05-08 Thread Christoffer Dall
On Thu, May 07, 2015 at 10:07:15AM +0100, Alex Bennée wrote:
> This includes trace points for:
>   kvm_arch_setup_guest_debug
>   kvm_arch_clear_guest_debug
>   kvm_handle_guest_debug
> 
> I've also added some generic register setting trace events and also a
> trace point to dump the array of hardware registers.
> 
> Signed-off-by: Alex Bennée 
> 
> ---
> v3
>   - add trace event for debug access.
>   - remove short trace #define, rename trace events
>   - use __print_array with fixed array instead of own func
>   - rationalise trace points (only one per register changed)
>   - add vcpu ptr to the debug_setup trace
>   - remove :: in prints
> 
> diff --git a/arch/arm64/kvm/debug.c b/arch/arm64/kvm/debug.c
> index dc8bca8..08e1b83 100644
> --- a/arch/arm64/kvm/debug.c
> +++ b/arch/arm64/kvm/debug.c
> @@ -24,6 +24,8 @@
>  #include 
>  #include 
>  
> +#include "trace.h"
> +
>  /* These are the bits of MDSCR_EL1 we may manipulate */
>  #define MDSCR_EL1_DEBUG_MASK (DBG_MDSCR_SS | \
>   DBG_MDSCR_KDE | \
> @@ -44,6 +46,11 @@ static void save_guest_debug_regs(struct kvm_vcpu *vcpu)
>  {
>   vcpu->arch.guest_debug_state.pstate = *vcpu_cpsr(vcpu);
>   vcpu->arch.guest_debug_state.mdscr_el1 = vcpu_sys_reg(vcpu, MDSCR_EL1);
> +
> + trace_kvm_arm_set_dreg32("Saved PSTATE",
> + vcpu->arch.guest_debug_state.pstate);
> + trace_kvm_arm_set_dreg32("Saved MDSCR_EL1",
> + vcpu->arch.guest_debug_state.mdscr_el1);

wouldn't it make sense to turn these into a single tracepoint with two
parameters?

>  }
>  
>  static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
> @@ -51,6 +58,10 @@ static void restore_guest_debug_regs(struct kvm_vcpu *vcpu)
>   *vcpu_cpsr(vcpu) |=
>   (vcpu->arch.guest_debug_state.pstate & SPSR_DEBUG_MASK);
>   vcpu_sys_reg(vcpu, MDSCR_EL1) = vcpu->arch.guest_debug_state.mdscr_el1;
> +
> + trace_kvm_arm_set_dreg32("Restored PSTATE", *vcpu_cpsr(vcpu));
> + trace_kvm_arm_set_dreg32("Restored MDSCR_EL1",
> + vcpu_sys_reg(vcpu, MDSCR_EL1));

ditto

>  }
>  
>  /**
> @@ -92,6 +103,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
>  {
>   bool trap_debug = !(vcpu->arch.debug_flags & KVM_ARM64_DEBUG_DIRTY);
>  
> + trace_kvm_arm_setup_debug(vcpu, vcpu->guest_debug);
> +
>   vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
>   vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
>   MDCR_EL2_TPMCR |
> @@ -121,6 +134,8 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
>   vcpu_sys_reg(vcpu, MDSCR_EL1) &= ~DBG_MDSCR_SS;
>   }
>  
> + trace_kvm_arm_set_dreg32("SPSR_EL2", *vcpu_cpsr(vcpu));
> +
>   /*
>* HW Break/Watch points
>*
> @@ -138,6 +153,14 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
>   vcpu->arch.debug_ptr = &vcpu->arch.external_debug_state;
>   vcpu->arch.debug_flags |= KVM_ARM64_DEBUG_DIRTY;
>   trap_debug = true;
> +
> + trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
> + 
> &vcpu->arch.debug_ptr->dbg_bcr[0],
> + 
> &vcpu->arch.debug_ptr->dbg_bvr[0]);
> +
> + trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
> + 
> &vcpu->arch.debug_ptr->dbg_wcr[0],
> + 
> &vcpu->arch.debug_ptr->dbg_wvr[0]);

feels like this should also be a single tracepoint

>   }
>  
>   } else {
> @@ -155,10 +178,15 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
>   vcpu->arch.mdcr_el2 |= MDCR_EL2_TDA;
>   else
>   vcpu->arch.mdcr_el2 &= ~MDCR_EL2_TDA;
> +
> + trace_kvm_arm_set_dreg32("MDCR_EL2", vcpu->arch.mdcr_el2);
> + trace_kvm_arm_set_dreg32("MDSCR_EL1", vcpu_sys_reg(vcpu, MDSCR_EL1));
>  }
>  
>  void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
>  {
> + trace_kvm_arm_clear_debug(vcpu->guest_debug);
> +
>   if (vcpu->guest_debug) {
>   restore_guest_debug_regs(vcpu);
>  
> @@ -169,6 +197,14 @@ void kvm_arm_clear_debug(struct kvm_vcpu *vcpu)
>   if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP) {
>   vcpu->arch.debug_ptr = (struct kvm_guest_debug_arch *)
>   &vcpu_sys_reg(vcpu, DBGBCR0_EL1);
> +
> + trace_kvm_arm_set_regset("BKPTS", get_num_brps(),
> + 
> &vcpu->arch.debug_ptr->dbg_bcr[0],
> + 
> &vcpu->arch.debug_ptr->dbg_bvr[0]);
> +
> + trace_kvm_arm_set_regset("WAPTS", get_num_wrps(),
> + 
> &vcpu->arch.debug_ptr->dbg_wcr[0],
> +

Re: [PATCH 1/2] ARM: kvm: fix a bad BSYM() usage

2015-05-09 Thread Christoffer Dall
On Fri, May 08, 2015 at 05:08:42PM +0100, Russell King wrote:
> BSYM() should only be used when refering to local symbols in the same
> assembly file which are resolved by the assembler, and not for
> linker-fixed up symbols.  The use of BSYM() with panic is incorrect as
> the linker is involved in fixing up this relocation, and it knows
> whether panic() is ARM or Thumb.
> 
> Signed-off-by: Russell King 
> ---
>  arch/arm/kvm/interrupts.S | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
> 
> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> index 79caf79b304a..87847d2c5f99 100644
> --- a/arch/arm/kvm/interrupts.S
> +++ b/arch/arm/kvm/interrupts.S
> @@ -309,7 +309,7 @@ ENTRY(kvm_call_hyp)
>  THUMB(   orr r2, r2, #PSR_T_BIT  )
>   msr spsr_cxsf, r2
>   mrs r1, ELR_hyp
> - ldr r2, =BSYM(panic)
> + ldr r2, =panic
>   msr ELR_hyp, r2
>   ldr r0, =\panic_str
>   clrex   @ Clear exclusive monitor
> -- 
> 1.8.3.1
> 
Indeed, the linker figures it out as it should.  It does seem like the
right result is produced with the BSYM() macro as well so not sure what
the harm is.

Anyway, I've queued this to merge via the KVM tree.

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 1/2] ARM: kvm: fix a bad BSYM() usage

2015-05-11 Thread Christoffer Dall
On Sat, May 09, 2015 at 09:10:57PM +0100, Russell King - ARM Linux wrote:
> On Sat, May 09, 2015 at 10:07:17PM +0200, Christoffer Dall wrote:
> > On Fri, May 08, 2015 at 05:08:42PM +0100, Russell King wrote:
> > > BSYM() should only be used when refering to local symbols in the same
> > > assembly file which are resolved by the assembler, and not for
> > > linker-fixed up symbols.  The use of BSYM() with panic is incorrect as
> > > the linker is involved in fixing up this relocation, and it knows
> > > whether panic() is ARM or Thumb.
> > > 
> > > Signed-off-by: Russell King 
> > > ---
> > >  arch/arm/kvm/interrupts.S | 2 +-
> > >  1 file changed, 1 insertion(+), 1 deletion(-)
> > > 
> > > diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> > > index 79caf79b304a..87847d2c5f99 100644
> > > --- a/arch/arm/kvm/interrupts.S
> > > +++ b/arch/arm/kvm/interrupts.S
> > > @@ -309,7 +309,7 @@ ENTRY(kvm_call_hyp)
> > >  THUMB(   orr r2, r2, #PSR_T_BIT  )
> > >   msr spsr_cxsf, r2
> > >   mrs r1, ELR_hyp
> > > - ldr r2, =BSYM(panic)
> > > + ldr r2, =panic
> > >   msr ELR_hyp, r2
> > >   ldr r0, =\panic_str
> > >   clrex   @ Clear exclusive monitor
> > > -- 
> > > 1.8.3.1
> > > 
> > Indeed, the linker figures it out as it should.  It does seem like the
> > right result is produced with the BSYM() macro as well so not sure what
> > the harm is.
> > 
> > Anyway, I've queued this to merge via the KVM tree.
> 
> I already have it in my tree (and linux-next) as the second patch (which
> removes the BSYM macro entirely) depends on this.
> 
ok, fine, you can add my ack then if you like:

Acked-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH 1/2] ARM: kvm: fix a bad BSYM() usage

2015-05-11 Thread Christoffer Dall
On Sat, May 09, 2015 at 10:10:56PM +0200, Ard Biesheuvel wrote:
> On 9 May 2015 at 22:07, Christoffer Dall  wrote:
> > On Fri, May 08, 2015 at 05:08:42PM +0100, Russell King wrote:
> >> BSYM() should only be used when refering to local symbols in the same
> >> assembly file which are resolved by the assembler, and not for
> >> linker-fixed up symbols.  The use of BSYM() with panic is incorrect as
> >> the linker is involved in fixing up this relocation, and it knows
> >> whether panic() is ARM or Thumb.
> >>
> >> Signed-off-by: Russell King 
> >> ---
> >>  arch/arm/kvm/interrupts.S | 2 +-
> >>  1 file changed, 1 insertion(+), 1 deletion(-)
> >>
> >> diff --git a/arch/arm/kvm/interrupts.S b/arch/arm/kvm/interrupts.S
> >> index 79caf79b304a..87847d2c5f99 100644
> >> --- a/arch/arm/kvm/interrupts.S
> >> +++ b/arch/arm/kvm/interrupts.S
> >> @@ -309,7 +309,7 @@ ENTRY(kvm_call_hyp)
> >>  THUMB(   orr r2, r2, #PSR_T_BIT  )
> >>   msr spsr_cxsf, r2
> >>   mrs r1, ELR_hyp
> >> - ldr r2, =BSYM(panic)
> >> + ldr r2, =panic
> >>   msr ELR_hyp, r2
> >>   ldr r0, =\panic_str
> >>   clrex   @ Clear exclusive monitor
> >> --
> >> 1.8.3.1
> >>
> > Indeed, the linker figures it out as it should.  It does seem like the
> > right result is produced with the BSYM() macro as well so not sure what
> > the harm is.
> >
> 
> BSYM() is defined as 'sym + 1' not 'sym | 1', so if the symbol has the
> thumb bit set already, the result is incorrect.
> 
yeah, but the linker will look at the result of 'sym + 1', so on my
system it ends up with 'sym + 1' after the linker has done its thing
(verified by looking at the disassembly of vmlinux); I assume the
linker logic is that it's branching to a thumb function but the target
is already the +1 so no action necessary, as opposed to just blindly
adding 1.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 0/3] KVM: Introduce KVM_MEM_UNCACHED

2015-05-14 Thread Christoffer Dall
On Wed, May 13, 2015 at 01:31:51PM +0200, Andrew Jones wrote:
> Introduce a new memory region flag, KVM_MEM_UNCACHED, which is
> needed by ARM. This flag informs KVM that the given memory region
> is typically mapped by the guest as non-cacheable. KVM for ARM
> then ensures that that memory is indeed mapped non-cacheable by
> the guest, and also remaps that region as non-cacheable for
> userspace, allowing them both to maintain a coherent view.
> 
> Changes since v1:
>  1) don't pin pages [Paolo]
>  2) ensure the guest maps the memory non-cacheable [me]
>  3) clean up memslot flag documentation [Christoffer]
> changes 1 and 2 effectively redesigned/rewrote v1. Find v1 here
> http://www.spinics.net/lists/kvm-arm/msg14022.html
> 
> The QEMU series for v1 hasn't really changed. Only the linux
> header hack needed to bump KVM_CAP_UNCACHED_MEM from 107 to
> 116.  Find the series here
> http://www.spinics.net/lists/kvm-arm/msg14026.html
> 
> Testing:
> This series still needs lots of testing, but I thought I'd
> kick it to the list early, as there's been recent interest
> in solving this problem, and I'd like to get test results
> and opinions on this approach from others sooner than later.
> I've tested with AAVMF (UEFI for AArch64 mach-virt guests).
> AAVMF has a kludge in it to avoid the coherency problem.

How does the 'kludge' work?

> I've tested both with and without that kludge active. Both
> worked for me (almost). Sometimes with the non-kludged
> version I was still able to see a bit of corruption in
> grub's output after edk2 loaded it - not much, and not always,
> but something.

Remind me, this is a VGA framebuffer corruption with a PCI-plugged VGA
card?

Thanks,
-Christoffer

> Anyway, it's quite frustrating, as I'm not sure
> what I'm missing...
> 
> This series applies to Linus' 110bc76729d4, but I tested with
> a version backported to the current RHELSA kernel.
> 
> Thanks for reviews and testing!
> 
> drew
> 
> 
> Andrew Jones (3):
>   arm/arm64: pageattr: add set_memory_nc
>   KVM: promote KVM_MEMSLOT_INCOHERENT to uapi
>   arm/arm64: KVM: implement 'uncached' mem coherency
> 
>  Documentation/virtual/kvm/api.txt | 20 --
>  arch/arm/include/asm/cacheflush.h |  1 +
>  arch/arm/include/asm/kvm_mmu.h|  5 -
>  arch/arm/include/asm/pgtable-3level.h |  1 +
>  arch/arm/include/asm/pgtable.h|  1 +
>  arch/arm/include/uapi/asm/kvm.h   |  1 +
>  arch/arm/kvm/arm.c|  1 +
>  arch/arm/kvm/mmu.c| 39 
> ++-
>  arch/arm/mm/pageattr.c|  7 +++
>  arch/arm64/include/asm/cacheflush.h   |  1 +
>  arch/arm64/include/asm/kvm_mmu.h  |  5 -
>  arch/arm64/include/asm/memory.h   |  1 +
>  arch/arm64/include/asm/pgtable.h  |  1 +
>  arch/arm64/include/uapi/asm/kvm.h |  1 +
>  arch/arm64/mm/pageattr.c  |  8 +++
>  include/linux/kvm_host.h  |  1 -
>  include/uapi/linux/kvm.h  |  2 ++
>  virt/kvm/kvm_main.c   |  7 ++-
>  18 files changed, 79 insertions(+), 24 deletions(-)
> 
> -- 
> 2.1.0
> 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 2/3] KVM: promote KVM_MEMSLOT_INCOHERENT to uapi

2015-05-14 Thread Christoffer Dall
On Wed, May 13, 2015 at 01:31:53PM +0200, Andrew Jones wrote:
> Commit 1050dcda30529 introduced KVM_MEMSLOT_INCOHERENT to flag memory
> regions that may have coherency issues due to mapping host system RAM
> as non-cacheable. This was introduced as a KVM internal flag, but now
> give KVM userspace access to it so that it may use it for hinting
> likely problematic regions. Also rename to KVM_MEM_UNCACHED.
> 
> Signed-off-by: Andrew Jones 

Reviewed-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 3/3] arm/arm64: KVM: implement 'uncached' mem coherency

2015-05-14 Thread Christoffer Dall
On Wed, May 13, 2015 at 01:31:54PM +0200, Andrew Jones wrote:
> When S1 and S2 memory attributes combine wrt to caching policy,
> non-cacheable types take precedence. If a guest maps a region as
> device memory, which KVM userspace is using to emulate the device
> using normal, cacheable memory, then we lose coherency. With
> KVM_MEM_UNCACHED, KVM userspace can now hint to KVM which memory
> regions are likely to be problematic. With this patch, as pages
> of these types of regions are faulted into the guest, not only do
> we flush the page's dcache, but we also change userspace's
> mapping to NC in order to maintain coherency.
> 
> What if the guest doesn't do what we expect? While we can't
> force a guest to use cacheable memory, we can take advantage of
> the non-cacheable precedence, and force it to use non-cacheable.
> So, this patch also introduces PAGE_S2_NORMAL_NC, and uses it on
> KVM_MEM_UNCACHED regions to force them to NC.
> 
> We now have both guest and userspace on the same page (pun intended)

I'd like to revisit the overall approach here.  Is doing non-cached
accesses in both the guest and host really the right thing to do here?

The semantics of the device becomes that it is cache coherent (because
QEMU is), and I think Marc argued that Linux/UEFI should simply be
adapted to handle whatever emulated devices we have as coherent.  I also
remember someone arguing that would be wrong (Peter?).

Finally, does this address all cache coherency issues with emulated
devices?  Some VOS guys had seen things still not working with this
approach, unsure why...  I'd like to avoid us merging this only to merge
a more complete solution in a few weeks which reverts this solution...

More comments/questions below:

> 
> Signed-off-by: Andrew Jones 
> ---
>  arch/arm/include/asm/kvm_mmu.h|  5 -
>  arch/arm/include/asm/pgtable-3level.h |  1 +
>  arch/arm/include/asm/pgtable.h|  1 +
>  arch/arm/kvm/mmu.c| 37 
> +++
>  arch/arm64/include/asm/kvm_mmu.h  |  5 -
>  arch/arm64/include/asm/memory.h   |  1 +
>  arch/arm64/include/asm/pgtable.h  |  1 +
>  7 files changed, 36 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h
> index 405aa18833073..e8034a80b12e5 100644
> --- a/arch/arm/include/asm/kvm_mmu.h
> +++ b/arch/arm/include/asm/kvm_mmu.h
> @@ -214,8 +214,11 @@ static inline void __coherent_cache_guest_page(struct 
> kvm_vcpu *vcpu, pfn_t pfn,
>   while (size) {
>   void *va = kmap_atomic_pfn(pfn);
>  
> - if (need_flush)
> + if (need_flush) {
>   kvm_flush_dcache_to_poc(va, PAGE_SIZE);
> + if (ipa_uncached)
> + set_memory_nc((unsigned long)va, 1);

nit: consider moving this outside the need_flush

> + }
>  
>   if (icache_is_pipt())
>   __cpuc_coherent_user_range((unsigned long)va,
> diff --git a/arch/arm/include/asm/pgtable-3level.h 
> b/arch/arm/include/asm/pgtable-3level.h
> index a745a2a53853c..39b3f7a40e663 100644
> --- a/arch/arm/include/asm/pgtable-3level.h
> +++ b/arch/arm/include/asm/pgtable-3level.h
> @@ -121,6 +121,7 @@
>   * 2nd stage PTE definitions for LPAE.
>   */
>  #define L_PTE_S2_MT_UNCACHED (_AT(pteval_t, 0x0) << 2) /* strongly 
> ordered */
> +#define L_PTE_S2_MT_NORMAL_NC(_AT(pteval_t, 0x5) << 2) /* 
> normal non-cacheable */
>  #define L_PTE_S2_MT_WRITETHROUGH (_AT(pteval_t, 0xa) << 2) /* normal 
> inner write-through */
>  #define L_PTE_S2_MT_WRITEBACK(_AT(pteval_t, 0xf) << 2) /* 
> normal inner write-back */
>  #define L_PTE_S2_MT_DEV_SHARED   (_AT(pteval_t, 0x1) << 2) /* 
> device */
> diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h
> index f40354198bad4..ae13ca8b0a23d 100644
> --- a/arch/arm/include/asm/pgtable.h
> +++ b/arch/arm/include/asm/pgtable.h
> @@ -100,6 +100,7 @@ extern pgprot_t   pgprot_s2_device;
>  #define PAGE_HYP _MOD_PROT(pgprot_kernel, L_PTE_HYP)
>  #define PAGE_HYP_DEVICE  _MOD_PROT(pgprot_hyp_device, L_PTE_HYP)
>  #define PAGE_S2  _MOD_PROT(pgprot_s2, L_PTE_S2_RDONLY)
> +#define PAGE_S2_NORMAL_NC__pgprot((pgprot_val(PAGE_S2) & 
> ~L_PTE_S2_MT_MASK) | L_PTE_S2_MT_NORMAL_NC)
>  #define PAGE_S2_DEVICE   _MOD_PROT(pgprot_s2_device, 
> L_PTE_S2_RDONLY)
>  
>  #define __PAGE_NONE  __pgprot(_L_PTE_DEFAULT | L_PTE_RDONLY | 
> L_PTE_XN | L_PTE_NONE)
> diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c
> index bc1665acd73e7..6b3bd8061bd2a 100644
> --- a/arch/arm/kvm/mmu.c
> +++ b/arch/arm/kvm/mmu.c
> @@ -1220,7 +1220,7 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, 
> phys_addr_t fault_ipa,
>   struct vm_area_struct *vma;
>   pfn_t pfn;
>   pgprot_t mem_type = PAGE_S2;
> - bool fault_ipa_uncached;
>

Re: [RFC/RFT PATCH v2 1/3] arm/arm64: pageattr: add set_memory_nc

2015-05-14 Thread Christoffer Dall
On Wed, May 13, 2015 at 01:31:52PM +0200, Andrew Jones wrote:
> Provide a method to change normal, cacheable memory to non-cacheable.
> KVM will make use of this to keep emulated device memory regions
> coherent with the guest.
> 
> Signed-off-by: Andrew Jones 

Reviewed-by: Christoffer Dall 

But you obviously need Russell and Will/Catalin to ack/merge this.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 4/5] arm64: alternative: Introduce feature for GICv3 CPU interface

2015-05-14 Thread Christoffer Dall
On Fri, Mar 27, 2015 at 01:09:24PM +0000, Marc Zyngier wrote:
> Add a new item to the feature set (ARM64_HAS_SYSREG_GIC_CPUIF)
> to indicate that we have a system register GIC CPU interface
> 
> This will help KVM switching to alternative instruction patching.
> 
> Reviewed-by: Andre Przywara 
> Acked-by: Will Deacon 
> Signed-off-by: Marc Zyngier 
> ---
>  arch/arm64/include/asm/cpufeature.h |  8 +++-
>  arch/arm64/kernel/cpufeature.c  | 16 
>  2 files changed, 23 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/arm64/include/asm/cpufeature.h 
> b/arch/arm64/include/asm/cpufeature.h
> index 6ae35d1..d9e57b5 100644
> --- a/arch/arm64/include/asm/cpufeature.h
> +++ b/arch/arm64/include/asm/cpufeature.h
> @@ -23,8 +23,9 @@
>  
>  #define ARM64_WORKAROUND_CLEAN_CACHE 0
>  #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE 1
> +#define ARM64_HAS_SYSREG_GIC_CPUIF   2
>  
> -#define ARM64_NCAPS  2
> +#define ARM64_NCAPS  3
>  
>  #ifndef __ASSEMBLY__
>  
> @@ -37,6 +38,11 @@ struct arm64_cpu_capabilities {
>   u32 midr_model;
>   u32 midr_range_min, midr_range_max;
>   };
> +
> + struct {/* Feature register checking */
> + u64 register_mask;
> + u64 register_value;
> + };
>   };
>  };
>  
> diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c
> index 3d9967e..b0bea2b3 100644
> --- a/arch/arm64/kernel/cpufeature.c
> +++ b/arch/arm64/kernel/cpufeature.c
> @@ -22,7 +22,23 @@
>  #include 
>  #include 
>  
> +static bool
> +has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry)
> +{
> + u64 val;
> +
> + val = read_cpuid(id_aa64pfr0_el1);

is this preferred compared to fishing it out of cpuinfo ?

> + return (val & entry->register_mask) == entry->register_value;
> +}
> +
>  static const struct arm64_cpu_capabilities arm64_features[] = {
> + {
> + .desc = "system register GIC CPU interface",
> + .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
> + .matches = has_id_aa64pfr0_feature,
> + .register_mask = (0xf << 24),
> + .register_value = (1 << 24),

I don't know if it's worth defining these masks in some header file.
The only other place I could see them used was in head.S.

> + },
>   {},
>  };
>  
> -- 
> 2.1.4
> 

Besides the nits, this looks good to me.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 0/3] KVM: Introduce KVM_MEM_UNCACHED

2015-05-14 Thread Christoffer Dall
On Thu, May 14, 2015 at 01:09:34PM +0200, Laszlo Ersek wrote:
> On 05/14/15 12:30, Christoffer Dall wrote:
> > On Wed, May 13, 2015 at 01:31:51PM +0200, Andrew Jones wrote:
> >> Introduce a new memory region flag, KVM_MEM_UNCACHED, which is
> >> needed by ARM. This flag informs KVM that the given memory region
> >> is typically mapped by the guest as non-cacheable. KVM for ARM
> >> then ensures that that memory is indeed mapped non-cacheable by
> >> the guest, and also remaps that region as non-cacheable for
> >> userspace, allowing them both to maintain a coherent view.
> >>
> >> Changes since v1:
> >>  1) don't pin pages [Paolo]
> >>  2) ensure the guest maps the memory non-cacheable [me]
> >>  3) clean up memslot flag documentation [Christoffer]
> >> changes 1 and 2 effectively redesigned/rewrote v1. Find v1 here
> >> http://www.spinics.net/lists/kvm-arm/msg14022.html
> >>
> >> The QEMU series for v1 hasn't really changed. Only the linux
> >> header hack needed to bump KVM_CAP_UNCACHED_MEM from 107 to
> >> 116.  Find the series here
> >> http://www.spinics.net/lists/kvm-arm/msg14026.html
> >>
> >> Testing:
> >> This series still needs lots of testing, but I thought I'd
> >> kick it to the list early, as there's been recent interest
> >> in solving this problem, and I'd like to get test results
> >> and opinions on this approach from others sooner than later.
> >> I've tested with AAVMF (UEFI for AArch64 mach-virt guests).
> >> AAVMF has a kludge in it to avoid the coherency problem.
> > 
> > How does the 'kludge' work?
> 
> https://github.com/tianocore/edk2/commit/f9a8be42
> 
> (It's probably worth looking at the documentation in the first hunk too,
> under the commit message.)
> 
Why is this a hack/unintuitive?  Is the semantics of the QEMU PCI bus
not simply that MMIO regions are coherent?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 0/3] KVM: Introduce KVM_MEM_UNCACHED

2015-05-14 Thread Christoffer Dall
On Thu, May 14, 2015 at 01:31:03PM +0200, Paolo Bonzini wrote:
> 
> 
> On 14/05/2015 13:29, Christoffer Dall wrote:
> > > (It's probably worth looking at the documentation in the first hunk too,
> > > under the commit message.)
> > 
> > Why is this a hack/unintuitive?  Is the semantics of the QEMU PCI bus
> > not simply that MMIO regions are coherent?
> 
> Only until device assignment gets into the picture.
> 
Will UEFI have to deal with device assignment in any respect?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 5/5] arm64: KVM: Switch vgic save/restore to alternative_insn

2015-05-14 Thread Christoffer Dall
On Fri, Mar 27, 2015 at 01:09:25PM +0000, Marc Zyngier wrote:
> So far, we configured the world-switch by having a small array
> of pointers to the save and restore functions, depending on the
> GIC used on the platform.
> 
> Loading these values each time is a bit silly (they never change),
> and it makes sense to rely on the instruction patching instead.
> 
> This leads to a nice cleanup of the code.
> 
> Acked-by: Will Deacon 
> Signed-off-by: Marc Zyngier 

I gave this a quick spin on Juno as well and works as expected:

Reviewed-by: Christoffer Dall 
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 0/3] KVM: Introduce KVM_MEM_UNCACHED

2015-05-14 Thread Christoffer Dall
On Thu, May 14, 2015 at 01:38:38PM +0200, Paolo Bonzini wrote:
> 
> 
> On 14/05/2015 13:36, Christoffer Dall wrote:
> > > > > (It's probably worth looking at the documentation in the first hunk 
> > > > > too,
> > > > > under the commit message.)
> > > > 
> > > > Why is this a hack/unintuitive?  Is the semantics of the QEMU PCI bus
> > > > not simply that MMIO regions are coherent?
> > > 
> > > Only until device assignment gets into the picture.
> > 
> > Will UEFI have to deal with device assignment in any respect?
> 
> Why not?  For example you could do network boot from an assigned network
> card.
> 
> In fact, anything that UEFI has to deal with, the OS has to deal with
> too.  If you need a UEFI hack, chances are you need or will need a Linux
> hack too.
> 
Fair enough.  I was thinking that UEFI needs to be built with knowledge
of all the hardware present including any passthrough devices, but I
guess this is plainly not true with PCI (and might not even be true with
the level of DT parsing we do for the virtual platform).

So, getting back to my original question.  Is the point then that UEFI
must assume (from ACPI/DT) the cache-coherency properties of the PCI
controller which exists in hardware on the system you're running on,
even for the virtual PCI bus because that will be the semantics for
assigned devices?

And in that case, we have no way to distinguish between passthrough
devices and virtual devices plugged into the virtual PCI bus?

What about the idea of having two virtual PCI buses on your system where
one is always cache-coherent and used for virtual devices, and the other
is whatever the hardware is and used for passthrough devices?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 0/3] KVM: Introduce KVM_MEM_UNCACHED

2015-05-14 Thread Christoffer Dall
On Thu, May 14, 2015 at 02:08:49PM +0200, Paolo Bonzini wrote:
> 
> 
> On 14/05/2015 14:00, Christoffer Dall wrote:
> > So, getting back to my original question.  Is the point then that UEFI
> > must assume (from ACPI/DT) the cache-coherency properties of the PCI
> > controller which exists in hardware on the system you're running on,
> > even for the virtual PCI bus because that will be the semantics for
> > assigned devices?
> > 
> > And in that case, we have no way to distinguish between passthrough
> > devices and virtual devices plugged into the virtual PCI bus?
> 
> Well, we could use the subsystem id.  But it's a hack, and may cause
> incompatibilities with some drivers.  Michael, any ideas?
> 
> > What about the idea of having two virtual PCI buses on your system where
> > one is always cache-coherent and used for virtual devices, and the other
> > is whatever the hardware is and used for passthrough devices?
> 
> I think that was rejected before.
> 

Do you remember where?  I just remember Catalin mentioning the idea to
me verbally.

Besides the slightly heavy added use of resources etc. it seems like it
would address some of our issues in a good way.

But I'm still not sure why UEFI/Linux currently sees our PCI bus as
being non-coherent when in fact it is and we have no passthrough issues
currently.  Are all PCI controllers always non-coherent for some reason
and therefore we model it as such too?

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 0/3] KVM: Introduce KVM_MEM_UNCACHED

2015-05-14 Thread Christoffer Dall
On Thu, May 14, 2015 at 02:28:49PM +0200, Paolo Bonzini wrote:
> 
> 
> On 14/05/2015 14:24, Christoffer Dall wrote:
> > On Thu, May 14, 2015 at 02:08:49PM +0200, Paolo Bonzini wrote:
> >>
> >>
> >> On 14/05/2015 14:00, Christoffer Dall wrote:
> >>> So, getting back to my original question.  Is the point then that UEFI
> >>> must assume (from ACPI/DT) the cache-coherency properties of the PCI
> >>> controller which exists in hardware on the system you're running on,
> >>> even for the virtual PCI bus because that will be the semantics for
> >>> assigned devices?
> >>>
> >>> And in that case, we have no way to distinguish between passthrough
> >>> devices and virtual devices plugged into the virtual PCI bus?
> >>
> >> Well, we could use the subsystem id.  But it's a hack, and may cause
> >> incompatibilities with some drivers.  Michael, any ideas?
> >>
> >>> What about the idea of having two virtual PCI buses on your system where
> >>> one is always cache-coherent and used for virtual devices, and the other
> >>> is whatever the hardware is and used for passthrough devices?
> >>
> >> I think that was rejected before.
> > 
> > Do you remember where?  I just remember Catalin mentioning the idea to
> > me verbally.
> 
> In the last centithread on the subject. :)
> 
> At least I and Peter disagreed.  It's not about the heavy added use of
> resources, it's more about it being really easy to misconfigure.
> 
> > But I'm still not sure why UEFI/Linux currently sees our PCI bus as
> > being non-coherent when in fact it is and we have no passthrough issues
> > currently.  Are all PCI controllers always non-coherent for some reason
> > and therefore we model it as such too?
> 
> Well, PCI BARs are generally MMIO resources, and hence should not be cached.
> 
> As an optimization, OS drivers can mark them as cacheable or
> write-combining or something like that, but in general it's a safe
> default to leave them uncached---one would think.
> 
ok, I guess this series makes sense then, assuming it works, and
assuming we don't kill performance by going to RAM all the time when we
don't have to...

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 1/3] arm/arm64: pageattr: add set_memory_nc

2015-05-15 Thread Christoffer Dall
On Thu, May 14, 2015 at 03:46:44PM +0200, Andrew Jones wrote:
> On Thu, May 14, 2015 at 01:05:09PM +0200, Christoffer Dall wrote:
> > On Wed, May 13, 2015 at 01:31:52PM +0200, Andrew Jones wrote:
> > > Provide a method to change normal, cacheable memory to non-cacheable.
> > > KVM will make use of this to keep emulated device memory regions
> > > coherent with the guest.
> > > 
> > > Signed-off-by: Andrew Jones 
> > 
> > Reviewed-by: Christoffer Dall 
> > 
> > But you obviously need Russell and Will/Catalin to ack/merge this.
> 
> I guess this patch is going to go away in the next round. You've
> pointed out that I screwed stuff up royally with my over eagerness
> to reuse code. I need to reimplement change_memory_common, but a
> version that takes an mm, which is more or less what I did in the
> last version of this series, back when I was pinning pages.
> 
Yeah, I just read this one before looking at the others because it was a
simple one...

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [RFC/RFT PATCH v2 3/3] arm/arm64: KVM: implement 'uncached' mem coherency

2015-05-15 Thread Christoffer Dall
On Thu, May 14, 2015 at 03:32:13PM +0200, Andrew Jones wrote:
> On Thu, May 14, 2015 at 12:55:49PM +0200, Christoffer Dall wrote:
> > On Wed, May 13, 2015 at 01:31:54PM +0200, Andrew Jones wrote:
> > > When S1 and S2 memory attributes combine wrt to caching policy,
> > > non-cacheable types take precedence. If a guest maps a region as
> > > device memory, which KVM userspace is using to emulate the device
> > > using normal, cacheable memory, then we lose coherency. With
> > > KVM_MEM_UNCACHED, KVM userspace can now hint to KVM which memory
> > > regions are likely to be problematic. With this patch, as pages
> > > of these types of regions are faulted into the guest, not only do
> > > we flush the page's dcache, but we also change userspace's
> > > mapping to NC in order to maintain coherency.
> > > 
> > > What if the guest doesn't do what we expect? While we can't
> > > force a guest to use cacheable memory, we can take advantage of
> > > the non-cacheable precedence, and force it to use non-cacheable.
> > > So, this patch also introduces PAGE_S2_NORMAL_NC, and uses it on
> > > KVM_MEM_UNCACHED regions to force them to NC.
> > > 
> > > We now have both guest and userspace on the same page (pun intended)
> > 
> > I'd like to revisit the overall approach here.  Is doing non-cached
> > accesses in both the guest and host really the right thing to do here?
> 
> I think so, but all ideas/approaches are still on the table. This is
> still an RFC.
> 
> > 
> > The semantics of the device becomes that it is cache coherent (because
> > QEMU is), and I think Marc argued that Linux/UEFI should simply be
> > adapted to handle whatever emulated devices we have as coherent.  I also
> > remember someone arguing that would be wrong (Peter?).
> 
> I'm not really for quirking all devices in all guest types (AAVMF, Linux,
> other bootloaders, other OSes). Windows is unlikely to apply any quirks.
> 

Well my point was that if we're emulating a platform with coherent IO
memory for PCI devices that is something that the guest should work with
as such, but as Paolo explained it should always be safe for a guest to
assume non-coherent, so that doesn't work.

> > 
> > Finally, does this address all cache coherency issues with emulated
> > devices?  Some VOS guys had seen things still not working with this
> > approach, unsure why...  I'd like to avoid us merging this only to merge
> > a more complete solution in a few weeks which reverts this solution...
> 
> I'm not sure (this is still an RFT too :-) We definitely would need to
> scatter some more memory_region_set_uncached() calls around QEMU first.
> 

It would be good if you could sync with the VOS guys and make sure your
patch set addresses their issues with the appropriate
memory_region_set_uncached() added to QEMU, and if it does not, some
vague idea why that falls outside of the scope of this patch set.  After
all, adding a USB controller to a VM is not that esoteric a use case,
is it?

> > 
> > More comments/questions below:
> > 
> > > 
> > > Signed-off-by: Andrew Jones 
> > > ---
> > >  arch/arm/include/asm/kvm_mmu.h|  5 -
> > >  arch/arm/include/asm/pgtable-3level.h |  1 +
> > >  arch/arm/include/asm/pgtable.h|  1 +
> > >  arch/arm/kvm/mmu.c| 37 
> > > +++
> > >  arch/arm64/include/asm/kvm_mmu.h  |  5 -
> > >  arch/arm64/include/asm/memory.h   |  1 +
> > >  arch/arm64/include/asm/pgtable.h  |  1 +
> > >  7 files changed, 36 insertions(+), 15 deletions(-)
> > > 
> > > diff --git a/arch/arm/include/asm/kvm_mmu.h 
> > > b/arch/arm/include/asm/kvm_mmu.h
> > > index 405aa18833073..e8034a80b12e5 100644
> > > --- a/arch/arm/include/asm/kvm_mmu.h
> > > +++ b/arch/arm/include/asm/kvm_mmu.h
> > > @@ -214,8 +214,11 @@ static inline void 
> > > __coherent_cache_guest_page(struct kvm_vcpu *vcpu, pfn_t pfn,
> > >   while (size) {
> > >   void *va = kmap_atomic_pfn(pfn);
> > >  
> > > - if (need_flush)
> > > + if (need_flush) {
> > >   kvm_flush_dcache_to_poc(va, PAGE_SIZE);
> > > + if (ipa_uncached)
> > > + set_memory_nc((unsigned long)va, 1);
> > 
> > nit: consider moving this outside the need_flush
> > 
> > > + }
> > >  
> > >   if (icache_is_pipt())
> > > 

Re: [Qemu-devel] [RFC/RFT PATCH v2 0/3] KVM: Introduce KVM_MEM_UNCACHED

2015-05-15 Thread Christoffer Dall
On Thu, May 14, 2015 at 03:36:37PM +0200, Andrew Jones wrote:
> On Thu, May 14, 2015 at 02:11:59PM +0100, Peter Maydell wrote:
> > On 14 May 2015 at 14:03, Andrew Jones  wrote:
> > > On Thu, May 14, 2015 at 11:37:46AM +0100, Peter Maydell wrote:
> > >> On 14 May 2015 at 11:31, Andrew Jones  wrote:
> > >> > Forgot to (4): switch from setting userspace's mapping to
> > >> > device memory to normal, non-cacheable. Using device memory
> > >> > caused a problem that Alex Graf found, and Peter Maydell suggested
> > >> > using normal, non-cacheable instead.
> > >>
> > >> Did you check that non-cacheable is definitely the correct
> > >> kind of Normal memory attribute we want? (ie not write-through).
> > >
> > > I was concerned that write-through wouldn't be sufficient. If the
> > > guest writes to its non-cached memory, and QEMU needs to see what
> > > it wrote, then won't write-through fail to work? Unless we some
> > > how invalidate the cache first?
> > 
> > Well, I meant more that the correct mapping for userspace is
> > the same as the guest, whatever that is, and so somebody needs
> > to look at what the guest actually does rather than merely
> > hoping NormalNC is OK. (For instance, do we need to provide
> > support for QEMU to map both NC and writethrough?)
> >
> 
> Ah, we assume the guest is mapping it as device memory, and in
> this version of the series, I ensure that it is at least NC with
> the S2 attributes. I don't think we can look at what some guests
> do with some devices to come up with anything beyond (poor?)
> heuristics. I prefer that we force both the guest and QEMU to NC
> (or guest chooses Device and QEMU is forced to NC) to make sure
> we get it right.
> 
But picking up on Peter's feedback I think it would be good if the
series clearly states something like:

1) We assume that the guest may use device type memory for the accesses
2) we cannot use device memory for the userspace mapping because
userspace may be doing unaligned accesses to it 3) normal non-cacheable
bridges these worlds because of x, y, and z.

I assume x, y, and z would include a fairly involved discussion of the
interesting aspects of how you can configure memory accesses on ARM ...
:)

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC] ARM/ARM64: KVM: Implement KVM_FLUSH_DCACHE_GPA ioctl

2015-05-15 Thread Christoffer Dall
On Fri, May 15, 2015 at 01:43:57PM +0200, Laszlo Ersek wrote:
> On 05/07/15 19:01, Paolo Bonzini wrote:
> > 
> > 
> > On 07/05/2015 18:56, Jérémy Fanguède wrote:
> >> USB devices fail with a timeout error, as if the communication between
> >> the kernel and the devices fail at a certain point:
> >> usb 1-1: device not accepting address 5, error -110
> >> usb usb1-port1: unable to enumerate USB device
> 
> This is consistent with what I saw in my earlier testing.
> 
> >> e1000 fails when the userspace tries to use it, with these type of
> >> kernel messages:
> >> e1000 :00:02.0 eth0: Detected Tx Unit Hang
> >>   Tx Queue <0>
> >>   TDH  
> >>   TDT  
> >>   next_to_use  
> >>   next_to_clean<9>
> >> buffer_info[next_to_clean]
> >>   time_stamp   
> >>   next_to_watch
> >>   jiffies  
> >>   next_to_watch.status <0>
> > 
> > Can you find out what memory attributes the guest is using for the
> > memory---and if it's uncached, why?
> 
> For USB, see "drivers/usb/core/hcd-pci.c", function usb_hcd_pci_probe():
> it uses ioremap_nocache().
> 
> On the "why", that ioremap_nocache() call can be tracked to
> 
> http://git.kernel.org/cgit/linux/kernel/git/tglx/history.git/commit/?id=a914dd8b
> 
> (Feb 2002), which predates the kernel's move to git. I guess
> ioremap_nocache() is used simply because USB host controllers are
> supposed to be programmed like that.
> 
> And, from "arch/arm64/include/asm/io.h":
> 
> #define ioremap_nocache(addr, size) __ioremap((addr), (size),
> __pgprot(PROT_DEVICE_nGnRE))
> 
So this just means that these devices should be mapped as device memory
(like the VGA case before) right?  And therefore should work with Drew's
patches (assuming they are actually correct and you add the right QEMU
annotations to set the memory regions as non-cacheable), correct?

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [RFC/RFT PATCH v2 1/3] arm/arm64: pageattr: add set_memory_nc

2015-05-20 Thread Christoffer Dall
On Tue, May 19, 2015 at 12:18:54PM +0100, Catalin Marinas wrote:
> On Tue, May 19, 2015 at 11:03:22AM +0100, Andrew Jones wrote:
> > On Mon, May 18, 2015 at 04:53:03PM +0100, Catalin Marinas wrote:
> > > Another way would be to split the vma containing the non-cacheable
> > > memory so that you get a single vma with the vm_page_prot as
> > > Non-cacheable.
> > 
> > This sounds interesting. Actually, it even crossed my mind once when I
> > first saw that the vma would overwrite the attributes, but then, sigh,
> > I let my brain take a stupidity bath.
> > 
> > > 
> > > Yet another approach could be for KVM to mmap the necessary memory for
> > > Qemu via a file_operations.mmap call (but that's only for ranges outside
> > > the guest "RAM").
> > 
> > I guess I prefer the vma splitting, rather than this (the vma creating
> > with mmap), as it keeps the KVM interface from changing (as you point out
> > below). Well, unless there are other advantages to this that are worth
> > considering?
> 
> The advantage is that you don't need to deal with the mm internals in
> the KVM code.
> 
> But you can probably add such code directly to mm/ and reuse some of the
> existing code in there already as part of change_protection(),
> mprotect_fixup(), sys_mprotect(). Actually, once you split the vma and
> set the new protection (something similar to mprotect_fixup), it looks
> to me like you can just call change_protection(vma->vm_page_prot).
> 
> > > I didn't have time to follow these threads in details, but just to
> > > recap my understanding, we have two main use-cases:
> > > 
> > > 1. Qemu handling guest I/O to device (e.g. PCIe BARs)
> > > 2. Qemu emulating device DMA
> > > 
> > > For (1), I guess Qemu uses an anonymous mmap() and then tells KVM about
> > > this memory slot. The memory attributes in this case could be Device
> > > because that's how the guest would normally map it. The
> > > file_operations.mmap trick would work in this case but this means
> > > expanding the KVM ABI beyond just an ioctl().
> > > 
> > > For (2), since Qemu is writing to the guest "RAM" (e.g. video
> > > framebuffer allocated by the guest), I still think the simplest is to
> > > tell the guest (via DT) that such device is cache coherent rather than
> > > trying to remap the Qemu mapping as non-cacheable.
> > 
> > If we need a solution for (1), then I'd prefer that it work and be
> > applied to (2) as well. Anyway, I'm still not 100% sure we can count on
> > > all guest types (bootloaders, different OSes) to listen to us. They may
> > assume non-cacheable is typical and safe, and thus just do that always.
> > We can certainly change some of those bootloaders and OSes, but probably
> > not all of them.
> 
> That's fine by me. Once you get the vma splitting and attributes
> changing done, I think you get the second one for free.
> 
> Do we want to differentiate between Device and Normal Non-cacheable
> memory? Something like KVM_MEMSLOT_DEVICE?
> 
> Nitpick: I'm not sure whether "uncached" is clear enough. In Linux,
> pgprot_noncached() returns Strongly Ordered memory. For Normal
> Non-cacheable we used pgprot_writecombine (e.g. a video framebuffer).
> 
> Maybe something like KVM_MEMSLOT_COHERENT meaning a request to KVM to
> ensure that guest and host access it coherently (which would mean
> writecombine for ARM). That's similar naming to functions like
> dma_alloc_coherent() that return cacheable or non-cacheable memory based
> on what the device supports. Anyway, I'm not too bothered with the
> naming.
> 
One thing to keep in mind for (2) is that QEMU is likely to do things
like calling regular memcpy() on the memory region, so mapping it as
device memory which would fault on unaligned accesses may be a problem,
so ideally there is a memory type for the user space mapping which
allows such behavior where we at the same time can guarantee that
the mapping is coherent with the guest mapping through the S2
attributes.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v2 0/3] Add support for GICv2m and MSIs to arm-virt

2015-05-24 Thread Christoffer Dall
On Wed, May 06, 2015 at 05:39:28PM +0100, Peter Maydell wrote:
> On 6 May 2015 at 17:33, Peter Maydell  wrote:
> > On 27 April 2015 at 18:31, Christoffer Dall  
> > wrote:
> >> Now when we have a host generic PCIe controller in the virt board, it
> >> would be nice to be able to use MSIs so that we can eventually enable
> >> VHOST with KVM.
> >>
> >> With these patches you can use MSIs with TCG and with KVM, but you still
> >> need some fixes for the mapping of the IRQ index to the GSI number for
> >> IRQFD to work.  A separate series that enables IRQFD and vhost
> >> is available: "ARM adaptations for vhost irqfd setup"
> >> (https://lists.gnu.org/archive/html/qemu-devel/2015-04/msg01054.html)
> >>
> >> Tested with KVM on XGene and with TCG by configuring a virtio-pci
> >> network adapter for the guest and verifying MSIs going through as
> >> expected.
> >
> > You forgot to change the QOM device name to 'arm-gicv2m', but
> > I'll fix that up as I apply this to target-arm.next.
> 
> ...except this series breaks booting of a linux guest using PCI
> on the virt board with aarch32:
> 
> PCI host bridge /pcie@1000 ranges:
>IO 0x3eff..0x3eff -> 0x
>   MEM 0x1000..0x3efe -> 0x1000
> pci-host-generic 3f00.pcie: PCI host bridge to bus :00
> pci_bus :00: root bus resource [bus 00-0f]
> pci_bus :00: root bus resource [io  0x-0x]
> pci_bus :00: root bus resource [mem 0x1000-0x3efe]
> PCI: bus0: Fast back to back transfers disabled
> pci :00:01.0: of_irq_parse_pci() failed with rc=-22
> pci :00:02.0: of_irq_parse_pci() failed with rc=-22
> pci :00:02.0: BAR 6: assigned [mem 0x1000-0x1003 pref]
> pci :00:01.0: BAR 1: assigned [mem 0x1004-0x10040fff]
> pci :00:02.0: BAR 1: assigned [mem 0x10041000-0x10041fff]
> pci :00:01.0: BAR 0: assigned [io  0x1000-0x103f]
> pci :00:02.0: BAR 0: assigned [io  0x1040-0x105f]
> virtio-pci :00:01.0: enabling device (0100 -> 0103)
> virtio-pci :00:02.0: enabling device (0100 -> 0103)
> virtio_blk: probe of virtio32 failed with error -22
> virtio_net: probe of virtio33 failed with error -22
> 
> (and without virtio-blk we don't mount our rootfs).
> 
Shanker figured out that this was due to me changing address-cells and
size-cells in the gic node and breaking the irq-map in the DT and
provided a fix.

I will send a new series.

Thanks,
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 4/4] target-arm: Add the GICv2m to the virt board

2015-05-24 Thread Christoffer Dall
Add a GICv2m device to the virt board to enable MSIs on the generic PCI
host controller.  We allocate 64 SPIs in the IRQ space for now (this can
be increased/decreased later) and map the GICv2m right after the GIC in
the memory map.

Signed-off-by: Christoffer Dall 
---
Changes since v2:
 - Factored out changes to GIC DT node to previous patch.
 - Renamed QOM type name to "arm-gicv2m"
Changes since v1:
 - Remove stray merge conflict line
 - Reworded commit message.

 hw/arm/virt.c | 42 +-
 1 file changed, 41 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 6797c6f..2972bb3 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -45,6 +45,7 @@
 #include "hw/pci-host/gpex.h"
 
 #define NUM_VIRTIO_TRANSPORTS 32
+#define NUM_GICV2M_SPIS 64
 
 /* Number of external interrupt lines to configure the GIC with */
 #define NUM_IRQS 128
@@ -71,6 +72,7 @@ enum {
 VIRT_RTC,
 VIRT_FW_CFG,
 VIRT_PCIE,
+VIRT_GIC_V2M,
 };
 
 typedef struct MemMapEntry {
@@ -88,6 +90,7 @@ typedef struct VirtBoardInfo {
 int fdt_size;
 uint32_t clock_phandle;
 uint32_t gic_phandle;
+uint32_t v2m_phandle;
 } VirtBoardInfo;
 
 typedef struct {
@@ -127,6 +130,7 @@ static const MemMapEntry a15memmap[] = {
 /* GIC distributor and CPU interfaces sit inside the CPU peripheral space 
*/
 [VIRT_GIC_DIST] =   { 0x0800, 0x0001 },
 [VIRT_GIC_CPU] ={ 0x0801, 0x0001 },
+[VIRT_GIC_V2M] ={ 0x0802, 0x1000 },
 [VIRT_UART] =   { 0x0900, 0x1000 },
 [VIRT_RTC] ={ 0x0901, 0x1000 },
 [VIRT_FW_CFG] = { 0x0902, 0x000a },
@@ -148,6 +152,7 @@ static const int a15irqmap[] = {
 [VIRT_RTC] = 2,
 [VIRT_PCIE] = 3, /* ... to 6 */
 [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
+[VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
 };
 
 static VirtBoardInfo machines[] = {
@@ -323,9 +328,21 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
 }
 }
 
-static void fdt_add_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi)
 {
+vbi->v2m_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
+qemu_fdt_add_subnode(vbi->fdt, "/intc/v2m");
+qemu_fdt_setprop_string(vbi->fdt, "/intc/v2m", "compatible",
+"arm,gic-v2m-frame");
+qemu_fdt_setprop(vbi->fdt, "/intc/v2m", "msi-controller", NULL, 0);
+qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/v2m", "reg",
+ 2, vbi->memmap[VIRT_GIC_V2M].base,
+ 2, vbi->memmap[VIRT_GIC_V2M].size);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->v2m_phandle);
+}
 
+static void fdt_add_gic_node(VirtBoardInfo *vbi)
+{
 vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
 qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
 
@@ -347,6 +364,25 @@ static void fdt_add_gic_node(VirtBoardInfo *vbi)
 
 }
 
+static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
+{
+int i;
+int irq = vbi->irqmap[VIRT_GIC_V2M];
+DeviceState *dev;
+
+dev = qdev_create(NULL, "arm-gicv2m");
+sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_V2M].base);
+qdev_prop_set_uint32(dev, "base-spi", irq);
+qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS);
+qdev_init_nofail(dev);
+
+for (i = 0; i < NUM_GICV2M_SPIS; i++) {
+sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
+}
+
+fdt_add_v2m_gic_node(vbi);
+}
+
 static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
 {
 /* We create a standalone GIC v2 */
@@ -397,6 +433,8 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
 }
 
 fdt_add_gic_node(vbi);
+
+create_v2m(vbi, pic);
 }
 
 static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
@@ -707,6 +745,8 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq 
*pic)
 qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
nr_pcie_buses - 1);
 
+qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent", vbi->v2m_phandle);
+
 qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
  2, base_ecam, 2, size_ecam);
 qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 3/4] target-arm: Extend the gic node properties

2015-05-24 Thread Christoffer Dall
From: Shanker Donthineni 

In preparation for adding the GICv2m which requires address specifiers
and is a subnode of the gic, we extend the gic DT definition to specify
the #address-cells and #size-cells properties and add an empty ranges
property to the DT node, since this is required to add the
v2m node as a child of the gic node.

Note that we must also expand the irq-map to reference the gic with the
right address-cells as a consequence of this change.

Signed-off-by: Shanker Donthineni 
Signed-off-by: Christoffer Dall 
---
Changes since v2:
 - New separate patch factoring out changes to existing code for eased
   bisectability in case we broke something
 - The above fixes the issue with non-MSI compatible guests.

 hw/arm/virt.c | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index f9f7482..6797c6f 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -340,7 +340,11 @@ static void fdt_add_gic_node(VirtBoardInfo *vbi)
  2, vbi->memmap[VIRT_GIC_DIST].size,
  2, vbi->memmap[VIRT_GIC_CPU].base,
  2, vbi->memmap[VIRT_GIC_CPU].size);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#address-cells", 0x2);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#size-cells", 0x2);
+qemu_fdt_setprop(vbi->fdt, "/intc", "ranges", NULL, 0);
 qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
+
 }
 
 static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
@@ -604,11 +608,12 @@ static void create_fw_cfg(const VirtBoardInfo *vbi)
 g_free(nodename);
 }
 
+#define PCIE_IRQMAP_LEN 10
 static void create_pcie_irq_map(const VirtBoardInfo *vbi, uint32_t gic_phandle,
 int first_irq, const char *nodename)
 {
 int devfn, pin;
-uint32_t full_irq_map[4 * 4 * 8] = { 0 };
+uint32_t full_irq_map[4 * 4 * PCIE_IRQMAP_LEN] = { 0 };
 uint32_t *irq_map = full_irq_map;
 
 for (devfn = 0; devfn <= 0x18; devfn += 0x8) {
@@ -619,15 +624,15 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, 
uint32_t gic_phandle,
 int i;
 
 uint32_t map[] = {
-devfn << 8, 0, 0,   /* devfn */
-pin + 1,/* PCI pin */
-gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
+devfn << 8, 0, 0, /* devfn */
+pin + 1,  /* PCI pin */
+gic_phandle, 0, 0, irq_type, irq_nr, irq_level }; /* GIC irq */
 
 /* Convert map to big endian */
-for (i = 0; i < 8; i++) {
+for (i = 0; i < PCIE_IRQMAP_LEN; i++) {
 irq_map[i] = cpu_to_be32(map[i]);
 }
-irq_map += 8;
+irq_map += PCIE_IRQMAP_LEN;
 }
 }
 
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 0/4] Add support for GICv2m and MSIs to arm-virt

2015-05-24 Thread Christoffer Dall
Now when we have a host generic PCIe controller in the virt board, it
would be nice to be able to use MSIs so that we can eventually enable
VHOST with KVM.

With these patches you can use MSIs with TCG and with KVM, but you still
need some fixes for the mapping of the IRQ index to the GSI number for
IRQFD to work.  A separate series that enables IRQFD and vhost
is available: "ARM adaptations for vhost irqfd setup"
(https://lists.gnu.org/archive/html/qemu-devel/2015-04/msg01054.html)

Tested with KVM on XGene and with TCG by configuring a virtio-pci
network adapter for the guest and verifying MSIs going through as
expected.

See the individual patches for changelogs.

Christoffer Dall (3):
  target-arm: Add GIC phandle to VirtBoardInfo
  arm_gicv2m: Add GICv2m widget to support MSIs
  target-arm: Add the GICv2m to the virt board

Shanker Donthineni (1):
  target-arm: Extend the gic node properties

 hw/arm/virt.c |  81 +++--
 hw/intc/Makefile.objs |   1 +
 hw/intc/arm_gicv2m.c  | 190 ++
 3 files changed, 252 insertions(+), 20 deletions(-)
 create mode 100644 hw/intc/arm_gicv2m.c

-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v3 2/4] arm_gicv2m: Add GICv2m widget to support MSIs

2015-05-24 Thread Christoffer Dall
The ARM GICv2m widget is a little device that handles MSI interrupt
writes to a trigger register and ties them to a range of interrupt lines
wired to the GIC.  It has a few status/id registers and the interrupt wires,
and that's about it.

A board instantiates the device by setting the base SPI number and
number of SPIs for the frame.  The base-spi parameter is indexed in the SPI
number space only, so base-spi == 0, means IRQ number 32.  When a device
(the PCI host controller) writes to the trigger register, the payload is
the GIC IRQ number, so we have to subtract 32 from that and then index
into our frame of SPIs.

When instantiating a GICv2m device, tell PCI that we have instantiated
something that can deal with MSIs.  We rely on the board actually wiring
up the GICv2m to the PCI host controller.

Signed-off-by: Christoffer Dall 
---
Changes since v2:
 - Renamed QOM type to "arm-gicv2m"
Changes since v1:
 - Check that writes to MSI_SETSPI are within the lower boundary as well
 - Move gicv2m to common-obj in Makefile
 - Separate switch case and comment for impdef regs
 - Clearly document what is emulated
 - Allow 16 bit lower accesses to MSI_SETSPI regs
 - Fix commit grammar error
 - Remove stray pixman commit

 hw/intc/Makefile.objs |   1 +
 hw/intc/arm_gicv2m.c  | 190 ++
 2 files changed, 191 insertions(+)
 create mode 100644 hw/intc/arm_gicv2m.c

diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 843864a..092d8a8 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -11,6 +11,7 @@ common-obj-$(CONFIG_SLAVIO) += slavio_intctl.o
 common-obj-$(CONFIG_IOAPIC) += ioapic_common.o
 common-obj-$(CONFIG_ARM_GIC) += arm_gic_common.o
 common-obj-$(CONFIG_ARM_GIC) += arm_gic.o
+common-obj-$(CONFIG_ARM_GIC) += arm_gicv2m.o
 common-obj-$(CONFIG_OPENPIC) += openpic.o
 
 obj-$(CONFIG_APIC) += apic.o apic_common.o
diff --git a/hw/intc/arm_gicv2m.c b/hw/intc/arm_gicv2m.c
new file mode 100644
index 000..9f84f72
--- /dev/null
+++ b/hw/intc/arm_gicv2m.c
@@ -0,0 +1,190 @@
+/*
+ *  GICv2m extension for MSI/MSI-x support with a GICv2-based system
+ *
+ * Copyright (C) 2015 Linaro, All rights reserved.
+ *
+ * Author: Christoffer Dall 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This file implements an emulated GICv2m widget as described in the ARM
+ * Server Base System Architecture (SBSA) specification Version 2.2
+ * (ARM-DEN-0029 v2.2) pages 35-39 without any optional implementation defined
+ * identification registers and with a single non-secure MSI register frame.
+ */
+
+#include "hw/sysbus.h"
+#include "hw/pci/msi.h"
+
+#define TYPE_ARM_GICV2M "arm-gicv2m"
+#define ARM_GICV2M(obj) OBJECT_CHECK(ARMGICv2mState, (obj), TYPE_ARM_GICV2M)
+
+#define GICV2M_NUM_SPI_MAX 128
+
+#define V2M_MSI_TYPER   0x008
+#define V2M_MSI_SETSPI_NS   0x040
+#define V2M_MSI_IIDR0xFCC
+#define V2M_IIDR0   0xFD0
+#define V2M_IIDR11  0xFFC
+
+#define PRODUCT_ID_QEMU 0x51 /* ASCII code Q */
+
+typedef struct ARMGICv2mState {
+SysBusDevice parent_obj;
+
+MemoryRegion iomem;
+qemu_irq spi[GICV2M_NUM_SPI_MAX];
+
+uint32_t base_spi;
+uint32_t num_spi;
+} ARMGICv2mState;
+
+static void gicv2m_set_irq(void *opaque, int irq)
+{
+ARMGICv2mState *s = (ARMGICv2mState *)opaque;
+
+qemu_irq_pulse(s->spi[irq]);
+}
+
+static uint64_t gicv2m_read(void *opaque, hwaddr offset,
+unsigned size)
+{
+ARMGICv2mState *s = (ARMGICv2mState *)opaque;
+uint32_t val;
+
+if (size != 4) {
+qemu_log_mask(LOG_GUEST_ERROR, "gicv2m_read: bad size %u\n", size);
+return 0;
+}
+
+switch (offset) {
+case V2M_MSI_TYPER:
+val = (s->base_spi + 32) << 16;
+val |= s->num_spi;
+return val;
+case V2M_MSI_IIDR:
+/* We don't have any valid implementor so we leave that field as zero
+ * and we return 0 in the arch revision as per the spec.
+ */
+return (PRODUCT_ID_QEMU << 20);
+case V2M_IIDR0 ... V2M_IIDR11:
+/* We do not implement any optional identification registers and the
+ * mandatory MSI_PIDR2 register reads as 0x0, so we capture all
+   

[PATCH v3 1/4] target-arm: Add GIC phandle to VirtBoardInfo

2015-05-24 Thread Christoffer Dall
Instead of passing the GIC phandle around between functions, add it to
the VirtBoardInfo just like we do for the clock_phandle.  We are about
to add the v2m phandle as well, and it's easier not having to pass
around a bunch of phandles, return multiple values from functions, etc.

Reviewed-by: Peter Maydell 
Signed-off-by: Christoffer Dall 
---
Changes since v2:
 - None
Changes since v1:
 - Added reviewed-by tag

 hw/arm/virt.c | 26 +++---
 1 file changed, 11 insertions(+), 15 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index a7f9a10..f9f7482 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -87,6 +87,7 @@ typedef struct VirtBoardInfo {
 void *fdt;
 int fdt_size;
 uint32_t clock_phandle;
+uint32_t gic_phandle;
 } VirtBoardInfo;
 
 typedef struct {
@@ -322,12 +323,11 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
 }
 }
 
-static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
+static void fdt_add_gic_node(VirtBoardInfo *vbi)
 {
-uint32_t gic_phandle;
 
-gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
-qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", gic_phandle);
+vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
+qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
 
 qemu_fdt_add_subnode(vbi->fdt, "/intc");
 /* 'cortex-a15-gic' means 'GIC v2' */
@@ -340,12 +340,10 @@ static uint32_t fdt_add_gic_node(const VirtBoardInfo *vbi)
  2, vbi->memmap[VIRT_GIC_DIST].size,
  2, vbi->memmap[VIRT_GIC_CPU].base,
  2, vbi->memmap[VIRT_GIC_CPU].size);
-qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", gic_phandle);
-
-return gic_phandle;
+qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
 }
 
-static uint32_t create_gic(const VirtBoardInfo *vbi, qemu_irq *pic)
+static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
 {
 /* We create a standalone GIC v2 */
 DeviceState *gicdev;
@@ -394,7 +392,7 @@ static uint32_t create_gic(const VirtBoardInfo *vbi, 
qemu_irq *pic)
 pic[i] = qdev_get_gpio_in(gicdev, i);
 }
 
-return fdt_add_gic_node(vbi);
+fdt_add_gic_node(vbi);
 }
 
 static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
@@ -641,8 +639,7 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, 
uint32_t gic_phandle,
0x7   /* PCI irq */);
 }
 
-static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic,
-uint32_t gic_phandle)
+static void create_pcie(const VirtBoardInfo *vbi, qemu_irq *pic)
 {
 hwaddr base = vbi->memmap[VIRT_PCIE].base;
 hwaddr size = vbi->memmap[VIRT_PCIE].size;
@@ -714,7 +711,7 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq 
*pic,
  2, base_mmio, 2, size_mmio);
 
 qemu_fdt_setprop_cell(vbi->fdt, nodename, "#interrupt-cells", 1);
-create_pcie_irq_map(vbi, gic_phandle, irq, nodename);
+create_pcie_irq_map(vbi, vbi->gic_phandle, irq, nodename);
 
 g_free(nodename);
 }
@@ -736,7 +733,6 @@ static void machvirt_init(MachineState *machine)
 MemoryRegion *ram = g_new(MemoryRegion, 1);
 const char *cpu_model = machine->cpu_model;
 VirtBoardInfo *vbi;
-uint32_t gic_phandle;
 char **cpustr;
 
 if (!cpu_model) {
@@ -813,13 +809,13 @@ static void machvirt_init(MachineState *machine)
 
 create_flash(vbi);
 
-gic_phandle = create_gic(vbi, pic);
+create_gic(vbi, pic);
 
 create_uart(vbi, pic);
 
 create_rtc(vbi, pic);
 
-create_pcie(vbi, pic, gic_phandle);
+create_pcie(vbi, pic);
 
 /* Create mmio transports, so the user can create virtio backends
  * (which will be automatically plugged in to the transports). If
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [Qemu-devel] [PATCH v3 4/4] target-arm: Add the GICv2m to the virt board

2015-05-25 Thread Christoffer Dall
Hi Pavel,

On Mon, May 25, 2015 at 04:09:58PM +0300, Pavel Fedin wrote:
>  Hello!
> 
> >  typedef struct MemMapEntry {
> > @@ -88,6 +90,7 @@ typedef struct VirtBoardInfo {
> >  int fdt_size;
> >  uint32_t clock_phandle;
> >  uint32_t gic_phandle;
> > +uint32_t v2m_phandle;
> >  } VirtBoardInfo;
> 
>  Could you rename v2m_phandle to something more neutral like msi_phandle ? It 
> will also be
> used by GICv3 ITS implementation.
> 
That's sort of hard to speculate about.  Why can't those patches just
rename the variable then?  Right now, as the code stands, msi_phandle
would be less clear IMHO.

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH] arm/arm64: KVM: Properly account for guest CPU time

2015-05-28 Thread Christoffer Dall
Until now we have been calling kvm_guest_exit after re-enabling
interrupts when we come back from the guest, but this has the
unfortunate effect that CPU time accounting done in the context of timer
interrupts doesn't properly notice that the time since the last tick was
spent in the guest.

Inspired by the comment in the x86 code, simply move the
kvm_guest_exit() call below the local_irq_enable() call and change
__kvm_guest_exit() to kvm_guest_exit(), because we are now calling this
function with interrupts enabled.  Note that AFAIU we don't need an
explicit barrier like x86 because the arm/arm64 implementation of
local_irq_(en/dis)able has an implicit barrier.

At the same time, move the trace_kvm_exit() call outside of the atomic
section, since there is no reason for us to do that with interrupts
disabled.

Signed-off-by: Christoffer Dall 
---
This patch is based on kvm/queue, because it has the kvm_guest_enter/exit
rework recently posted by Christian Borntraeger.  I hope I got the logic
of this wrong, there were 2 slightly worrying facts about this:

First, we now enable and disable and enable interrupts on each exit
path, but I couldn't see any performance overhead on hackbench - yes the
only benchmark we care about.

Second, looking at the power and mips code, they seem to also call
kvm_guest_exit() before enabling interrupts, so I don't understand how
guest CPU time accounting works on those architectures.

 arch/arm/kvm/arm.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e41cb11..bd0e463 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -559,8 +559,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 
vcpu->mode = OUTSIDE_GUEST_MODE;
-   __kvm_guest_exit();
-   trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+   /*
+* Back from guest
+*/
+
/*
 * We may have taken a host interrupt in HYP mode (ie
 * while executing the guest). This interrupt is still
@@ -574,8 +576,14 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
local_irq_enable();
 
/*
-* Back from guest
-*/
+* We do local_irq_enable() before calling kvm_guest_exit so
+* that the cputime accounting done in the context of timer
+* interrupts properly accounts time spent in the guest as
+* guest time.
+*/
+   kvm_guest_exit();
+   trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+
 
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH v3 4/5] arm64: alternative: Introduce feature for GICv3 CPU interface

2015-05-28 Thread Christoffer Dall
On Thu, May 28, 2015 at 10:27:14AM +0100, Marc Zyngier wrote:
> On 14/05/15 12:25, Christoffer Dall wrote:
> > On Fri, Mar 27, 2015 at 01:09:24PM +, Marc Zyngier wrote:
> >> Add a new item to the feature set (ARM64_HAS_SYSREG_GIC_CPUIF)
> >> to indicate that we have a system register GIC CPU interface
> >>
> >> This will help KVM switching to alternative instruction patching.
> >>
> >> Reviewed-by: Andre Przywara 
> >> Acked-by: Will Deacon 
> >> Signed-off-by: Marc Zyngier 
> >> ---
> >>  arch/arm64/include/asm/cpufeature.h |  8 +++-
> >>  arch/arm64/kernel/cpufeature.c  | 16 
> >>  2 files changed, 23 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/arch/arm64/include/asm/cpufeature.h 
> >> b/arch/arm64/include/asm/cpufeature.h
> >> index 6ae35d1..d9e57b5 100644
> >> --- a/arch/arm64/include/asm/cpufeature.h
> >> +++ b/arch/arm64/include/asm/cpufeature.h
> >> @@ -23,8 +23,9 @@
> >>  
> >>  #define ARM64_WORKAROUND_CLEAN_CACHE  0
> >>  #define ARM64_WORKAROUND_DEVICE_LOAD_ACQUIRE  1
> >> +#define ARM64_HAS_SYSREG_GIC_CPUIF2
> >>  
> >> -#define ARM64_NCAPS   2
> >> +#define ARM64_NCAPS   3
> >>  
> >>  #ifndef __ASSEMBLY__
> >>  
> >> @@ -37,6 +38,11 @@ struct arm64_cpu_capabilities {
> >>u32 midr_model;
> >>u32 midr_range_min, midr_range_max;
> >>};
> >> +
> >> +  struct {/* Feature register checking */
> >> +  u64 register_mask;
> >> +  u64 register_value;
> >> +  };
> >>};
> >>  };
> >>  
> >> diff --git a/arch/arm64/kernel/cpufeature.c 
> >> b/arch/arm64/kernel/cpufeature.c
> >> index 3d9967e..b0bea2b3 100644
> >> --- a/arch/arm64/kernel/cpufeature.c
> >> +++ b/arch/arm64/kernel/cpufeature.c
> >> @@ -22,7 +22,23 @@
> >>  #include 
> >>  #include 
> >>  
> >> +static bool
> >> +has_id_aa64pfr0_feature(const struct arm64_cpu_capabilities *entry)
> >> +{
> >> +  u64 val;
> >> +
> >> +  val = read_cpuid(id_aa64pfr0_el1);
> > 
> > is this preferred compared to fishing it out of cpuinfo ?
> 
> Probably for the moment, yes. At some point, we should be able to have a
> consolidated set of features, consistent across all CPUs in the system.
> Once we have that, we should revisit this detection mechanism.
> 
> >> +  return (val & entry->register_mask) == entry->register_value;
> >> +}
> >> +
> >>  static const struct arm64_cpu_capabilities arm64_features[] = {
> >> +  {
> >> +  .desc = "system register GIC CPU interface",
> >> +  .capability = ARM64_HAS_SYSREG_GIC_CPUIF,
> >> +  .matches = has_id_aa64pfr0_feature,
> >> +  .register_mask = (0xf << 24),
> >> +  .register_value = (1 << 24),
> > 
> > I don't know if it's worth defining these masks in some header file.
> > The only other place I could see them used was in head.S.
> 
> Mark was looking at this a while ago. Maybe a task for a sleepless
> night? ;-)
> 

yeah, you can add this to the patch if it helps:

Reviewed-by: Christoffer Dall 

-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


Re: [PATCH] arm/arm64: KVM: Properly account for guest CPU time

2015-05-28 Thread Christoffer Dall
On Thu, May 28, 2015 at 02:49:09PM +0200, Christoffer Dall wrote:
> Until now we have been calling kvm_guest_exit after re-enabling
> interrupts when we come back from the guest, but this has the
> unfortunate effect that CPU time accounting done in the context of timer
> interrupts doesn't properly notice that the time since the last tick was
> spent in the guest.
> 
> Inspired by the comment in the x86 code, simply move the
> kvm_guest_exit() call below the local_irq_enable() call and change
> __kvm_guest_exit() to kvm_guest_exit(), because we are now calling this
> function with interrupts enabled.  Note that AFAIU we don't need an
> explicit barrier like x86 because the arm/arm64 implementation of
> local_irq_(en/dis)able has an implicit barrier.
> 
> At the same time, move the trace_kvm_exit() call outside of the atomic
> section, since there is no reason for us to do that with interrupts
> disabled.
> 
> Signed-off-by: Christoffer Dall 
> ---
> This patch is based on kvm/queue, because it has the kvm_guest_enter/exit
> rework recently posted by Christian Borntraeger.  I hope I got the logic
> of this wrong, there were 2 slightly worrying facts about this:

Of course this should have been:

"I hope I got the logic of this *right*, but there..."

Damn it!
-Christoffer
___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v2] arm/arm64: KVM: Properly account for guest CPU time

2015-05-28 Thread Christoffer Dall
Until now we have been calling kvm_guest_exit after re-enabling
interrupts when we come back from the guest, but this has the
unfortunate effect that CPU time accounting done in the context of timer
interrupts occurring while the guest is running doesn't properly notice
that the time since the last tick was spent in the guest.

Inspired by the comment in the x86 code, move the kvm_guest_exit() call
below the local_irq_enable() call and change __kvm_guest_exit() to
kvm_guest_exit(), because we are now calling this function with
interrupts enabled.  We have to now explicitly disable preemption and
not enable preemption before we've called kvm_guest_exit(), since
otherwise we could be preempted and everything happening before we
eventually get scheduled again would be accounted for as guest time.

At the same time, move the trace_kvm_exit() call outside of the atomic
section, since there is no reason for us to do that with interrupts
disabled.

Signed-off-by: Christoffer Dall 
---
This patch is based on kvm/queue, because it has the kvm_guest_enter/exit
rework recently posted by Christian Borntraeger.  I hope I got the logic
of this right, there were 2 slightly worrying facts about this:

First, we now enable and disable and enable interrupts on each exit
path, but I couldn't see any performance overhead on hackbench - yes the
only benchmark we care about.

Second, looking at the ppc and mips code, they seem to also call
kvm_guest_exit() before enabling interrupts, so I don't understand how
guest CPU time accounting works on those architectures.

Changes since v1:
 - Tweak comment and commit text based on Marc's feedback.
 - Explicitly disable preemption and enable it only after kvm_guest_exit().

 arch/arm/kvm/arm.c | 21 +
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index e41cb11..fe8028d 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -532,6 +532,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
kvm_vgic_flush_hwstate(vcpu);
kvm_timer_flush_hwstate(vcpu);
 
+   preempt_disable();
local_irq_disable();
 
/*
@@ -544,6 +545,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
 
if (ret <= 0 || need_new_vmid_gen(vcpu->kvm)) {
local_irq_enable();
+   preempt_enable();
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
continue;
@@ -559,8 +561,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
ret = kvm_call_hyp(__kvm_vcpu_run, vcpu);
 
vcpu->mode = OUTSIDE_GUEST_MODE;
-   __kvm_guest_exit();
-   trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+   /*
+* Back from guest
+*/
+
/*
 * We may have taken a host interrupt in HYP mode (ie
 * while executing the guest). This interrupt is still
@@ -574,8 +578,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct 
kvm_run *run)
local_irq_enable();
 
/*
-* Back from guest
-*/
+* We do local_irq_enable() before calling kvm_guest_exit() so
+* that if a timer interrupt hits while running the guest we
+* account that tick as being spent in the guest.  We enable
+* preemption after calling kvm_guest_exit() so that if we get
+* preempted we make sure ticks after that are not counted as
+* guest time.
+*/
+   kvm_guest_exit();
+   trace_kvm_exit(kvm_vcpu_trap_get_class(vcpu), *vcpu_pc(vcpu));
+   preempt_enable();
+
 
kvm_timer_sync_hwstate(vcpu);
kvm_vgic_sync_hwstate(vcpu);
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 0/4] Add support for GICv2m and MSIs to arm-virt

2015-05-29 Thread Christoffer Dall
Now that we have a generic PCIe host controller in the virt board, it
would be nice to be able to use MSIs so that we can eventually enable
VHOST with KVM.

With these patches you can use MSIs with TCG and with KVM, but you still
need some fixes for the mapping of the IRQ index to the GSI number for
IRQFD to work.  A separate series that enables IRQFD and vhost
is available: "ARM adaptations for vhost irqfd setup"
(https://lists.gnu.org/archive/html/qemu-devel/2015-04/msg01054.html)

Tested with KVM on XGene and with TCG by configuring a virtio-pci
network adapter for the guest and verifying MSIs going through as
expected.

Rebased on target-arm.next, see the individual patches for detailed
changelogs.

Christoffer Dall (4):
  target-arm: Add GIC phandle to VirtBoardInfo
  arm_gicv2m: Add GICv2m widget to support MSIs
  target-arm: Extend the gic node properties
  target-arm: Add the GICv2m to the virt board

 hw/arm/virt.c |  73 ++-
 hw/intc/Makefile.objs |   1 +
 hw/intc/arm_gicv2m.c  | 190 ++
 include/hw/arm/virt.h |   2 +
 4 files changed, 248 insertions(+), 18 deletions(-)
 create mode 100644 hw/intc/arm_gicv2m.c

-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 4/4] target-arm: Add the GICv2m to the virt board

2015-05-29 Thread Christoffer Dall
Add a GICv2m device to the virt board to enable MSIs on the generic PCI
host controller.  We allocate 64 SPIs in the IRQ space for now (this can
be increased/decreased later) and map the GICv2m right after the GIC in
the memory map.

Reviewed-by: Eric Auger 
Signed-off-by: Christoffer Dall 
---
Changes since v3:
 - Rebased on target-arm.next, so moved some definitions to virt.h
 - Added reviewed-by tag
Changes since v2:
 - Factored out changes to GIC DT node to previous patch.
 - Renamed QOM type name to "arm-gicv2m"
Changes since v1:
 - Remove stray merge conflict line
 - Reworded commit message.

 hw/arm/virt.c | 40 +++-
 include/hw/arm/virt.h |  2 ++
 2 files changed, 41 insertions(+), 1 deletion(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index 387dac8..4bb7175 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -70,6 +70,7 @@ typedef struct VirtBoardInfo {
 int fdt_size;
 uint32_t clock_phandle;
 uint32_t gic_phandle;
+uint32_t v2m_phandle;
 } VirtBoardInfo;
 
 typedef struct {
@@ -109,6 +110,7 @@ static const MemMapEntry a15memmap[] = {
 /* GIC distributor and CPU interfaces sit inside the CPU peripheral space 
*/
 [VIRT_GIC_DIST] =   { 0x0800, 0x0001 },
 [VIRT_GIC_CPU] ={ 0x0801, 0x0001 },
+[VIRT_GIC_V2M] ={ 0x0802, 0x1000 },
 [VIRT_UART] =   { 0x0900, 0x1000 },
 [VIRT_RTC] ={ 0x0901, 0x1000 },
 [VIRT_FW_CFG] = { 0x0902, 0x000a },
@@ -125,6 +127,7 @@ static const int a15irqmap[] = {
 [VIRT_RTC] = 2,
 [VIRT_PCIE] = 3, /* ... to 6 */
 [VIRT_MMIO] = 16, /* ...to 16 + NUM_VIRTIO_TRANSPORTS - 1 */
+[VIRT_GIC_V2M] = 48, /* ...to 48 + NUM_GICV2M_SPIS - 1 */
 };
 
 static VirtBoardInfo machines[] = {
@@ -300,9 +303,21 @@ static void fdt_add_cpu_nodes(const VirtBoardInfo *vbi)
 }
 }
 
-static void fdt_add_gic_node(VirtBoardInfo *vbi)
+static void fdt_add_v2m_gic_node(VirtBoardInfo *vbi)
 {
+vbi->v2m_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
+qemu_fdt_add_subnode(vbi->fdt, "/intc/v2m");
+qemu_fdt_setprop_string(vbi->fdt, "/intc/v2m", "compatible",
+"arm,gic-v2m-frame");
+qemu_fdt_setprop(vbi->fdt, "/intc/v2m", "msi-controller", NULL, 0);
+qemu_fdt_setprop_sized_cells(vbi->fdt, "/intc/v2m", "reg",
+ 2, vbi->memmap[VIRT_GIC_V2M].base,
+ 2, vbi->memmap[VIRT_GIC_V2M].size);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc/v2m", "phandle", vbi->v2m_phandle);
+}
 
+static void fdt_add_gic_node(VirtBoardInfo *vbi)
+{
 vbi->gic_phandle = qemu_fdt_alloc_phandle(vbi->fdt);
 qemu_fdt_setprop_cell(vbi->fdt, "/", "interrupt-parent", vbi->gic_phandle);
 
@@ -323,6 +338,25 @@ static void fdt_add_gic_node(VirtBoardInfo *vbi)
 qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
 }
 
+static void create_v2m(VirtBoardInfo *vbi, qemu_irq *pic)
+{
+int i;
+int irq = vbi->irqmap[VIRT_GIC_V2M];
+DeviceState *dev;
+
+dev = qdev_create(NULL, "arm-gicv2m");
+sysbus_mmio_map(SYS_BUS_DEVICE(dev), 0, vbi->memmap[VIRT_GIC_V2M].base);
+qdev_prop_set_uint32(dev, "base-spi", irq);
+qdev_prop_set_uint32(dev, "num-spi", NUM_GICV2M_SPIS);
+qdev_init_nofail(dev);
+
+for (i = 0; i < NUM_GICV2M_SPIS; i++) {
+sysbus_connect_irq(SYS_BUS_DEVICE(dev), i, pic[irq + i]);
+}
+
+fdt_add_v2m_gic_node(vbi);
+}
+
 static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
 {
 /* We create a standalone GIC v2 */
@@ -373,6 +407,8 @@ static void create_gic(VirtBoardInfo *vbi, qemu_irq *pic)
 }
 
 fdt_add_gic_node(vbi);
+
+create_v2m(vbi, pic);
 }
 
 static void create_uart(const VirtBoardInfo *vbi, qemu_irq *pic)
@@ -676,6 +712,8 @@ static void create_pcie(const VirtBoardInfo *vbi, qemu_irq 
*pic)
 qemu_fdt_setprop_cells(vbi->fdt, nodename, "bus-range", 0,
nr_pcie_buses - 1);
 
+qemu_fdt_setprop_cells(vbi->fdt, nodename, "msi-parent", vbi->v2m_phandle);
+
 qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "reg",
  2, base_ecam, 2, size_ecam);
 qemu_fdt_setprop_sized_cells(vbi->fdt, nodename, "ranges",
diff --git a/include/hw/arm/virt.h b/include/hw/arm/virt.h
index ceec8b3..003ef29 100644
--- a/include/hw/arm/virt.h
+++ b/include/hw/arm/virt.h
@@ -32,6 +32,7 @@
 
 #include "qemu-common.h"
 
+#define NUM_GICV2M_SPIS   64
 #define NUM_VIRTIO_TRANSPORTS 32
 
 #define ARCH_TIMER_VIRT_IRQ   11
@@ -53,6 +54,7 @@ enum {
 VIRT_PCIE_MMIO,
 VIRT_PCIE_PIO,
 VIRT_PCIE_ECAM,
+VIRT_GIC_V2M,
 };
 
 typedef struct MemMapEntry {
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 3/4] target-arm: Extend the gic node properties

2015-05-29 Thread Christoffer Dall
In preparation for adding the GICv2m which requires address specifiers
and is a subnode of the gic, we extend the gic DT definition to specify
the #address-cells and #size-cells properties and add an empty ranges
property to the DT node, since this is required to add the
v2m node as a child of the gic node.

Note that we must also expand the irq-map to reference the gic with the
right address-cells as a consequence of this change.

Reviewed-by: Eric Auger 
Suggested-by: Shanker Donthineni 
Signed-off-by: Christoffer Dall 
---
Changes since v3:
 - Rewrote patch and changed authorship and tags accordingly
 - Fixed spelling in commit message
Changes since v2:
 - New separate patch factoring out changes to existing code for eased
   bisectability in case we broke something
 - The above fixes the issue with non-MSI compatible guests.

 hw/arm/virt.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/hw/arm/virt.c b/hw/arm/virt.c
index e5235ef..387dac8 100644
--- a/hw/arm/virt.c
+++ b/hw/arm/virt.c
@@ -317,6 +317,9 @@ static void fdt_add_gic_node(VirtBoardInfo *vbi)
  2, vbi->memmap[VIRT_GIC_DIST].size,
  2, vbi->memmap[VIRT_GIC_CPU].base,
  2, vbi->memmap[VIRT_GIC_CPU].size);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#address-cells", 0x2);
+qemu_fdt_setprop_cell(vbi->fdt, "/intc", "#size-cells", 0x2);
+qemu_fdt_setprop(vbi->fdt, "/intc", "ranges", NULL, 0);
 qemu_fdt_setprop_cell(vbi->fdt, "/intc", "phandle", vbi->gic_phandle);
 }
 
@@ -585,7 +588,7 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, 
uint32_t gic_phandle,
 int first_irq, const char *nodename)
 {
 int devfn, pin;
-uint32_t full_irq_map[4 * 4 * 8] = { 0 };
+uint32_t full_irq_map[4 * 4 * 10] = { 0 };
 uint32_t *irq_map = full_irq_map;
 
 for (devfn = 0; devfn <= 0x18; devfn += 0x8) {
@@ -598,13 +601,13 @@ static void create_pcie_irq_map(const VirtBoardInfo *vbi, 
uint32_t gic_phandle,
 uint32_t map[] = {
 devfn << 8, 0, 0,   /* devfn */
 pin + 1,/* PCI pin */
-gic_phandle, irq_type, irq_nr, irq_level }; /* GIC irq */
+gic_phandle, 0, 0, irq_type, irq_nr, irq_level }; /* GIC irq */
 
 /* Convert map to big endian */
-for (i = 0; i < 8; i++) {
+for (i = 0; i < 10; i++) {
 irq_map[i] = cpu_to_be32(map[i]);
 }
-irq_map += 8;
+irq_map += 10;
 }
 }
 
-- 
2.1.2.330.g565301e.dirty

___
kvmarm mailing list
kvmarm@lists.cs.columbia.edu
https://lists.cs.columbia.edu/mailman/listinfo/kvmarm


[PATCH v4 2/4] arm_gicv2m: Add GICv2m widget to support MSIs

2015-05-29 Thread Christoffer Dall
The ARM GICv2m widget is a little device that handles MSI interrupt
writes to a trigger register and ties them to a range of interrupt lines
wired to the GIC.  It has a few status/id registers and the interrupt wires,
and that's about it.

A board instantiates the device by setting the base SPI number and
number of SPIs for the frame.  The base-spi parameter is indexed in the SPI
number space only, so base-spi == 0, means IRQ number 32.  When a device
(the PCI host controller) writes to the trigger register, the payload is
the GIC IRQ number, so we have to subtract 32 from that and then index
into our frame of SPIs.

When instantiating a GICv2m device, tell PCI that we have instantiated
something that can deal with MSIs.  We rely on the board actually wiring
up the GICv2m to the PCI host controller.

Reviewed-by: Eric Auger 
Signed-off-by: Christoffer Dall 
---
Changes since v3:
 - Added reviewed-by tag
Changes since v2:
 - Renamed QOM type to "arm-gicv2m"
Changes since v1:
 - Check that writes to MSI_SETSPI are within the lower boundary as well
 - Move gicv2m to common-obj in Makefile
 - Separate switch case and comment for impdef regs
 - Clearly document what is emulated
 - Allow 16 bit lower accesses to MSI_SETSPI regs
 - Fix commit grammar error
 - Remove stray pixman commit

 hw/intc/Makefile.objs |   1 +
 hw/intc/arm_gicv2m.c  | 190 ++
 2 files changed, 191 insertions(+)
 create mode 100644 hw/intc/arm_gicv2m.c

diff --git a/hw/intc/Makefile.objs b/hw/intc/Makefile.objs
index 843864a..092d8a8 100644
--- a/hw/intc/Makefile.objs
+++ b/hw/intc/Makefile.objs
@@ -11,6 +11,7 @@ common-obj-$(CONFIG_SLAVIO) += slavio_intctl.o
 common-obj-$(CONFIG_IOAPIC) += ioapic_common.o
 common-obj-$(CONFIG_ARM_GIC) += arm_gic_common.o
 common-obj-$(CONFIG_ARM_GIC) += arm_gic.o
+common-obj-$(CONFIG_ARM_GIC) += arm_gicv2m.o
 common-obj-$(CONFIG_OPENPIC) += openpic.o
 
 obj-$(CONFIG_APIC) += apic.o apic_common.o
diff --git a/hw/intc/arm_gicv2m.c b/hw/intc/arm_gicv2m.c
new file mode 100644
index 000..9f84f72
--- /dev/null
+++ b/hw/intc/arm_gicv2m.c
@@ -0,0 +1,190 @@
+/*
+ *  GICv2m extension for MSI/MSI-x support with a GICv2-based system
+ *
+ * Copyright (C) 2015 Linaro, All rights reserved.
+ *
+ * Author: Christoffer Dall 
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* This file implements an emulated GICv2m widget as described in the ARM
+ * Server Base System Architecture (SBSA) specification Version 2.2
+ * (ARM-DEN-0029 v2.2) pages 35-39 without any optional implementation defined
+ * identification registers and with a single non-secure MSI register frame.
+ */
+
+#include "hw/sysbus.h"
+#include "hw/pci/msi.h"
+
+#define TYPE_ARM_GICV2M "arm-gicv2m"
+#define ARM_GICV2M(obj) OBJECT_CHECK(ARMGICv2mState, (obj), TYPE_ARM_GICV2M)
+
+#define GICV2M_NUM_SPI_MAX 128
+
+#define V2M_MSI_TYPER   0x008
+#define V2M_MSI_SETSPI_NS   0x040
+#define V2M_MSI_IIDR0xFCC
+#define V2M_IIDR0   0xFD0
+#define V2M_IIDR11  0xFFC
+
+#define PRODUCT_ID_QEMU 0x51 /* ASCII code Q */
+
+typedef struct ARMGICv2mState {
+SysBusDevice parent_obj;
+
+MemoryRegion iomem;
+qemu_irq spi[GICV2M_NUM_SPI_MAX];
+
+uint32_t base_spi;
+uint32_t num_spi;
+} ARMGICv2mState;
+
+static void gicv2m_set_irq(void *opaque, int irq)
+{
+ARMGICv2mState *s = (ARMGICv2mState *)opaque;
+
+qemu_irq_pulse(s->spi[irq]);
+}
+
+static uint64_t gicv2m_read(void *opaque, hwaddr offset,
+unsigned size)
+{
+ARMGICv2mState *s = (ARMGICv2mState *)opaque;
+uint32_t val;
+
+if (size != 4) {
+qemu_log_mask(LOG_GUEST_ERROR, "gicv2m_read: bad size %u\n", size);
+return 0;
+}
+
+switch (offset) {
+case V2M_MSI_TYPER:
+val = (s->base_spi + 32) << 16;
+val |= s->num_spi;
+return val;
+case V2M_MSI_IIDR:
+/* We don't have any valid implementor so we leave that field as zero
+ * and we return 0 in the arch revision as per the spec.
+ */
+return (PRODUCT_ID_QEMU << 20);
+case V2M_IIDR0 ... V2M_IIDR11:
+/* We do not implement any optional identification registers and the
+ * mandatory MSI_PIDR

  1   2   3   4   5   6   7   8   9   10   >